From ee5daee1b75289d007193718b69bdede5f3aa11c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 9 Apr 2012 03:22:41 +0000 Subject: [PATCH 01/29] HDFS-3214. InterDatanodeProtocolServerSideTranslatorPB doesn't handle null response from initReplicaRecovery. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311125 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ ...nterDatanodeProtocolServerSideTranslatorPB.java | 14 +++++++++++--- .../InterDatanodeProtocolTranslatorPB.java | 11 +++++++++++ .../src/main/proto/InterDatanodeProtocol.proto | 7 +++++-- .../fsdataset/impl/TestInterDatanodeProtocol.java | 10 ++++++++-- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 115a855139b..7d3a52465dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -453,6 +453,9 @@ Release 2.0.0 - UNRELEASED HDFS-3136. Remove SLF4J dependency as HDFS does not need it to fix unnecessary warnings. (Jason Lowe via suresh) + HDFS-3214. InterDatanodeProtocolServerSideTranslatorPB doesn't handle + null response from initReplicaRecovery (todd) + BREAKDOWN OF HDFS-1623 SUBTASKS HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolServerSideTranslatorPB.java index 5c475c8502a..8f3eed96852 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolServerSideTranslatorPB.java @@ -56,9 +56,17 @@ public class InterDatanodeProtocolServerSideTranslatorPB implements } catch (IOException e) { throw new ServiceException(e); } - return InitReplicaRecoveryResponseProto.newBuilder() - .setBlock(PBHelper.convert(r)) - .setState(PBHelper.convert(r.getOriginalReplicaState())).build(); + + if (r == null) { + return InitReplicaRecoveryResponseProto.newBuilder() + .setReplicaFound(false) + .build(); + } else { + return InitReplicaRecoveryResponseProto.newBuilder() + .setReplicaFound(true) + .setBlock(PBHelper.convert(r)) + .setState(PBHelper.convert(r.getOriginalReplicaState())).build(); + } } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java index 9d301916ddd..547ca5c21bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InterDatanodeProtocolTranslatorPB.java @@ -85,6 +85,17 @@ public class InterDatanodeProtocolTranslatorPB implements } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } + if (!resp.getReplicaFound()) { + // No replica found on the remote node. + return null; + } else { + if (!resp.hasBlock() || !resp.hasState()) { + throw new IOException("Replica was found but missing fields. 
" + + "Req: " + req + "\n" + + "Resp: " + resp); + } + } + BlockProto b = resp.getBlock(); return new ReplicaRecoveryInfo(b.getBlockId(), b.getNumBytes(), b.getGenStamp(), PBHelper.convert(resp.getState())); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto index 99c98cc1919..1e7c1e59b31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto @@ -38,8 +38,11 @@ message InitReplicaRecoveryRequestProto { * Repica recovery information */ message InitReplicaRecoveryResponseProto { - required ReplicaStateProto state = 1; // State of the replica - required BlockProto block = 2; // block information + required bool replicaFound = 1; + + // The following entries are not set if there was no replica found. + optional ReplicaStateProto state = 2; // State of the replica + optional BlockProto block = 3; // block information } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestInterDatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestInterDatanodeProtocol.java index 599521f1793..c1167a4094a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestInterDatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestInterDatanodeProtocol.java @@ -17,8 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; @@ -172,6 +171,13 @@ public class TestInterDatanodeProtocol { b.getBlockId(), b.getNumBytes()/2, b.getGenerationStamp()+1); idp.updateReplicaUnderRecovery(b, recoveryId, newblock.getNumBytes()); checkMetaInfo(newblock, datanode); + + // Verify correct null response trying to init recovery for a missing block + ExtendedBlock badBlock = new ExtendedBlock("fake-pool", + b.getBlockId(), 0, 0); + assertNull(idp.initReplicaRecovery( + new RecoveringBlock(badBlock, + locatedblock.getLocations(), recoveryId))); } finally { if (cluster != null) {cluster.shutdown();} From 909176c1fc21501d9cd2b2d70528cca2fa02e68b Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 9 Apr 2012 05:56:22 +0000 Subject: [PATCH 02/29] HADOOP-8261. Har file system doesn't deal with FS URIs with a host but no port. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311133 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../org/apache/hadoop/fs/HarFileSystem.java | 3 +- .../namenode/ha/TestHarFileSystemWithHA.java | 80 +++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHarFileSystemWithHA.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 309f5d244ef..44cb78d8e0d 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -335,6 +335,9 @@ Release 2.0.0 - UNRELEASED HADOOP-8249. 
invalid hadoop-auth cookies should trigger authentication if info is avail before returning HTTP 401 (tucu) + HADOOP-8261. Har file system doesn't deal with FS URIs with a host but no + port. (atm) + BREAKDOWN OF HADOOP-7454 SUBTASKS HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java index e0fc4656bc9..9a5b28381c3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java @@ -202,7 +202,8 @@ public class HarFileSystem extends FilterFileSystem { final String underLyingHost = i == host.length()? null: host.substring(i); int underLyingPort = rawURI.getPort(); String auth = (underLyingHost == null && underLyingPort == -1)? - null:(underLyingHost+":"+underLyingPort); + null:(underLyingHost+ + (underLyingPort == -1 ? "" : ":"+underLyingPort)); URI tmp = null; if (rawURI.getQuery() != null) { // query component not allowed diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHarFileSystemWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHarFileSystemWithHA.java new file mode 100644 index 00000000000..311f2602434 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHarFileSystemWithHA.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.HarFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.junit.Test; + +public class TestHarFileSystemWithHA { + + private static final Path TEST_HAR_PATH = new Path("/input.har"); + + /** + * Test that the HarFileSystem works with underlying HDFS URIs that have no + * port specified, as is often the case with an HA setup. 
+ */ + @Test + public void testHarUriWithHaUriWithNoPort() throws Exception { + Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(1) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .build(); + cluster.transitionToActive(0); + HATestUtil.setFailoverConfigurations(cluster, conf); + + createEmptyHarArchive(HATestUtil.configureFailoverFs(cluster, conf), + TEST_HAR_PATH); + + URI failoverUri = FileSystem.getDefaultUri(conf); + Path p = new Path("har://hdfs-" + failoverUri.getAuthority() + TEST_HAR_PATH); + p.getFileSystem(conf); + } finally { + cluster.shutdown(); + } + } + + /** + * Create an empty Har archive in the FileSystem fs at the Path p. + * + * @param fs the file system to create the Har archive in + * @param p the path to create the Har archive at + * @throws IOException in the event of error + */ + private static void createEmptyHarArchive(FileSystem fs, Path p) + throws IOException { + fs.mkdirs(p); + OutputStream out = fs.create(new Path(p, "_masterindex")); + out.write(Integer.toString(HarFileSystem.VERSION).getBytes()); + out.close(); + fs.create(new Path(p, "_index")).close(); + } + +} From 3ced5ea06698fa781f76c5e7d4440f54ce5fcad3 Mon Sep 17 00:00:00 2001 From: Uma Maheswara Rao G Date: Mon, 9 Apr 2012 19:11:58 +0000 Subject: [PATCH 03/29] HDFS-3119. Overreplicated block is not deleted even after the replication factor is reduced after sync follwed by closing that file. Contributed by Ashish Singhi. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311380 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 4 +++ .../server/blockmanagement/BlockManager.java | 6 +++- .../hdfs/server/namenode/FSNamesystem.java | 6 ++-- .../TestOverReplicatedBlocks.java | 33 +++++++++++++++++-- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7d3a52465dc..ca6d2bfc0c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -117,6 +117,10 @@ Trunk (unreleased changes) HDFS-3121. Add HDFS tests for HADOOP-8014 change. (John George via suresh) + + HDFS-3119. Overreplicated block is not deleted even after the replication + factor is reduced after sync follwed by closing that file. (Ashish Singhi + via umamahesh) Release 2.0.0 - UNRELEASED diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 54bb2b48da9..a3f432d46a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2767,7 +2767,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block } } - public void checkReplication(Block block, int numExpectedReplicas) { + public void checkReplication(Block block, short numExpectedReplicas) { // filter out containingNodes that are marked for decommission. 
NumberReplicas number = countNodes(block); if (isNeededReplication(block, numExpectedReplicas, number.liveReplicas())) { @@ -2775,6 +2775,10 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block number.liveReplicas(), number.decommissionedReplicas(), numExpectedReplicas); + return; + } + if (number.liveReplicas() > numExpectedReplicas) { + processOverReplicatedBlock(block, numExpectedReplicas, null, null); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 5e24a22fbbf..a5a8ca0d322 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -2120,10 +2120,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Check all blocks of a file. If any blocks are lower than their intended - * replication factor, then insert them into neededReplication + * replication factor, then insert them into neededReplication and if + * the blocks are more than the intended replication factor then insert + * them into invalidateBlocks. */ private void checkReplicationFactor(INodeFile file) { - int numExpectedReplicas = file.getReplication(); + short numExpectedReplicas = file.getReplication(); Block[] pendingBlocks = file.getBlocks(); int nrBlocks = pendingBlocks.length; for (int i = 0; i < nrBlocks; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java index cd4dfb94a66..0125b021e96 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java @@ -17,12 +17,13 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; +import static org.junit.Assert.*; import java.io.File; import java.io.IOException; -import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -36,13 +37,15 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.junit.Test; -public class TestOverReplicatedBlocks extends TestCase { +public class TestOverReplicatedBlocks { /** Test processOverReplicatedBlock can handle corrupt replicas fine. * It make sure that it won't treat corrupt replicas as valid ones * thus prevents NN deleting valid replicas but keeping * corrupt ones. */ + @Test public void testProcesOverReplicateBlock() throws IOException { Configuration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L); @@ -113,4 +116,30 @@ public class TestOverReplicatedBlocks extends TestCase { cluster.shutdown(); } } + /** + * Test over replicated block should get invalidated when decreasing the + * replication for a partial block. 
+ */ + @Test + public void testInvalidateOverReplicatedBlock() throws Exception { + Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3) + .build(); + try { + final FSNamesystem namesystem = cluster.getNamesystem(); + final BlockManager bm = namesystem.getBlockManager(); + FileSystem fs = cluster.getFileSystem(); + Path p = new Path(MiniDFSCluster.getBaseDirectory(), "/foo1"); + FSDataOutputStream out = fs.create(p, (short) 2); + out.writeBytes("HDFS-3119: " + p); + out.hsync(); + fs.setReplication(p, (short) 1); + out.close(); + ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, p); + assertEquals("Expected only one live replica for the block", 1, bm + .countNodes(block.getLocalBlock()).liveReplicas()); + } finally { + cluster.shutdown(); + } + } } From 706394d03992b394e9f907aff2155df493e4ea4e Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 9 Apr 2012 19:39:58 +0000 Subject: [PATCH 04/29] HDFS-3004. Implement Recovery Mode. Contributed by Colin Patrick McCabe git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311394 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../dev-support/findbugsExcludeFile.xml | 6 + .../BookKeeperEditLogInputStream.java | 10 +- .../server/namenode/FSEditLogTestUtil.java | 8 +- .../content/xdocs/hdfs_user_guide.xml | 27 +- .../server/common/HdfsServerConstants.java | 24 +- .../hdfs/server/namenode/BackupImage.java | 29 +- .../hdfs/server/namenode/Checkpointer.java | 2 +- .../namenode/EditLogBackupInputStream.java | 20 +- .../namenode/EditLogFileInputStream.java | 38 +-- .../server/namenode/EditLogInputStream.java | 84 ++++- .../server/namenode/EditLogOutputStream.java | 3 +- .../hdfs/server/namenode/FSEditLog.java | 52 +-- .../hdfs/server/namenode/FSEditLogLoader.java | 147 +++++---- .../hdfs/server/namenode/FSEditLogOp.java | 254 ++++++++------- .../hadoop/hdfs/server/namenode/FSImage.java | 72 ++--- .../FSImageTransactionalStorageInspector.java | 18 +- .../hdfs/server/namenode/FSNamesystem.java | 8 +- .../server/namenode/FileJournalManager.java | 5 +- .../hdfs/server/namenode/JournalStream.java | 56 ---- .../server/namenode/MetaRecoveryContext.java | 130 ++++++++ .../hdfs/server/namenode/NNStorage.java | 7 +- .../hadoop/hdfs/server/namenode/NameNode.java | 59 +++- .../server/namenode/ha/EditLogTailer.java | 2 +- .../OfflineEditsXmlLoader.java | 4 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 10 + .../hdfs/server/namenode/TestEditLog.java | 19 +- .../hdfs/server/namenode/TestEditLogRace.java | 4 +- .../server/namenode/TestFSEditLogLoader.java | 4 +- .../server/namenode/TestNameNodeRecovery.java | 305 ++++++++++++++++++ .../namenode/TestSecurityTokenEditLog.java | 4 +- 31 files changed, 1014 insertions(+), 399 deletions(-) delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalStream.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ca6d2bfc0c0..0a89cd40f36 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -199,6 +199,8 @@ Release 2.0.0 - UNRELEASED HDFS-3102. 
Add CLI tool to initialize the shared-edits dir. (atm) + HDFS-3004. Implement Recovery Mode. (Colin Patrick McCabe via eli) + IMPROVEMENTS HDFS-2018. Move all journal stream management code into one place. diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index c84f57d350e..31a38c7aff5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -264,4 +264,10 @@ + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java index 636471a450f..9d070d9637f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java @@ -94,8 +94,8 @@ class BookKeeperEditLogInputStream extends EditLogInputStream { } @Override - public FSEditLogOp readOp() throws IOException { - return reader.readOp(); + protected FSEditLogOp nextOp() throws IOException { + return reader.readOp(false); } @Override @@ -123,12 +123,6 @@ class BookKeeperEditLogInputStream extends EditLogInputStream { lh.toString(), firstTxId, lastTxId); } - @Override - public JournalType getType() { - assert (false); - return null; - } - // TODO(HA): Test this. @Override public boolean isInProgress() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java index 6557b96e18a..41f0292e548 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java @@ -18,13 +18,17 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache; /** * Utilities for testing edit logs */ public class FSEditLogTestUtil { + private static OpInstanceCache cache = new OpInstanceCache(); + public static FSEditLogOp getNoOpInstance() { - return FSEditLogOp.LogSegmentOp.getInstance(FSEditLogOpCodes.OP_END_LOG_SEGMENT); + return FSEditLogOp.LogSegmentOp.getInstance(cache, + FSEditLogOpCodes.OP_END_LOG_SEGMENT); } public static long countTransactionsInStream(EditLogInputStream in) @@ -32,4 +36,4 @@ public class FSEditLogTestUtil { FSEditLogLoader.EditLogValidation validation = FSEditLogLoader.validateEditLog(in); return validation.getNumTransactions(); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_user_guide.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_user_guide.xml index 976800e0350..6c0a846b81c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_user_guide.xml +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
@@ -537,7 +537,32 @@
       For command usage, see fetchdt command.
     </p>
 
-   </section> <section> <title> Upgrade and Rollback </title>
+   </section>
+   <section> <title> Recovery Mode </title>
+     <p>Typically, you will configure multiple metadata storage locations.
+       Then, if one storage location is corrupt, you can read the
+       metadata from one of the other storage locations.</p>
+
+     <p>However, what can you do if the only storage locations available are
+       corrupt? In this case, there is a special NameNode startup mode called
+       Recovery mode that may allow you to recover most of your data.</p>
+
+     <p>You can start the NameNode in recovery mode like so:
+       <code>namenode -recover</code></p>
+
+     <p>When in recovery mode, the NameNode will interactively prompt you at
+       the command line about possible courses of action you can take to
+       recover your data.</p>
+
+     <p>If you don't want to be prompted, you can give the
+       <code>-force</code> option. This option will force
+       recovery mode to always select the first choice. Normally, this
+       will be the most reasonable choice.</p>
+
+     <p>Because Recovery mode can cause you to lose data, you should always
+       back up your edit log and fsimage before using it.</p>
+   </section>
+   <section> <title> Upgrade and Rollback </title>
     <p>
When Hadoop is upgraded on an existing cluster, as with any software upgrade, it is possible there are new bugs or diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index 710fa4df35e..00275c5917c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -22,6 +22,7 @@ import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.server.namenode.MetaRecoveryContext; /************************************ * Some handy internal HDFS constants @@ -54,13 +55,18 @@ public final class HdfsServerConstants { FINALIZE("-finalize"), IMPORT ("-importCheckpoint"), BOOTSTRAPSTANDBY("-bootstrapStandby"), - INITIALIZESHAREDEDITS("-initializeSharedEdits"); + INITIALIZESHAREDEDITS("-initializeSharedEdits"), + RECOVER ("-recover"), + FORCE("-force"); private String name = null; // Used only with format and upgrade options private String clusterId = null; + // Used only with recovery option + private int force = 0; + private StartupOption(String arg) {this.name = arg;} public String getName() {return name;} public NamenodeRole toNodeRole() { @@ -77,10 +83,24 @@ public final class HdfsServerConstants { public void setClusterId(String cid) { clusterId = cid; } - + public String getClusterId() { return clusterId; } + + public MetaRecoveryContext createRecoveryContext() { + if (!name.equals(RECOVER.name)) + return null; + return new MetaRecoveryContext(force); + } + + public void setForce(int force) { + this.force = force; + } + + public int getForce() { + return this.force; + } } // Timeouts for communicating with DataNode for streaming writes/reads diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java index 3bf5d66640a..85f0245928c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java @@ -213,19 +213,21 @@ public class BackupImage extends FSImage { LOG.debug("data:" + StringUtils.byteToHexString(data)); } - FSEditLogLoader logLoader = new FSEditLogLoader(namesystem); + FSEditLogLoader logLoader = + new FSEditLogLoader(namesystem, lastAppliedTxId); int logVersion = storage.getLayoutVersion(); backupInputStream.setBytes(data, logVersion); - long numLoaded = logLoader.loadEditRecords(logVersion, backupInputStream, - true, lastAppliedTxId + 1); - if (numLoaded != numTxns) { + long numTxnsAdvanced = logLoader.loadEditRecords(logVersion, + backupInputStream, true, lastAppliedTxId + 1, null); + if (numTxnsAdvanced != numTxns) { throw new IOException("Batch of txns starting at txnid " + firstTxId + " was supposed to contain " + numTxns + - " transactions but only was able to apply " + numLoaded); + " transactions, but we were only able to advance by " + + numTxnsAdvanced); } - lastAppliedTxId += numTxns; - + lastAppliedTxId = logLoader.getLastAppliedTxId(); + namesystem.dir.updateCountForINodeWithQuota(); // inefficient! 
} finally { backupInputStream.clear(); @@ -275,7 +277,7 @@ public class BackupImage extends FSImage { editStreams.add(s); } } - loadEdits(editStreams, namesystem); + loadEdits(editStreams, namesystem, null); } // now, need to load the in-progress file @@ -309,12 +311,11 @@ public class BackupImage extends FSImage { LOG.info("Going to finish converging with remaining " + remainingTxns + " txns from in-progress stream " + stream); - FSEditLogLoader loader = new FSEditLogLoader(namesystem); - long numLoaded = loader.loadFSEdits(stream, lastAppliedTxId + 1); - lastAppliedTxId += numLoaded; - assert numLoaded == remainingTxns : - "expected to load " + remainingTxns + " but loaded " + - numLoaded + " from " + stream; + FSEditLogLoader loader = + new FSEditLogLoader(namesystem, lastAppliedTxId); + loader.loadFSEdits(stream, lastAppliedTxId + 1, null); + lastAppliedTxId = loader.getLastAppliedTxId(); + assert lastAppliedTxId == getEditLog().getLastWrittenTxId(); } finally { FSEditLog.closeAllStreams(editStreams); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 6ae931fd44f..fcdea9c8315 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -292,6 +292,6 @@ class Checkpointer extends Daemon { } LOG.info("Checkpointer about to load edits from " + editsStreams.size() + " stream(s)."); - dstImage.loadEdits(editsStreams, dstNamesystem); + dstImage.loadEdits(editsStreams, dstNamesystem, null); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java index a0fb8fe6291..1f514cdfc8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java @@ -70,21 +70,25 @@ class EditLogBackupInputStream extends EditLogInputStream { reader = null; } - @Override // JournalStream + @Override public String getName() { return address; } - @Override // JournalStream - public JournalType getType() { - return JournalType.BACKUP; + @Override + protected FSEditLogOp nextOp() throws IOException { + Preconditions.checkState(reader != null, + "Must call setBytes() before readOp()"); + return reader.readOp(false); } @Override - public FSEditLogOp readOp() throws IOException { - Preconditions.checkState(reader != null, - "Must call setBytes() before readOp()"); - return reader.readOp(); + protected FSEditLogOp nextValidOp() { + try { + return reader.readOp(true); + } catch (IOException e) { + throw new RuntimeException("got unexpected IOException " + e, e); + } } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 49741861f87..0b00187c662 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -89,24 +89,6 @@ public class EditLogFileInputStream extends EditLogInputStream { this.isInProgress = isInProgress; } - /** - * Skip over a number of transactions. Subsequent calls to - * {@link EditLogFileInputStream#readOp()} will begin after these skipped - * transactions. If more transactions are requested to be skipped than remain - * in the edit log, all edit log ops in the log will be skipped and subsequent - * calls to {@link EditLogInputStream#readOp} will return null. - * - * @param transactionsToSkip number of transactions to skip over. - * @throws IOException if there's an error while reading an operation - */ - public void skipTransactions(long transactionsToSkip) throws IOException { - assert firstTxId != HdfsConstants.INVALID_TXID && - lastTxId != HdfsConstants.INVALID_TXID; - for (long i = 0; i < transactionsToSkip; i++) { - reader.readOp(); - } - } - @Override public long getFirstTxId() throws IOException { return firstTxId; @@ -117,19 +99,23 @@ public class EditLogFileInputStream extends EditLogInputStream { return lastTxId; } - @Override // JournalStream + @Override public String getName() { return file.getPath(); } - @Override // JournalStream - public JournalType getType() { - return JournalType.FILE; - } - @Override - public FSEditLogOp readOp() throws IOException { - return reader.readOp(); + protected FSEditLogOp nextOp() throws IOException { + return reader.readOp(false); + } + + @Override + protected FSEditLogOp nextValidOp() { + try { + return reader.readOp(true); + } catch (IOException e) { + return null; + } } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java index 7a7f8d8743a..c2b42be2461 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java @@ -34,7 +34,14 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public abstract class EditLogInputStream implements JournalStream, Closeable { +public abstract class EditLogInputStream implements Closeable { + private FSEditLogOp cachedOp = null; + + /** + * @return the name of the EditLogInputStream + */ + public abstract String getName(); + /** * @return the first transaction which will be found in this stream */ @@ -57,8 +64,81 @@ public abstract class EditLogInputStream implements JournalStream, Closeable { * @return an operation from the stream or null if at end of stream * @throws IOException if there is an error reading from the stream */ - public abstract FSEditLogOp readOp() throws IOException; + public FSEditLogOp readOp() throws IOException { + FSEditLogOp ret; + if (cachedOp != null) { + ret = cachedOp; + cachedOp = null; + return ret; + } + return nextOp(); + } + /** + * Position the stream so that a valid operation can be read from it with + * readOp(). + * + * This method can be used to skip over corrupted sections of edit logs. + */ + public void resync() throws IOException { + if (cachedOp != null) { + return; + } + cachedOp = nextValidOp(); + } + + /** + * Get the next operation from the stream storage. 
+ * + * @return an operation from the stream or null if at end of stream + * @throws IOException if there is an error reading from the stream + */ + protected abstract FSEditLogOp nextOp() throws IOException; + + /** + * Get the next valid operation from the stream storage. + * + * This is exactly like nextOp, except that we attempt to skip over damaged + * parts of the edit log + * + * @return an operation from the stream or null if at end of stream + */ + protected FSEditLogOp nextValidOp() { + // This is a trivial implementation which just assumes that any errors mean + // that there is nothing more of value in the log. Subclasses that support + // error recovery will want to override this. + try { + return nextOp(); + } catch (IOException e) { + return null; + } + } + + /** + * Skip edit log operations up to a given transaction ID, or until the + * end of the edit log is reached. + * + * After this function returns, the next call to readOp will return either + * end-of-file (null) or a transaction with a txid equal to or higher than + * the one we asked for. + * + * @param txid The transaction ID to read up until. + * @return Returns true if we found a transaction ID greater than + * or equal to 'txid' in the log. + */ + public boolean skipUntil(long txid) throws IOException { + while (true) { + FSEditLogOp op = readOp(); + if (op == null) { + return false; + } + if (op.getTransactionId() >= txid) { + cachedOp = op; + return true; + } + } + } + /** * Get the layout version of the data in the stream. * @return the layout version of the ops in the stream. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java index d0fc1568015..f2cbcb30f60 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; +import java.io.Closeable; import static org.apache.hadoop.hdfs.server.common.Util.now; @@ -30,7 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public abstract class EditLogOutputStream { +public abstract class EditLogOutputStream implements Closeable { // these are statistics counters private long numSync; // number of sync(s) to disk private long totalTimeSync; // total time to sync diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 9753b7f4906..7f6435e778b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -127,6 +127,14 @@ public class FSEditLog { private Configuration conf; private List editsDirs; + + private ThreadLocal cache = + new ThreadLocal() { + @Override + protected OpInstanceCache initialValue() { + return new OpInstanceCache(); + } + }; /** * The edit directories that are shared between primary and secondary. @@ -596,7 +604,7 @@ public class FSEditLog { * Records the block locations of the last block. 
*/ public void logOpenFile(String path, INodeFileUnderConstruction newNode) { - AddOp op = AddOp.getInstance() + AddOp op = AddOp.getInstance(cache.get()) .setPath(path) .setReplication(newNode.getReplication()) .setModificationTime(newNode.getModificationTime()) @@ -614,7 +622,7 @@ public class FSEditLog { * Add close lease record to edit log. */ public void logCloseFile(String path, INodeFile newNode) { - CloseOp op = CloseOp.getInstance() + CloseOp op = CloseOp.getInstance(cache.get()) .setPath(path) .setReplication(newNode.getReplication()) .setModificationTime(newNode.getModificationTime()) @@ -627,7 +635,7 @@ public class FSEditLog { } public void logUpdateBlocks(String path, INodeFileUnderConstruction file) { - UpdateBlocksOp op = UpdateBlocksOp.getInstance() + UpdateBlocksOp op = UpdateBlocksOp.getInstance(cache.get()) .setPath(path) .setBlocks(file.getBlocks()); logEdit(op); @@ -637,7 +645,7 @@ public class FSEditLog { * Add create directory record to edit log */ public void logMkDir(String path, INode newNode) { - MkdirOp op = MkdirOp.getInstance() + MkdirOp op = MkdirOp.getInstance(cache.get()) .setPath(path) .setTimestamp(newNode.getModificationTime()) .setPermissionStatus(newNode.getPermissionStatus()); @@ -649,7 +657,7 @@ public class FSEditLog { * TODO: use String parameters until just before writing to disk */ void logRename(String src, String dst, long timestamp) { - RenameOldOp op = RenameOldOp.getInstance() + RenameOldOp op = RenameOldOp.getInstance(cache.get()) .setSource(src) .setDestination(dst) .setTimestamp(timestamp); @@ -660,7 +668,7 @@ public class FSEditLog { * Add rename record to edit log */ void logRename(String src, String dst, long timestamp, Options.Rename... options) { - RenameOp op = RenameOp.getInstance() + RenameOp op = RenameOp.getInstance(cache.get()) .setSource(src) .setDestination(dst) .setTimestamp(timestamp) @@ -672,7 +680,7 @@ public class FSEditLog { * Add set replication record to edit log */ void logSetReplication(String src, short replication) { - SetReplicationOp op = SetReplicationOp.getInstance() + SetReplicationOp op = SetReplicationOp.getInstance(cache.get()) .setPath(src) .setReplication(replication); logEdit(op); @@ -684,7 +692,7 @@ public class FSEditLog { * @param quota the directory size limit */ void logSetQuota(String src, long nsQuota, long dsQuota) { - SetQuotaOp op = SetQuotaOp.getInstance() + SetQuotaOp op = SetQuotaOp.getInstance(cache.get()) .setSource(src) .setNSQuota(nsQuota) .setDSQuota(dsQuota); @@ -693,7 +701,7 @@ public class FSEditLog { /** Add set permissions record to edit log */ void logSetPermissions(String src, FsPermission permissions) { - SetPermissionsOp op = SetPermissionsOp.getInstance() + SetPermissionsOp op = SetPermissionsOp.getInstance(cache.get()) .setSource(src) .setPermissions(permissions); logEdit(op); @@ -701,7 +709,7 @@ public class FSEditLog { /** Add set owner record to edit log */ void logSetOwner(String src, String username, String groupname) { - SetOwnerOp op = SetOwnerOp.getInstance() + SetOwnerOp op = SetOwnerOp.getInstance(cache.get()) .setSource(src) .setUser(username) .setGroup(groupname); @@ -712,7 +720,7 @@ public class FSEditLog { * concat(trg,src..) 
log */ void logConcat(String trg, String [] srcs, long timestamp) { - ConcatDeleteOp op = ConcatDeleteOp.getInstance() + ConcatDeleteOp op = ConcatDeleteOp.getInstance(cache.get()) .setTarget(trg) .setSources(srcs) .setTimestamp(timestamp); @@ -723,7 +731,7 @@ public class FSEditLog { * Add delete file record to edit log */ void logDelete(String src, long timestamp) { - DeleteOp op = DeleteOp.getInstance() + DeleteOp op = DeleteOp.getInstance(cache.get()) .setPath(src) .setTimestamp(timestamp); logEdit(op); @@ -733,7 +741,7 @@ public class FSEditLog { * Add generation stamp record to edit log */ void logGenerationStamp(long genstamp) { - SetGenstampOp op = SetGenstampOp.getInstance() + SetGenstampOp op = SetGenstampOp.getInstance(cache.get()) .setGenerationStamp(genstamp); logEdit(op); } @@ -742,7 +750,7 @@ public class FSEditLog { * Add access time record to edit log */ void logTimes(String src, long mtime, long atime) { - TimesOp op = TimesOp.getInstance() + TimesOp op = TimesOp.getInstance(cache.get()) .setPath(src) .setModificationTime(mtime) .setAccessTime(atime); @@ -754,7 +762,7 @@ public class FSEditLog { */ void logSymlink(String path, String value, long mtime, long atime, INodeSymlink node) { - SymlinkOp op = SymlinkOp.getInstance() + SymlinkOp op = SymlinkOp.getInstance(cache.get()) .setPath(path) .setValue(value) .setModificationTime(mtime) @@ -770,7 +778,7 @@ public class FSEditLog { */ void logGetDelegationToken(DelegationTokenIdentifier id, long expiryTime) { - GetDelegationTokenOp op = GetDelegationTokenOp.getInstance() + GetDelegationTokenOp op = GetDelegationTokenOp.getInstance(cache.get()) .setDelegationTokenIdentifier(id) .setExpiryTime(expiryTime); logEdit(op); @@ -778,26 +786,26 @@ public class FSEditLog { void logRenewDelegationToken(DelegationTokenIdentifier id, long expiryTime) { - RenewDelegationTokenOp op = RenewDelegationTokenOp.getInstance() + RenewDelegationTokenOp op = RenewDelegationTokenOp.getInstance(cache.get()) .setDelegationTokenIdentifier(id) .setExpiryTime(expiryTime); logEdit(op); } void logCancelDelegationToken(DelegationTokenIdentifier id) { - CancelDelegationTokenOp op = CancelDelegationTokenOp.getInstance() + CancelDelegationTokenOp op = CancelDelegationTokenOp.getInstance(cache.get()) .setDelegationTokenIdentifier(id); logEdit(op); } void logUpdateMasterKey(DelegationKey key) { - UpdateMasterKeyOp op = UpdateMasterKeyOp.getInstance() + UpdateMasterKeyOp op = UpdateMasterKeyOp.getInstance(cache.get()) .setDelegationKey(key); logEdit(op); } void logReassignLease(String leaseHolder, String src, String newHolder) { - ReassignLeaseOp op = ReassignLeaseOp.getInstance() + ReassignLeaseOp op = ReassignLeaseOp.getInstance(cache.get()) .setLeaseHolder(leaseHolder) .setPath(src) .setNewHolder(newHolder); @@ -896,7 +904,7 @@ public class FSEditLog { state = State.IN_SEGMENT; if (writeHeaderTxn) { - logEdit(LogSegmentOp.getInstance( + logEdit(LogSegmentOp.getInstance(cache.get(), FSEditLogOpCodes.OP_START_LOG_SEGMENT)); logSync(); } @@ -912,7 +920,7 @@ public class FSEditLog { "Bad state: %s", state); if (writeEndTxn) { - logEdit(LogSegmentOp.getInstance( + logEdit(LogSegmentOp.getInstance(cache.get(), FSEditLogOpCodes.OP_END_LOG_SEGMENT)); logSync(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index ad8ddc06287..8f2b107e798 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -71,9 +71,11 @@ public class FSEditLogLoader { static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName()); static long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec private final FSNamesystem fsNamesys; - - public FSEditLogLoader(FSNamesystem fsNamesys) { + private long lastAppliedTxId; + + public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) { this.fsNamesys = fsNamesys; + this.lastAppliedTxId = lastAppliedTxId; } /** @@ -81,32 +83,29 @@ public class FSEditLogLoader { * This is where we apply edits that we've been writing to disk all * along. */ - long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId) - throws IOException { - long numEdits = 0; + long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, + MetaRecoveryContext recovery) throws IOException { int logVersion = edits.getVersion(); fsNamesys.writeLock(); try { long startTime = now(); - numEdits = loadEditRecords(logVersion, edits, false, - expectedStartingTxId); + long numEdits = loadEditRecords(logVersion, edits, false, + expectedStartingTxId, recovery); FSImage.LOG.info("Edits file " + edits.getName() + " of size " + edits.length() + " edits # " + numEdits + " loaded in " + (now()-startTime)/1000 + " seconds."); + return numEdits; } finally { edits.close(); fsNamesys.writeUnlock(); } - - return numEdits; } long loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit, - long expectedStartingTxId) - throws IOException, EditLogInputException { + long expectedStartingTxId, MetaRecoveryContext recovery) + throws IOException { FSDirectory fsDir = fsNamesys.dir; - long numEdits = 0; EnumMap> opCounts = new EnumMap>(FSEditLogOpCodes.class); @@ -120,72 +119,99 @@ public class FSEditLogLoader { long recentOpcodeOffsets[] = new long[4]; Arrays.fill(recentOpcodeOffsets, -1); - - long txId = expectedStartingTxId - 1; + + long expectedTxId = expectedStartingTxId; + long numEdits = 0; long lastTxId = in.getLastTxId(); long numTxns = (lastTxId - expectedStartingTxId) + 1; - long lastLogTime = now(); if (LOG.isDebugEnabled()) { LOG.debug("edit log length: " + in.length() + ", start txid: " + expectedStartingTxId + ", last txid: " + lastTxId); } - try { - try { - while (true) { + while (true) { + try { FSEditLogOp op; try { - if ((op = in.readOp()) == null) { + op = in.readOp(); + if (op == null) { break; } - } catch (IOException ioe) { - long badTxId = txId + 1; // because txId hasn't been incremented yet - String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets, badTxId); + } catch (Throwable e) { + // Handle a problem with our input + check203UpgradeFailure(logVersion, e); + String errorMessage = + formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId); FSImage.LOG.error(errorMessage); - throw new EditLogInputException(errorMessage, - ioe, numEdits); + if (recovery == null) { + // We will only try to skip over problematic opcodes when in + // recovery mode. 
+ throw new EditLogInputException(errorMessage, e, numEdits); + } + MetaRecoveryContext.editLogLoaderPrompt( + "We failed to read txId " + expectedTxId, + recovery, "skipping the bad section in the log"); + in.resync(); + continue; } recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] = in.getPosition(); if (LayoutVersion.supports(Feature.STORED_TXIDS, logVersion)) { - long expectedTxId = txId + 1; - txId = op.txid; - if (txId != expectedTxId) { - throw new IOException("Expected transaction ID " + - expectedTxId + " but got " + txId); + if (op.getTransactionId() > expectedTxId) { + MetaRecoveryContext.editLogLoaderPrompt("There appears " + + "to be a gap in the edit log. We expected txid " + + expectedTxId + ", but got txid " + + op.getTransactionId() + ".", recovery, "ignoring missing " + + " transaction IDs"); + } else if (op.getTransactionId() < expectedTxId) { + MetaRecoveryContext.editLogLoaderPrompt("There appears " + + "to be an out-of-order edit in the edit log. We " + + "expected txid " + expectedTxId + ", but got txid " + + op.getTransactionId() + ".", recovery, + "skipping the out-of-order edit"); + continue; } } - - incrOpCount(op.opCode, opCounts); try { applyEditLogOp(op, fsDir, logVersion); - } catch (Throwable t) { - // Catch Throwable because in the case of a truly corrupt edits log, any - // sort of error might be thrown (NumberFormat, NullPointer, EOF, etc.) - String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets, txId); - FSImage.LOG.error(errorMessage); - throw new IOException(errorMessage, t); + } catch (Throwable e) { + LOG.error("Encountered exception on operation " + op, e); + MetaRecoveryContext.editLogLoaderPrompt("Failed to " + + "apply edit log operation " + op + ": error " + + e.getMessage(), recovery, "applying edits"); + } + // Now that the operation has been successfully decoded and + // applied, update our bookkeeping. + incrOpCount(op.opCode, opCounts); + if (op.hasTransactionId()) { + lastAppliedTxId = op.getTransactionId(); + expectedTxId = lastAppliedTxId + 1; + } else { + expectedTxId = lastAppliedTxId = expectedStartingTxId; } - // log progress - if (now() - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) { - int percent = Math.round((float) txId / numTxns * 100); - LOG.info("replaying edit log: " + txId + "/" + numTxns - + " transactions completed. (" + percent + "%)"); - lastLogTime = now(); + if (LayoutVersion.supports(Feature.STORED_TXIDS, logVersion)) { + long now = now(); + if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) { + int percent = Math.round((float)lastAppliedTxId / numTxns * 100); + LOG.info("replaying edit log: " + lastAppliedTxId + "/" + numTxns + + " transactions completed. (" + percent + "%)"); + lastLogTime = now; + } } - numEdits++; + } catch (MetaRecoveryContext.RequestStopException e) { + MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + + in.getPosition() + "/" + in.length()); + break; } - } catch (IOException ex) { - check203UpgradeFailure(logVersion, ex); - } finally { - if(closeOnExit) - in.close(); } } finally { + if(closeOnExit) { + in.close(); + } fsDir.writeUnlock(); fsNamesys.writeUnlock(); @@ -472,7 +498,7 @@ public class FSEditLogLoader { long recentOpcodeOffsets[], long txid) { StringBuilder sb = new StringBuilder(); sb.append("Error replaying edit log at offset " + in.getPosition()); - sb.append(" on transaction ID ").append(txid); + sb.append(". 
Expected transaction ID was ").append(txid); if (recentOpcodeOffsets[0] != -1) { Arrays.sort(recentOpcodeOffsets); sb.append("\nRecent opcode offsets:"); @@ -519,7 +545,7 @@ public class FSEditLogLoader { if (oldBlock.getBlockId() != newBlock.getBlockId() || (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && !(isGenStampUpdate && isLastBlock))) { - throw new IOException("Mismatched block IDs or generation stamps, " + + throw new IOException("Mismatched block IDs or generation stamps, " + "attempting to replace block " + oldBlock + " with " + newBlock + " as block # " + i + "/" + newBlocks.length + " of " + path); @@ -605,7 +631,7 @@ public class FSEditLogLoader { * Throw appropriate exception during upgrade from 203, when editlog loading * could fail due to opcode conflicts. */ - private void check203UpgradeFailure(int logVersion, IOException ex) + private void check203UpgradeFailure(int logVersion, Throwable e) throws IOException { // 0.20.203 version version has conflicting opcodes with the later releases. // The editlog must be emptied by restarting the namenode, before proceeding @@ -616,9 +642,7 @@ public class FSEditLogLoader { + logVersion + " from release 0.20.203. Please go back to the old " + " release and restart the namenode. This empties the editlog " + " and saves the namespace. Resume the upgrade after this step."; - throw new IOException(msg, ex); - } else { - throw ex; + throw new IOException(msg, e); } } @@ -643,14 +667,14 @@ public class FSEditLogLoader { break; } if (firstTxId == HdfsConstants.INVALID_TXID) { - firstTxId = op.txid; + firstTxId = op.getTransactionId(); } if (lastTxId == HdfsConstants.INVALID_TXID - || op.txid == lastTxId + 1) { - lastTxId = op.txid; + || op.getTransactionId() == lastTxId + 1) { + lastTxId = op.getTransactionId(); } else { - FSImage.LOG.error("Out of order txid found. Found " + op.txid - + ", expected " + (lastTxId + 1)); + FSImage.LOG.error("Out of order txid found. 
Found " + + op.getTransactionId() + ", expected " + (lastTxId + 1)); break; } numValid++; @@ -743,4 +767,7 @@ public class FSEditLogLoader { } } + public long getLastAppliedTxId() { + return lastAppliedTxId; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 92ac743c381..a96aa3fc6ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -33,6 +33,8 @@ import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.util.PureJavaCrc32; @@ -54,6 +56,8 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; +import com.google.common.base.Preconditions; + import java.io.DataInput; import java.io.DataOutput; import java.io.DataInputStream; @@ -74,42 +78,44 @@ public abstract class FSEditLogOp { @SuppressWarnings("deprecation") - private static ThreadLocal> opInstances = - new ThreadLocal>() { - @Override - protected EnumMap initialValue() { - EnumMap instances - = new EnumMap(FSEditLogOpCodes.class); - instances.put(OP_ADD, new AddOp()); - instances.put(OP_CLOSE, new CloseOp()); - instances.put(OP_SET_REPLICATION, new SetReplicationOp()); - instances.put(OP_CONCAT_DELETE, new ConcatDeleteOp()); - instances.put(OP_RENAME_OLD, new RenameOldOp()); - instances.put(OP_DELETE, new DeleteOp()); - instances.put(OP_MKDIR, new MkdirOp()); - instances.put(OP_SET_GENSTAMP, new SetGenstampOp()); - instances.put(OP_SET_PERMISSIONS, new SetPermissionsOp()); - instances.put(OP_SET_OWNER, new SetOwnerOp()); - instances.put(OP_SET_NS_QUOTA, new SetNSQuotaOp()); - instances.put(OP_CLEAR_NS_QUOTA, new ClearNSQuotaOp()); - instances.put(OP_SET_QUOTA, new SetQuotaOp()); - instances.put(OP_TIMES, new TimesOp()); - instances.put(OP_SYMLINK, new SymlinkOp()); - instances.put(OP_RENAME, new RenameOp()); - instances.put(OP_REASSIGN_LEASE, new ReassignLeaseOp()); - instances.put(OP_GET_DELEGATION_TOKEN, new GetDelegationTokenOp()); - instances.put(OP_RENEW_DELEGATION_TOKEN, new RenewDelegationTokenOp()); - instances.put(OP_CANCEL_DELEGATION_TOKEN, - new CancelDelegationTokenOp()); - instances.put(OP_UPDATE_MASTER_KEY, new UpdateMasterKeyOp()); - instances.put(OP_START_LOG_SEGMENT, - new LogSegmentOp(OP_START_LOG_SEGMENT)); - instances.put(OP_END_LOG_SEGMENT, - new LogSegmentOp(OP_END_LOG_SEGMENT)); - instances.put(OP_UPDATE_BLOCKS, new UpdateBlocksOp()); - return instances; - } - }; + final public static class OpInstanceCache { + private EnumMap inst = + new EnumMap(FSEditLogOpCodes.class); + + public OpInstanceCache() { + inst.put(OP_ADD, new AddOp()); + inst.put(OP_CLOSE, new CloseOp()); + inst.put(OP_SET_REPLICATION, new SetReplicationOp()); + inst.put(OP_CONCAT_DELETE, new ConcatDeleteOp()); + inst.put(OP_RENAME_OLD, new RenameOldOp()); + inst.put(OP_DELETE, new DeleteOp()); + inst.put(OP_MKDIR, new MkdirOp()); + inst.put(OP_SET_GENSTAMP, 
new SetGenstampOp()); + inst.put(OP_SET_PERMISSIONS, new SetPermissionsOp()); + inst.put(OP_SET_OWNER, new SetOwnerOp()); + inst.put(OP_SET_NS_QUOTA, new SetNSQuotaOp()); + inst.put(OP_CLEAR_NS_QUOTA, new ClearNSQuotaOp()); + inst.put(OP_SET_QUOTA, new SetQuotaOp()); + inst.put(OP_TIMES, new TimesOp()); + inst.put(OP_SYMLINK, new SymlinkOp()); + inst.put(OP_RENAME, new RenameOp()); + inst.put(OP_REASSIGN_LEASE, new ReassignLeaseOp()); + inst.put(OP_GET_DELEGATION_TOKEN, new GetDelegationTokenOp()); + inst.put(OP_RENEW_DELEGATION_TOKEN, new RenewDelegationTokenOp()); + inst.put(OP_CANCEL_DELEGATION_TOKEN, + new CancelDelegationTokenOp()); + inst.put(OP_UPDATE_MASTER_KEY, new UpdateMasterKeyOp()); + inst.put(OP_START_LOG_SEGMENT, + new LogSegmentOp(OP_START_LOG_SEGMENT)); + inst.put(OP_END_LOG_SEGMENT, + new LogSegmentOp(OP_END_LOG_SEGMENT)); + inst.put(OP_UPDATE_BLOCKS, new UpdateBlocksOp()); + } + + public FSEditLogOp get(FSEditLogOpCodes opcode) { + return inst.get(opcode); + } + } /** * Constructor for an EditLog Op. EditLog ops cannot be constructed @@ -117,13 +123,22 @@ public abstract class FSEditLogOp { */ private FSEditLogOp(FSEditLogOpCodes opCode) { this.opCode = opCode; - this.txid = 0; + this.txid = HdfsConstants.INVALID_TXID; } public long getTransactionId() { + Preconditions.checkState(txid != HdfsConstants.INVALID_TXID); return txid; } + public String getTransactionIdStr() { + return (txid == HdfsConstants.INVALID_TXID) ? "(none)" : "" + txid; + } + + public boolean hasTransactionId() { + return (txid != HdfsConstants.INVALID_TXID); + } + public void setTransactionId(long txid) { this.txid = txid; } @@ -373,8 +388,8 @@ public abstract class FSEditLogOp { super(OP_ADD); } - static AddOp getInstance() { - return (AddOp)opInstances.get().get(OP_ADD); + static AddOp getInstance(OpInstanceCache cache) { + return (AddOp)cache.get(OP_ADD); } public boolean shouldCompleteLastBlock() { @@ -395,8 +410,8 @@ public abstract class FSEditLogOp { super(OP_CLOSE); } - static CloseOp getInstance() { - return (CloseOp)opInstances.get().get(OP_CLOSE); + static CloseOp getInstance(OpInstanceCache cache) { + return (CloseOp)cache.get(OP_CLOSE); } public boolean shouldCompleteLastBlock() { @@ -420,9 +435,8 @@ public abstract class FSEditLogOp { super(OP_UPDATE_BLOCKS); } - static UpdateBlocksOp getInstance() { - return (UpdateBlocksOp)opInstances.get() - .get(OP_UPDATE_BLOCKS); + static UpdateBlocksOp getInstance(OpInstanceCache cache) { + return (UpdateBlocksOp)cache.get(OP_UPDATE_BLOCKS); } @@ -500,9 +514,8 @@ public abstract class FSEditLogOp { super(OP_SET_REPLICATION); } - static SetReplicationOp getInstance() { - return (SetReplicationOp)opInstances.get() - .get(OP_SET_REPLICATION); + static SetReplicationOp getInstance(OpInstanceCache cache) { + return (SetReplicationOp)cache.get(OP_SET_REPLICATION); } SetReplicationOp setPath(String path) { @@ -571,9 +584,8 @@ public abstract class FSEditLogOp { super(OP_CONCAT_DELETE); } - static ConcatDeleteOp getInstance() { - return (ConcatDeleteOp)opInstances.get() - .get(OP_CONCAT_DELETE); + static ConcatDeleteOp getInstance(OpInstanceCache cache) { + return (ConcatDeleteOp)cache.get(OP_CONCAT_DELETE); } ConcatDeleteOp setTarget(String trg) { @@ -697,9 +709,8 @@ public abstract class FSEditLogOp { super(OP_RENAME_OLD); } - static RenameOldOp getInstance() { - return (RenameOldOp)opInstances.get() - .get(OP_RENAME_OLD); + static RenameOldOp getInstance(OpInstanceCache cache) { + return (RenameOldOp)cache.get(OP_RENAME_OLD); } RenameOldOp 
setSource(String src) { @@ -790,9 +801,8 @@ public abstract class FSEditLogOp { super(OP_DELETE); } - static DeleteOp getInstance() { - return (DeleteOp)opInstances.get() - .get(OP_DELETE); + static DeleteOp getInstance(OpInstanceCache cache) { + return (DeleteOp)cache.get(OP_DELETE); } DeleteOp setPath(String path) { @@ -872,9 +882,8 @@ public abstract class FSEditLogOp { super(OP_MKDIR); } - static MkdirOp getInstance() { - return (MkdirOp)opInstances.get() - .get(OP_MKDIR); + static MkdirOp getInstance(OpInstanceCache cache) { + return (MkdirOp)cache.get(OP_MKDIR); } MkdirOp setPath(String path) { @@ -977,9 +986,8 @@ public abstract class FSEditLogOp { super(OP_SET_GENSTAMP); } - static SetGenstampOp getInstance() { - return (SetGenstampOp)opInstances.get() - .get(OP_SET_GENSTAMP); + static SetGenstampOp getInstance(OpInstanceCache cache) { + return (SetGenstampOp)cache.get(OP_SET_GENSTAMP); } SetGenstampOp setGenerationStamp(long genStamp) { @@ -1031,9 +1039,8 @@ public abstract class FSEditLogOp { super(OP_SET_PERMISSIONS); } - static SetPermissionsOp getInstance() { - return (SetPermissionsOp)opInstances.get() - .get(OP_SET_PERMISSIONS); + static SetPermissionsOp getInstance(OpInstanceCache cache) { + return (SetPermissionsOp)cache.get(OP_SET_PERMISSIONS); } SetPermissionsOp setSource(String src) { @@ -1098,9 +1105,8 @@ public abstract class FSEditLogOp { super(OP_SET_OWNER); } - static SetOwnerOp getInstance() { - return (SetOwnerOp)opInstances.get() - .get(OP_SET_OWNER); + static SetOwnerOp getInstance(OpInstanceCache cache) { + return (SetOwnerOp)cache.get(OP_SET_OWNER); } SetOwnerOp setSource(String src) { @@ -1179,9 +1185,8 @@ public abstract class FSEditLogOp { super(OP_SET_NS_QUOTA); } - static SetNSQuotaOp getInstance() { - return (SetNSQuotaOp)opInstances.get() - .get(OP_SET_NS_QUOTA); + static SetNSQuotaOp getInstance(OpInstanceCache cache) { + return (SetNSQuotaOp)cache.get(OP_SET_NS_QUOTA); } @Override @@ -1232,9 +1237,8 @@ public abstract class FSEditLogOp { super(OP_CLEAR_NS_QUOTA); } - static ClearNSQuotaOp getInstance() { - return (ClearNSQuotaOp)opInstances.get() - .get(OP_CLEAR_NS_QUOTA); + static ClearNSQuotaOp getInstance(OpInstanceCache cache) { + return (ClearNSQuotaOp)cache.get(OP_CLEAR_NS_QUOTA); } @Override @@ -1281,9 +1285,8 @@ public abstract class FSEditLogOp { super(OP_SET_QUOTA); } - static SetQuotaOp getInstance() { - return (SetQuotaOp)opInstances.get() - .get(OP_SET_QUOTA); + static SetQuotaOp getInstance(OpInstanceCache cache) { + return (SetQuotaOp)cache.get(OP_SET_QUOTA); } SetQuotaOp setSource(String src) { @@ -1360,9 +1363,8 @@ public abstract class FSEditLogOp { super(OP_TIMES); } - static TimesOp getInstance() { - return (TimesOp)opInstances.get() - .get(OP_TIMES); + static TimesOp getInstance(OpInstanceCache cache) { + return (TimesOp)cache.get(OP_TIMES); } TimesOp setPath(String path) { @@ -1458,9 +1460,8 @@ public abstract class FSEditLogOp { super(OP_SYMLINK); } - static SymlinkOp getInstance() { - return (SymlinkOp)opInstances.get() - .get(OP_SYMLINK); + static SymlinkOp getInstance(OpInstanceCache cache) { + return (SymlinkOp)cache.get(OP_SYMLINK); } SymlinkOp setPath(String path) { @@ -1579,9 +1580,8 @@ public abstract class FSEditLogOp { super(OP_RENAME); } - static RenameOp getInstance() { - return (RenameOp)opInstances.get() - .get(OP_RENAME); + static RenameOp getInstance(OpInstanceCache cache) { + return (RenameOp)cache.get(OP_RENAME); } RenameOp setSource(String src) { @@ -1723,9 +1723,8 @@ public abstract class FSEditLogOp { 
super(OP_REASSIGN_LEASE); } - static ReassignLeaseOp getInstance() { - return (ReassignLeaseOp)opInstances.get() - .get(OP_REASSIGN_LEASE); + static ReassignLeaseOp getInstance(OpInstanceCache cache) { + return (ReassignLeaseOp)cache.get(OP_REASSIGN_LEASE); } ReassignLeaseOp setLeaseHolder(String leaseHolder) { @@ -1798,9 +1797,8 @@ public abstract class FSEditLogOp { super(OP_GET_DELEGATION_TOKEN); } - static GetDelegationTokenOp getInstance() { - return (GetDelegationTokenOp)opInstances.get() - .get(OP_GET_DELEGATION_TOKEN); + static GetDelegationTokenOp getInstance(OpInstanceCache cache) { + return (GetDelegationTokenOp)cache.get(OP_GET_DELEGATION_TOKEN); } GetDelegationTokenOp setDelegationTokenIdentifier( @@ -1870,9 +1868,8 @@ public abstract class FSEditLogOp { super(OP_RENEW_DELEGATION_TOKEN); } - static RenewDelegationTokenOp getInstance() { - return (RenewDelegationTokenOp)opInstances.get() - .get(OP_RENEW_DELEGATION_TOKEN); + static RenewDelegationTokenOp getInstance(OpInstanceCache cache) { + return (RenewDelegationTokenOp)cache.get(OP_RENEW_DELEGATION_TOKEN); } RenewDelegationTokenOp setDelegationTokenIdentifier( @@ -1941,9 +1938,8 @@ public abstract class FSEditLogOp { super(OP_CANCEL_DELEGATION_TOKEN); } - static CancelDelegationTokenOp getInstance() { - return (CancelDelegationTokenOp)opInstances.get() - .get(OP_CANCEL_DELEGATION_TOKEN); + static CancelDelegationTokenOp getInstance(OpInstanceCache cache) { + return (CancelDelegationTokenOp)cache.get(OP_CANCEL_DELEGATION_TOKEN); } CancelDelegationTokenOp setDelegationTokenIdentifier( @@ -1996,9 +1992,8 @@ public abstract class FSEditLogOp { super(OP_UPDATE_MASTER_KEY); } - static UpdateMasterKeyOp getInstance() { - return (UpdateMasterKeyOp)opInstances.get() - .get(OP_UPDATE_MASTER_KEY); + static UpdateMasterKeyOp getInstance(OpInstanceCache cache) { + return (UpdateMasterKeyOp)cache.get(OP_UPDATE_MASTER_KEY); } UpdateMasterKeyOp setDelegationKey(DelegationKey key) { @@ -2050,8 +2045,9 @@ public abstract class FSEditLogOp { code == OP_END_LOG_SEGMENT : "Bad op: " + code; } - static LogSegmentOp getInstance(FSEditLogOpCodes code) { - return (LogSegmentOp)opInstances.get().get(code); + static LogSegmentOp getInstance(OpInstanceCache cache, + FSEditLogOpCodes code) { + return (LogSegmentOp)cache.get(code); } public void readFields(DataInputStream in, int logVersion) @@ -2091,8 +2087,8 @@ public abstract class FSEditLogOp { super(OP_INVALID); } - static InvalidOp getInstance() { - return (InvalidOp)opInstances.get().get(OP_INVALID); + static InvalidOp getInstance(OpInstanceCache cache) { + return (InvalidOp)cache.get(OP_INVALID); } @Override @@ -2207,6 +2203,7 @@ public abstract class FSEditLogOp { private final DataInputStream in; private final int logVersion; private final Checksum checksum; + private final OpInstanceCache cache; /** * Construct the reader @@ -2228,6 +2225,7 @@ public abstract class FSEditLogOp { } else { this.in = in; } + this.cache = new OpInstanceCache(); } /** @@ -2236,16 +2234,42 @@ public abstract class FSEditLogOp { * Note that the objects returned from this method may be re-used by future * calls to the same method. * + * @param skipBrokenEdits If true, attempt to skip over damaged parts of + * the input stream, rather than throwing an IOException * @return the operation read from the stream, or null at the end of the file * @throws IOException on error. 
*/ - public FSEditLogOp readOp() throws IOException { + public FSEditLogOp readOp(boolean skipBrokenEdits) throws IOException { + FSEditLogOp op = null; + while (true) { + try { + in.mark(in.available()); + try { + op = decodeOp(); + } finally { + // If we encountered an exception or an end-of-file condition, + // do not advance the input stream. + if (op == null) { + in.reset(); + } + } + return op; + } catch (IOException e) { + if (!skipBrokenEdits) { + throw e; + } + if (in.skip(1) < 1) { + return null; + } + } + } + } + + private FSEditLogOp decodeOp() throws IOException { if (checksum != null) { checksum.reset(); } - in.mark(1); - byte opCodeByte; try { opCodeByte = in.readByte(); @@ -2255,12 +2279,10 @@ public abstract class FSEditLogOp { } FSEditLogOpCodes opCode = FSEditLogOpCodes.fromByte(opCodeByte); - if (opCode == OP_INVALID) { - in.reset(); // reset back to end of file if somebody reads it again + if (opCode == OP_INVALID) return null; - } - FSEditLogOp op = opInstances.get().get(opCode); + FSEditLogOp op = cache.get(opCode); if (op == null) { throw new IOException("Read invalid opcode " + opCode); } @@ -2268,6 +2290,8 @@ public abstract class FSEditLogOp { if (LayoutVersion.supports(Feature.STORED_TXIDS, logVersion)) { // Read the txid op.setTransactionId(in.readLong()); + } else { + op.setTransactionId(HdfsConstants.INVALID_TXID); } op.readFields(in, logVersion); @@ -2426,8 +2450,4 @@ public abstract class FSEditLogOp { short mode = Short.valueOf(st.getValue("MODE")); return new PermissionStatus(username, groupname, new FsPermission(mode)); } - - public static FSEditLogOp getOpInstance(FSEditLogOpCodes opCode) { - return opInstances.get().get(opCode); - } -} + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 46e61a599fd..a9bf5c70667 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -158,8 +158,8 @@ public class FSImage implements Closeable { * @throws IOException * @return true if the image needs to be saved or false otherwise */ - boolean recoverTransitionRead(StartupOption startOpt, FSNamesystem target) - throws IOException { + boolean recoverTransitionRead(StartupOption startOpt, FSNamesystem target, + MetaRecoveryContext recovery) throws IOException { assert startOpt != StartupOption.FORMAT : "NameNode formatting should be performed before reading the image"; @@ -244,7 +244,7 @@ public class FSImage implements Closeable { // just load the image } - return loadFSImage(target); + return loadFSImage(target, recovery); } /** @@ -304,7 +304,7 @@ public class FSImage implements Closeable { if(storage.getDistributedUpgradeState()) { // only distributed upgrade need to continue // don't do version upgrade - this.loadFSImage(target); + this.loadFSImage(target, null); storage.initializeDistributedUpgrade(); return; } @@ -319,7 +319,7 @@ public class FSImage implements Closeable { } // load the latest image - this.loadFSImage(target); + this.loadFSImage(target, null); // Do upgrade for each directory long oldCTime = storage.getCTime(); @@ -505,7 +505,7 @@ public class FSImage implements Closeable { target.dir.fsImage = ckptImage; // load from the checkpoint dirs try { - ckptImage.recoverTransitionRead(StartupOption.REGULAR, target); + 
ckptImage.recoverTransitionRead(StartupOption.REGULAR, target, null); } finally { ckptImage.close(); } @@ -550,7 +550,7 @@ public class FSImage implements Closeable { target.dir.reset(); LOG.debug("Reloading namespace from " + file); - loadFSImage(file, target); + loadFSImage(file, target, null); } /** @@ -568,7 +568,8 @@ public class FSImage implements Closeable { * @return whether the image should be saved * @throws IOException */ - boolean loadFSImage(FSNamesystem target) throws IOException { + boolean loadFSImage(FSNamesystem target, MetaRecoveryContext recovery) + throws IOException { FSImageStorageInspector inspector = storage.readAndInspectDirs(); isUpgradeFinalized = inspector.isUpgradeFinalized(); @@ -583,7 +584,6 @@ public class FSImage implements Closeable { // We only want to recover streams if we're going into Active mode. editLog.recoverUnclosedStreams(); } - if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { // If we're open for write, we're either non-HA or we're the active NN, so @@ -610,7 +610,7 @@ public class FSImage implements Closeable { getLayoutVersion())) { // For txid-based layout, we should have a .md5 file // next to the image file - loadFSImage(imageFile.getFile(), target); + loadFSImage(imageFile.getFile(), target, recovery); } else if (LayoutVersion.supports(Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) { // In 0.22, we have the checksum stored in the VERSION file. @@ -622,22 +622,19 @@ public class FSImage implements Closeable { NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY + " not set for storage directory " + sdForProperties.getRoot()); } - loadFSImage(imageFile.getFile(), new MD5Hash(md5), target); + loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery); } else { // We don't have any record of the md5sum - loadFSImage(imageFile.getFile(), null, target); + loadFSImage(imageFile.getFile(), null, target, recovery); } } catch (IOException ioe) { FSEditLog.closeAllStreams(editStreams); throw new IOException("Failed to load image from " + imageFile, ioe); } - - long numLoaded = loadEdits(editStreams, target); + long txnsAdvanced = loadEdits(editStreams, target, recovery); needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(), - numLoaded); - - // update the txid for the edit log - editLog.setNextTxId(storage.getMostRecentCheckpointTxId() + numLoaded + 1); + txnsAdvanced); + editLog.setNextTxId(lastAppliedTxId + 1); return needToSave; } @@ -664,33 +661,29 @@ public class FSImage implements Closeable { /** * Load the specified list of edit files into the image. 
- * @return the number of transactions loaded */ public long loadEdits(Iterable editStreams, - FSNamesystem target) throws IOException, EditLogInputException { + FSNamesystem target, MetaRecoveryContext recovery) throws IOException { LOG.debug("About to load edits:\n " + Joiner.on("\n ").join(editStreams)); - - long startingTxId = getLastAppliedTxId() + 1; - long numLoaded = 0; - + + long prevLastAppliedTxId = lastAppliedTxId; try { - FSEditLogLoader loader = new FSEditLogLoader(target); + FSEditLogLoader loader = new FSEditLogLoader(target, lastAppliedTxId); // Load latest edits for (EditLogInputStream editIn : editStreams) { - LOG.info("Reading " + editIn + " expecting start txid #" + startingTxId); - long thisNumLoaded = 0; + LOG.info("Reading " + editIn + " expecting start txid #" + + (lastAppliedTxId + 1)); try { - thisNumLoaded = loader.loadFSEdits(editIn, startingTxId); - } catch (EditLogInputException elie) { - thisNumLoaded = elie.getNumEditsLoaded(); - throw elie; + loader.loadFSEdits(editIn, lastAppliedTxId + 1, recovery); } finally { // Update lastAppliedTxId even in case of error, since some ops may // have been successfully applied before the error. - lastAppliedTxId = startingTxId + thisNumLoaded - 1; - startingTxId += thisNumLoaded; - numLoaded += thisNumLoaded; + lastAppliedTxId = loader.getLastAppliedTxId(); + } + // If we are in recovery mode, we may have skipped over some txids. + if (editIn.getLastTxId() != HdfsConstants.INVALID_TXID) { + lastAppliedTxId = editIn.getLastTxId(); } } } finally { @@ -698,8 +691,7 @@ public class FSImage implements Closeable { // update the counts target.dir.updateCountForINodeWithQuota(); } - - return numLoaded; + return lastAppliedTxId - prevLastAppliedTxId; } @@ -707,14 +699,14 @@ public class FSImage implements Closeable { * Load the image namespace from the given image file, verifying * it against the MD5 sum stored in its associated .md5 file. */ - private void loadFSImage(File imageFile, FSNamesystem target) - throws IOException { + private void loadFSImage(File imageFile, FSNamesystem target, + MetaRecoveryContext recovery) throws IOException { MD5Hash expectedMD5 = MD5FileUtils.readStoredMd5ForFile(imageFile); if (expectedMD5 == null) { throw new IOException("No MD5 file found corresponding to image file " + imageFile); } - loadFSImage(imageFile, expectedMD5, target); + loadFSImage(imageFile, expectedMD5, target, recovery); } /** @@ -722,7 +714,7 @@ public class FSImage implements Closeable { * filenames and blocks. 
*/ private void loadFSImage(File curFile, MD5Hash expectedMd5, - FSNamesystem target) throws IOException { + FSNamesystem target, MetaRecoveryContext recovery) throws IOException { FSImageFormat.Loader loader = new FSImageFormat.Loader( conf, target); loader.load(curFile); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java index dbf1860a85e..c3d35b13630 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java @@ -56,7 +56,14 @@ class FSImageTransactionalStorageInspector extends FSImageStorageInspector { return; } - maxSeenTxId = Math.max(maxSeenTxId, NNStorage.readTransactionIdFile(sd)); + // Check for a seen_txid file, which marks a minimum transaction ID that + // must be included in our load plan. + try { + maxSeenTxId = Math.max(maxSeenTxId, NNStorage.readTransactionIdFile(sd)); + } catch (IOException ioe) { + LOG.warn("Unable to determine the max transaction ID seen by " + sd, ioe); + return; + } File currentDir = sd.getCurrentDir(); File filesInStorage[]; @@ -91,15 +98,6 @@ class FSImageTransactionalStorageInspector extends FSImageStorageInspector { } } - - // Check for a seen_txid file, which marks a minimum transaction ID that - // must be included in our load plan. - try { - maxSeenTxId = Math.max(maxSeenTxId, NNStorage.readTransactionIdFile(sd)); - } catch (IOException ioe) { - LOG.warn("Unable to determine the max transaction ID seen by " + sd, ioe); - } - // set finalized flag isUpgradeFinalized = isUpgradeFinalized && !sd.getPreviousDir().exists(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a5a8ca0d322..1363c6cda05 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -380,9 +380,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, FSImage fsImage = new FSImage(conf, namespaceDirs, namespaceEditsDirs); FSNamesystem namesystem = new FSNamesystem(conf, fsImage); + StartupOption startOpt = NameNode.getStartupOption(conf); + if (startOpt == StartupOption.RECOVER) { + namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + } long loadStart = now(); - StartupOption startOpt = NameNode.getStartupOption(conf); String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); namesystem.loadFSImage(startOpt, fsImage, HAUtil.isHAEnabled(conf, nameserviceId)); @@ -491,7 +494,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { // We shouldn't be calling saveNamespace if we've come up in standby state. 
- if (fsImage.recoverTransitionRead(startOpt, this) && !haEnabled) { + MetaRecoveryContext recovery = startOpt.createRecoveryContext(); + if (fsImage.recoverTransitionRead(startOpt, this, recovery) && !haEnabled) { fsImage.saveNamespace(this); } // This will start a new log segment and write to the seen_txid file, so diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 603dd000909..c2281700478 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -232,7 +232,10 @@ class FileJournalManager implements JournalManager { LOG.info(String.format("Log begins at txid %d, but requested start " + "txid is %d. Skipping %d edits.", elf.getFirstTxId(), fromTxId, transactionsToSkip)); - elfis.skipTransactions(transactionsToSkip); + } + if (elfis.skipUntil(fromTxId) == false) { + throw new IOException("failed to advance input stream to txid " + + fromTxId); } return elfis; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalStream.java deleted file mode 100644 index d786476470d..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalStream.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.server.namenode; - -/** - * A generic interface for journal input and output streams. - */ -interface JournalStream { - /** - * Type of the underlying persistent storage type the stream is based upon. - *

- * <ul>
- * <li>{@link JournalType#FILE} - streams edits into a local file, see
- * {@link FSEditLog.EditLogFileOutputStream} and
- * {@link FSEditLog.EditLogFileInputStream}</li>
- * <li>{@link JournalType#BACKUP} - streams edits to a backup node, see
- * {@link EditLogBackupOutputStream} and {@link EditLogBackupInputStream}</li>
- * </ul>
- */ - static enum JournalType { - FILE, - BACKUP; - boolean isOfType(JournalType other) { - return other == null || this == other; - } - }; - - /** - * Get this stream name. - * - * @return name of the stream - */ - String getName(); - - /** - * Get the type of the stream. - * Determines the underlying persistent storage type. - * @see JournalType - * @return type - */ - JournalType getType(); -} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java new file mode 100644 index 00000000000..b4bd119eb58 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** Context data for an ongoing NameNode metadata recovery process. */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class MetaRecoveryContext { + public static final Log LOG = LogFactory.getLog(MetaRecoveryContext.class.getName()); + public final static int FORCE_NONE = 0; + public final static int FORCE_FIRST_CHOICE = 1; + public final static int FORCE_ALL = 2; + private int force; + + /** Exception thrown when the user has requested processing to stop. */ + static public class RequestStopException extends IOException { + private static final long serialVersionUID = 1L; + public RequestStopException(String msg) { + super(msg); + } + } + + public MetaRecoveryContext(int force) { + this.force = force; + } + + /** + * Display a prompt to the user and get his or her choice. + * + * @param prompt The prompt to display + * @param default First choice (will be taken if autoChooseDefault is + * true) + * @param choices Other choies + * + * @return The choice that was taken + * @throws IOException + */ + public String ask(String prompt, String firstChoice, String... 
choices) + throws IOException { + while (true) { + LOG.info(prompt); + if (force > FORCE_NONE) { + LOG.info("automatically choosing " + firstChoice); + return firstChoice; + } + StringBuilder responseBuilder = new StringBuilder(); + while (true) { + int c = System.in.read(); + if (c == -1 || c == '\r' || c == '\n') { + break; + } + responseBuilder.append((char)c); + } + String response = responseBuilder.toString(); + if (response.equalsIgnoreCase(firstChoice)) + return firstChoice; + for (String c : choices) { + if (response.equalsIgnoreCase(c)) { + return c; + } + } + LOG.error("I'm sorry, I cannot understand your response.\n"); + } + } + + public static void editLogLoaderPrompt(String prompt, + MetaRecoveryContext recovery, String contStr) + throws IOException, RequestStopException + { + if (recovery == null) { + throw new IOException(prompt); + } + LOG.error(prompt); + String answer = recovery.ask("\nEnter 'c' to continue, " + contStr + "\n" + + "Enter 's' to stop reading the edit log here, abandoning any later " + + "edits\n" + + "Enter 'q' to quit without saving\n" + + "Enter 'a' to always select the first choice in the future " + + "without prompting. " + + "(c/s/q/a)\n", "c", "s", "q", "a"); + if (answer.equals("c")) { + LOG.info("Continuing."); + return; + } else if (answer.equals("s")) { + throw new RequestStopException("user requested stop"); + } else if (answer.equals("q")) { + recovery.quit(); + } else { + recovery.setForce(FORCE_FIRST_CHOICE); + return; + } + } + + /** Log a message and quit */ + public void quit() { + LOG.error("Exiting on user request."); + System.exit(0); + } + + public int getForce() { + return this.force; + } + + public void setForce(int force) { + this.force = force; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index 6fae88a9652..d5061b7be8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -49,7 +49,6 @@ import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.UpgradeManager; import org.apache.hadoop.hdfs.server.common.Util; -import org.apache.hadoop.hdfs.server.namenode.JournalStream.JournalType; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.util.AtomicFileOutputStream; @@ -299,8 +298,7 @@ public class NNStorage extends Storage implements Closeable { NameNodeDirType.IMAGE; // Add to the list of storage directories, only if the // URI is of type file:// - if(dirName.getScheme().compareTo(JournalType.FILE.name().toLowerCase()) - == 0){ + if(dirName.getScheme().compareTo("file") == 0) { this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), dirType, !sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared. 
@@ -312,8 +310,7 @@ public class NNStorage extends Storage implements Closeable { checkSchemeConsistency(dirName); // Add to the list of storage directories, only if the // URI is of type file:// - if(dirName.getScheme().compareTo(JournalType.FILE.name().toLowerCase()) - == 0) + if(dirName.getScheme().compareTo("file") == 0) this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), NameNodeDirType.EDITS, !sharedEditsDirs.contains(dirName))); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 9fb644e8722..66558606289 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -514,6 +514,8 @@ public class NameNode { *
 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
 * upgrade and create a snapshot of the current file system state</li>
+ * <li>{@link StartupOption#RECOVERY RECOVERY} - recover name node
+ * metadata</li>
 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
 * cluster back to the previous state</li>
  • {@link StartupOption#FINALIZE FINALIZE} - finalize @@ -832,7 +834,10 @@ public class NameNode { StartupOption.FINALIZE.getName() + "] | [" + StartupOption.IMPORT.getName() + "] | [" + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" + - StartupOption.INITIALIZESHAREDEDITS.getName() + "]"); + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | [" + + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" + + StartupOption.RECOVER.getName() + " [ " + + StartupOption.FORCE.getName() + " ] ]"); } private static StartupOption parseArguments(String args[]) { @@ -876,6 +881,21 @@ public class NameNode { } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.INITIALIZESHAREDEDITS; return startOpt; + } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { + if (startOpt != StartupOption.REGULAR) { + throw new RuntimeException("Can't combine -recover with " + + "other startup options."); + } + startOpt = StartupOption.RECOVER; + while (++i < argsLen) { + if (args[i].equalsIgnoreCase( + StartupOption.FORCE.getName())) { + startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); + } else { + throw new RuntimeException("Error parsing recovery options: " + + "can't understand option \"" + args[i] + "\""); + } + } } else { return null; } @@ -892,6 +912,39 @@ public class NameNode { StartupOption.REGULAR.toString())); } + private static void doRecovery(StartupOption startOpt, Configuration conf) + throws IOException { + if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { + if (!confirmPrompt("You have selected Metadata Recovery mode. " + + "This mode is intended to recover lost metadata on a corrupt " + + "filesystem. Metadata recovery mode often permanently deletes " + + "data from your HDFS filesystem. Please back up your edit log " + + "and fsimage before trying this!\n\n" + + "Are you ready to proceed? (Y/N)\n")) { + System.err.println("Recovery aborted at user request.\n"); + return; + } + } + MetaRecoveryContext.LOG.info("starting recovery..."); + UserGroupInformation.setConfiguration(conf); + NameNode.initMetrics(conf, startOpt.toNodeRole()); + FSNamesystem fsn = null; + try { + fsn = FSNamesystem.loadFromDisk(conf); + fsn.saveNamespace(); + MetaRecoveryContext.LOG.info("RECOVERY COMPLETE"); + } catch (IOException e) { + MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); + throw e; + } catch (RuntimeException e) { + MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); + throw e; + } finally { + if (fsn != null) + fsn.close(); + } + } + /** * Print out a prompt to the user, and return true if the user * responds with "Y" or "yes". @@ -973,6 +1026,10 @@ public class NameNode { DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); return new BackupNode(conf, role); } + case RECOVER: { + NameNode.doRecovery(startOpt, conf); + return null; + } default: DefaultMetricsSystem.initialize("NameNode"); return new NameNode(conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 0b3a1f93f65..c11f1d760e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -219,7 +219,7 @@ public class EditLogTailer { // disk are ignored. 
long editsLoaded = 0; try { - editsLoaded = image.loadEdits(streams, namesystem); + editsLoaded = image.loadEdits(streams, namesystem, null); } catch (EditLogInputException elie) { editsLoaded = elie.getNumEditsLoaded(); throw elie; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java index 9e28c908ed3..009db6a4776 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java @@ -28,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache; import org.apache.hadoop.hdfs.util.XMLUtils.Stanza; import org.xml.sax.Attributes; @@ -54,6 +55,7 @@ class OfflineEditsXmlLoader private FSEditLogOpCodes opCode; private StringBuffer cbuf; private long nextTxId; + private final OpInstanceCache opCache = new OpInstanceCache(); static enum ParseState { EXPECT_EDITS_TAG, @@ -207,7 +209,7 @@ class OfflineEditsXmlLoader throw new InvalidXmlException("expected "); } state = ParseState.EXPECT_RECORD; - FSEditLogOp op = FSEditLogOp.getOpInstance(opCode); + FSEditLogOp op = opCache.get(opCode); opCode = null; try { op.decodeXml(stanza); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 7d0bf444d50..2f1d992005d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -581,6 +581,10 @@ public class MiniDFSCluster { } } + if (operation == StartupOption.RECOVER) { + return; + } + // Start the DataNodes startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks, hosts, simulatedCapacities, setupHostsFile); @@ -781,6 +785,9 @@ public class MiniDFSCluster { operation == StartupOption.REGULAR) ? 
new String[] {} : new String[] {operation.getName()}; NameNode nn = NameNode.createNameNode(args, conf); + if (operation == StartupOption.RECOVER) { + return; + } // After the NN has started, set back the bound ports into // the conf @@ -956,6 +963,9 @@ public class MiniDFSCluster { long[] simulatedCapacities, boolean setupHostsFile, boolean checkDataNodeAddrConfig) throws IOException { + if (operation == StartupOption.RECOVER) { + return; + } conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1"); int curDatanodesNum = dataNodes.size(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index fb3bc9b4c45..05df7fe9835 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -179,8 +179,8 @@ public class TestEditLog extends TestCase { } private long testLoad(byte[] data, FSNamesystem namesys) throws IOException { - FSEditLogLoader loader = new FSEditLogLoader(namesys); - return loader.loadFSEdits(new EditLogByteInputStream(data), 1); + FSEditLogLoader loader = new FSEditLogLoader(namesys, 0); + return loader.loadFSEdits(new EditLogByteInputStream(data), 1, null); } /** @@ -315,7 +315,7 @@ public class TestEditLog extends TestCase { // for (Iterator it = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS); it.hasNext();) { - FSEditLogLoader loader = new FSEditLogLoader(namesystem); + FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0); File editFile = NNStorage.getFinalizedEditsFile(it.next(), 3, 3 + expectedTxns - 1); @@ -323,7 +323,7 @@ public class TestEditLog extends TestCase { System.out.println("Verifying file: " + editFile); long numEdits = loader.loadFSEdits( - new EditLogFileInputStream(editFile), 3); + new EditLogFileInputStream(editFile), 3, null); int numLeases = namesystem.leaseManager.countLease(); System.out.println("Number of outstanding leases " + numLeases); assertEquals(0, numLeases); @@ -774,8 +774,8 @@ public class TestEditLog extends TestCase { } @Override - public FSEditLogOp readOp() throws IOException { - return reader.readOp(); + protected FSEditLogOp nextOp() throws IOException { + return reader.readOp(false); } @Override @@ -788,16 +788,11 @@ public class TestEditLog extends TestCase { input.close(); } - @Override // JournalStream + @Override public String getName() { return "AnonEditLogByteInputStream"; } - @Override // JournalStream - public JournalType getType() { - return JournalType.FILE; - } - @Override public boolean isInProgress() { return true; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java index da66b45da2a..a17b54f6bfe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java @@ -236,9 +236,9 @@ public class TestEditLogRace { File editFile = new File(sd.getCurrentDir(), logFileName); System.out.println("Verifying file: " + editFile); - FSEditLogLoader loader = new FSEditLogLoader(namesystem); + FSEditLogLoader loader = new FSEditLogLoader(namesystem, startTxId); long 
numEditsThisLog = loader.loadFSEdits(new EditLogFileInputStream(editFile), - startTxId); + startTxId, null); System.out.println("Number of edits: " + numEditsThisLog); assertTrue(numEdits == -1 || numEditsThisLog == numEdits); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index 98605e1f4e1..1917ddeb9a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -92,8 +92,8 @@ public class TestFSEditLogLoader { rwf.close(); StringBuilder bld = new StringBuilder(); - bld.append("^Error replaying edit log at offset \\d+"); - bld.append(" on transaction ID \\d+\n"); + bld.append("^Error replaying edit log at offset \\d+. "); + bld.append("Expected transaction ID was \\d+\n"); bld.append("Recent opcode offsets: (\\d+\\s*){4}$"); try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java new file mode 100644 index 00000000000..69680967897 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java @@ -0,0 +1,305 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache; +import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp; +import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.StringUtils; +import org.junit.Test; + +import com.google.common.collect.Sets; + +/** + * This tests data recovery mode for the NameNode. + */ +public class TestNameNodeRecovery { + private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class); + private static StartupOption recoverStartOpt = StartupOption.RECOVER; + + static { + recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL); + } + + static void runEditLogTest(EditLogTestSetup elts) throws IOException { + final String TEST_LOG_NAME = "test_edit_log"; + final OpInstanceCache cache = new OpInstanceCache(); + + EditLogFileOutputStream elfos = null; + File file = null; + EditLogFileInputStream elfis = null; + try { + file = new File(TEST_LOG_NAME); + elfos = new EditLogFileOutputStream(file, 0); + elfos.create(); + + elts.addTransactionsToLog(elfos, cache); + elfos.setReadyToFlush(); + elfos.flushAndSync(); + elfos.close(); + elfos = null; + file = new File(TEST_LOG_NAME); + elfis = new EditLogFileInputStream(file); + + // reading through normally will get you an exception + Set validTxIds = elts.getValidTxIds(); + FSEditLogOp op = null; + long prevTxId = 0; + try { + while (true) { + op = elfis.nextOp(); + if (op == null) { + break; + } + LOG.debug("read txid " + op.txid); + if (!validTxIds.contains(op.getTransactionId())) { + fail("read txid " + op.getTransactionId() + + ", which we did not expect to find."); + } + validTxIds.remove(op.getTransactionId()); + prevTxId = op.getTransactionId(); + } + if (elts.getLastValidTxId() != -1) { + fail("failed to throw IoException as expected"); + } + } catch (IOException e) { + if (elts.getLastValidTxId() == -1) { + fail("expected all transactions to be valid, but got exception " + + "on txid " + prevTxId); + } else { + assertEquals(prevTxId, elts.getLastValidTxId()); + } + } + + if (elts.getLastValidTxId() != -1) { + // let's skip over the bad transaction + op = null; + prevTxId = 0; + try { + while (true) { + op = elfis.nextValidOp(); + if (op == null) { + break; + } + prevTxId = op.getTransactionId(); + assertTrue(validTxIds.remove(op.getTransactionId())); + } + } catch (Throwable e) { + fail("caught IOException while trying to skip over bad " + + "transaction. message was " + e.getMessage() + + "\nstack trace\n" + StringUtils.stringifyException(e)); + } + } + // We should have read every valid transaction. 
+ assertTrue(validTxIds.isEmpty()); + } finally { + IOUtils.cleanup(LOG, elfos, elfis); + } + } + + private interface EditLogTestSetup { + /** + * Set up the edit log. + */ + abstract public void addTransactionsToLog(EditLogOutputStream elos, + OpInstanceCache cache) throws IOException; + + /** + * Get the transaction ID right before the transaction which causes the + * normal edit log loading process to bail out-- or -1 if the first + * transaction should be bad. + */ + abstract public long getLastValidTxId(); + + /** + * Get the transaction IDs which should exist and be valid in this + * edit log. + **/ + abstract public Set getValidTxIds(); + } + + private class EltsTestEmptyLog implements EditLogTestSetup { + public void addTransactionsToLog(EditLogOutputStream elos, + OpInstanceCache cache) throws IOException { + // do nothing + } + + public long getLastValidTxId() { + return -1; + } + + public Set getValidTxIds() { + return new HashSet(); + } + } + + /** Test an empty edit log */ + @Test(timeout=180000) + public void testEmptyLog() throws IOException { + runEditLogTest(new EltsTestEmptyLog()); + } + + private class EltsTestGarbageInEditLog implements EditLogTestSetup { + final private long BAD_TXID = 4; + final private long MAX_TXID = 10; + + public void addTransactionsToLog(EditLogOutputStream elos, + OpInstanceCache cache) throws IOException { + for (long txid = 1; txid <= MAX_TXID; txid++) { + if (txid == BAD_TXID) { + byte garbage[] = { 0x1, 0x2, 0x3 }; + elos.writeRaw(garbage, 0, garbage.length); + } + else { + DeleteOp op; + op = DeleteOp.getInstance(cache); + op.setTransactionId(txid); + op.setPath("/foo." + txid); + op.setTimestamp(txid); + elos.write(op); + } + } + } + + public long getLastValidTxId() { + return BAD_TXID - 1; + } + + public Set getValidTxIds() { + return Sets.newHashSet(1L , 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L); + } + } + + /** Test that we can successfully recover from a situation where there is + * garbage in the middle of the edit log file output stream. */ + @Test(timeout=180000) + public void testSkipEdit() throws IOException { + runEditLogTest(new EltsTestGarbageInEditLog()); + } + + /** Test that we can successfully recover from a situation where the last + * entry in the edit log has been truncated. 
*/ + @Test(timeout=180000) + public void testRecoverTruncatedEditLog() throws IOException { + final String TEST_PATH = "/test/path/dir"; + final int NUM_TEST_MKDIRS = 10; + + // start a cluster + Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = null; + FileSystem fileSys = null; + StorageDirectory sd = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) + .build(); + cluster.waitActive(); + fileSys = cluster.getFileSystem(); + final FSNamesystem namesystem = cluster.getNamesystem(); + FSImage fsimage = namesystem.getFSImage(); + for (int i = 0; i < NUM_TEST_MKDIRS; i++) { + fileSys.mkdirs(new Path(TEST_PATH)); + } + sd = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS).next(); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + + File editFile = FSImageTestUtil.findLatestEditsLog(sd).getFile(); + assertTrue("Should exist: " + editFile, editFile.exists()); + + // Corrupt the last edit + long fileLen = editFile.length(); + RandomAccessFile rwf = new RandomAccessFile(editFile, "rw"); + rwf.setLength(fileLen - 1); + rwf.close(); + + // Make sure that we can't start the cluster normally before recovery + cluster = null; + try { + LOG.debug("trying to start normally (this should fail)..."); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) + .format(false).build(); + cluster.waitActive(); + cluster.shutdown(); + fail("expected the truncated edit log to prevent normal startup"); + } catch (IOException e) { + // success + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + + // Perform recovery + cluster = null; + try { + LOG.debug("running recovery..."); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) + .format(false).startupOption(recoverStartOpt).build(); + } catch (IOException e) { + fail("caught IOException while trying to recover. " + + "message was " + e.getMessage() + + "\nstack trace\n" + StringUtils.stringifyException(e)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + + // Make sure that we can start the cluster normally after recovery + cluster = null; + try { + LOG.debug("starting cluster normally after recovery..."); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) + .format(false).build(); + LOG.debug("testRecoverTruncatedEditLog: successfully recovered the " + + "truncated edit log"); + assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH))); + } catch (IOException e) { + fail("failed to recover. 
Error message: " + e.getMessage()); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java index 596df8d76b1..216fb54b002 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java @@ -143,9 +143,9 @@ public class TestSecurityTokenEditLog extends TestCase { File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 1 + expectedTransactions - 1); System.out.println("Verifying file: " + editFile); - FSEditLogLoader loader = new FSEditLogLoader(namesystem); + FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0); long numEdits = loader.loadFSEdits( - new EditLogFileInputStream(editFile), 1); + new EditLogFileInputStream(editFile), 1, null); assertEquals("Verification for " + editFile, expectedTransactions, numEdits); } } finally { From 48ab08f1c62696e99d7d94c275b68709c499b7bf Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Mon, 9 Apr 2012 21:52:50 +0000 Subject: [PATCH 05/29] MAPREDUCE-4117. mapred job -status throws NullPointerException (Devaraj K via bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311479 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../java/org/apache/hadoop/mapreduce/Job.java | 5 ++ .../org/apache/hadoop/mapreduce/TestJob.java | 53 +++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJob.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 8c66a1c74c8..719669dd848 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -280,6 +280,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4051. Remove the empty hadoop-mapreduce-project/assembly/all.xml file (Ravi Prakash via bobby) + MAPREDUCE-4117. mapred job -status throws NullPointerException (Devaraj K + via bobby) + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java index c502d4cb230..51bac982285 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java @@ -509,6 +509,11 @@ public class Job extends JobContextImpl implements JobContext { lastEvent = event; } } + if (lastEvent == null) { + return "There are no failed tasks for the job. 
" + + "Job is failed due to some other reason and reason " + + "can be found in the logs."; + } String[] taskAttemptID = lastEvent.getTaskAttemptId().toString().split("_", 2); String taskID = taskAttemptID[1].substring(0, taskAttemptID[1].length()-2); return (" task " + taskID + " failed " + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJob.java new file mode 100644 index 00000000000..110acba2080 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJob.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; + +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.JobStatus.State; +import org.apache.hadoop.mapreduce.protocol.ClientProtocol; +import org.junit.Assert; +import org.junit.Test; + +public class TestJob { + @Test + public void testJobToString() throws IOException, InterruptedException { + Cluster cluster = mock(Cluster.class); + ClientProtocol client = mock(ClientProtocol.class); + when(cluster.getClient()).thenReturn(client); + JobID jobid = new JobID("1014873536921", 6); + JobStatus status = new JobStatus(jobid, 0.0f, 0.0f, 0.0f, 0.0f, + State.FAILED, JobPriority.NORMAL, "root", "TestJobToString", + "job file", "tracking url"); + when(client.getJobStatus(jobid)).thenReturn(status); + when(client.getTaskReports(jobid, TaskType.MAP)).thenReturn( + new TaskReport[0]); + when(client.getTaskReports(jobid, TaskType.REDUCE)).thenReturn( + new TaskReport[0]); + when(client.getTaskCompletionEvents(jobid, 0, 10)).thenReturn( + new TaskCompletionEvent[0]); + Job job = Job.getInstance(cluster, status, new JobConf()); + Assert.assertNotNull(job.toString()); + } + +} From 241e3d40accd891e90327d301688b26e505a27ee Mon Sep 17 00:00:00 2001 From: Thomas White Date: Mon, 9 Apr 2012 23:32:43 +0000 Subject: [PATCH 06/29] MAPREDUCE-4105. Yarn RackResolver ignores rack configurations. Contributed by Ahmed Radwan. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311520 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../apache/hadoop/yarn/util/RackResolver.java | 17 +++++-- .../hadoop/yarn/util/TestRackResolver.java | 2 +- .../TestRackResolverScriptBasedMapping.java | 44 +++++++++++++++++++ 4 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolverScriptBasedMapping.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 719669dd848..9fc8379e1b4 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -218,6 +218,9 @@ Release 2.0.0 - UNRELEASED org.apache.hadoop.mapred.TestMiniMRWithDFSWithDistinctUsers (Devaraj K via bobby) + MAPREDUCE-4105. Yarn RackResolver ignores rack configurations. + (Ahmed Radwan via tomwhite) + Release 0.23.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java index efbc90e2bb9..08e7b7c175a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java @@ -31,6 +31,9 @@ import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.net.Node; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.net.ScriptBasedMapping; +import org.apache.hadoop.util.ReflectionUtils; + +import com.google.common.annotations.VisibleForTesting; public class RackResolver { private static DNSToSwitchMapping dnsToSwitchMapping; @@ -49,10 +52,8 @@ public class RackResolver { ScriptBasedMapping.class, DNSToSwitchMapping.class); try { - Constructor dnsToSwitchMappingConstructor - = dnsToSwitchMappingClass.getConstructor(); - DNSToSwitchMapping newInstance = - dnsToSwitchMappingConstructor.newInstance(); + DNSToSwitchMapping newInstance = ReflectionUtils.newInstance( + dnsToSwitchMappingClass, conf); // Wrap around the configured class with the Cached implementation so as // to save on repetitive lookups. // Check if the impl is already caching, to avoid double caching. 
@@ -99,4 +100,12 @@ public class RackResolver { LOG.info("Resolved " + hostName + " to " + rName); return new NodeBase(hostName, rName); } + + /** + * Only used by tests + */ + @VisibleForTesting + static DNSToSwitchMapping getDnsToSwitchMapping(){ + return dnsToSwitchMapping; + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java index a82f129d517..478a7d3cf09 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java @@ -52,7 +52,7 @@ public class TestRackResolver { Assert.assertTrue(numHost1 <= 1); return returnList; } - + } @Test diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolverScriptBasedMapping.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolverScriptBasedMapping.java new file mode 100644 index 00000000000..e8e875978b5 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolverScriptBasedMapping.java @@ -0,0 +1,44 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.util; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.net.DNSToSwitchMapping; +import org.apache.hadoop.net.ScriptBasedMapping; +import org.junit.Assert; +import org.junit.Test; + +public class TestRackResolverScriptBasedMapping { + + @Test + public void testScriptName() { + Configuration conf = new Configuration(); + conf + .setClass( + CommonConfigurationKeysPublic. + NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, + ScriptBasedMapping.class, DNSToSwitchMapping.class); + conf.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY, + "testScript"); + RackResolver.init(conf); + Assert.assertEquals(RackResolver.getDnsToSwitchMapping().toString(), + "script-based mapping with script testScript"); + } +} From b28f134e9cc61c49b404eaacf8a321bb12b78969 Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Tue, 10 Apr 2012 00:01:49 +0000 Subject: [PATCH 07/29] MAPREDUCE-3869. Fix classpath for DistributedShell application. 
(Contributed by Devaraj K) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311523 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../applications/distributedshell/Client.java | 25 ++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 9fc8379e1b4..7c4c352c58a 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -221,6 +221,9 @@ Release 2.0.0 - UNRELEASED MAPREDUCE-4105. Yarn RackResolver ignores rack configurations. (Ahmed Radwan via tomwhite) + MAPREDUCE-3869. Fix classpath for DistributedShell application. (Devaraj K + via sseth) + Release 0.23.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index 39fe08a4417..3b962a3048f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -78,7 +78,6 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.ipc.YarnRPC; -import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.Records; @@ -506,22 +505,20 @@ public class Client { // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar - String classPathEnv = "${CLASSPATH}" - + ":./*" - + ":$HADOOP_CONF_DIR" - + ":$HADOOP_COMMON_HOME/share/hadoop/common/*" - + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*" - + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*" - + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*" - + ":$YARN_HOME/modules/*" - + ":$YARN_HOME/lib/*" - + ":./log4j.properties:"; + StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*"); + for (String c : conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH) + .split(",")) { + classPathEnv.append(':'); + classPathEnv.append(c.trim()); + } + classPathEnv.append(":./log4j.properties"); - // add the runtime classpath needed for tests to work + // add the runtime classpath needed for tests to work String testRuntimeClassPath = Client.getTestRuntimeClasspath(); - classPathEnv += ":" + testRuntimeClassPath; + classPathEnv.append(':'); + classPathEnv.append(testRuntimeClassPath); - env.put("CLASSPATH", classPathEnv); + env.put("CLASSPATH", classPathEnv.toString()); amContainer.setEnvironment(env); From 91528ce9eddbfc4f3201584a3dadd6d9c6b722e2 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Apr 2012 00:59:38 +0000 Subject: [PATCH 08/29] HDFS-3234. Accidentally left log message in GetConf after HDFS-3226. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311541 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0a89cd40f36..02396863aba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -462,6 +462,8 @@ Release 2.0.0 - UNRELEASED HDFS-3214. InterDatanodeProtocolServerSideTranslatorPB doesn't handle null response from initReplicaRecovery (todd) + HDFS-3234. Accidentally left log message in GetConf after HDFS-3226 (todd) + BREAKDOWN OF HDFS-1623 SUBTASKS HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java index 2546873e1e3..adf3293edf5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java @@ -248,7 +248,6 @@ public class GetConf extends Configured implements Tool { @Override int doWorkInternal(GetConf tool, String[] args) throws Exception { this.key = args[0]; - System.err.println("key: " + key); return super.doWorkInternal(tool, args); } } From 4b0d4d54d9a5719a9bd151cec622b2b6ab67d24f Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Apr 2012 01:02:05 +0000 Subject: [PATCH 09/29] HADOOP-8263. Stringification of IPC calls not useful. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311543 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ .../main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 44cb78d8e0d..7f5f1bc0897 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -338,6 +338,8 @@ Release 2.0.0 - UNRELEASED HADOOP-8261. Har file system doesn't deal with FS URIs with a host but no port. (atm) + HADOOP-8263. Stringification of IPC calls not useful (todd) + BREAKDOWN OF HADOOP-7454 SUBTASKS HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index ffeda175d75..556f7101a4e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -268,6 +268,12 @@ public class ProtobufRpcEngine implements RpcEngine { in.readFully(bytes); message = HadoopRpcRequestProto.parseFrom(bytes); } + + @Override + public String toString() { + return message.getDeclaringClassProtocolName() + "." + + message.getMethodName(); + } } /** From 9597c81f35c17f6ee8bd2cef85f76af306478e6b Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 10 Apr 2012 02:17:17 +0000 Subject: [PATCH 10/29] HDFS-3236. 
NameNode does not initialize generic conf keys when started with -initializeSharedEditsDir. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311554 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/server/namenode/NameNode.java | 3 +++ .../namenode/ha/TestInitializeSharedEdits.java | 15 +++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 02396863aba..63c77a7a5cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -464,6 +464,9 @@ Release 2.0.0 - UNRELEASED HDFS-3234. Accidentally left log message in GetConf after HDFS-3226 (todd) + HDFS-3236. NameNode does not initialize generic conf keys when started + with -initializeSharedEditsDir (atm) + BREAKDOWN OF HDFS-1623 SUBTASKS HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 66558606289..0c7c4cd863c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -760,6 +760,9 @@ public class NameNode { */ private static boolean initializeSharedEdits(Configuration conf, boolean force, boolean interactive) { + String nsId = DFSUtil.getNamenodeNameServiceId(conf); + String namenodeId = HAUtil.getNameNodeId(conf, nsId); + initializeGenericKeys(conf, nsId, namenodeId); NNStorage existingStorage = null; try { FSNamesystem fsns = FSNamesystem.loadFromDisk(conf, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java index 74aeaab4f28..3415b5eff9b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java @@ -25,6 +25,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -117,4 +119,17 @@ public class TestInitializeSharedEdits { assertFalse(NameNode.initializeSharedEdits(conf, false)); assertTrue(NameNode.initializeSharedEdits(conf, false)); } + + @Test + public void testInitializeSharedEditsConfiguresGenericConfKeys() { + Configuration conf = new Configuration(); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "ns1"); + conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, + "ns1"), "nn1,nn2"); + conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, + "ns1", "nn1"), "localhost:1234"); + assertNull(conf.get(DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY)); + NameNode.initializeSharedEdits(conf); + 
assertNotNull(conf.get(DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY)); + } } From 53dc8546d4b5c6ffe715c61c5d058c533aca4b0a Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 10 Apr 2012 02:25:40 +0000 Subject: [PATCH 11/29] HDFS-3235. MiniDFSClusterManager doesn't correctly support -format option. Contributed by Henry Robinson. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311556 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../org/apache/hadoop/test/MiniDFSClusterManager.java | 11 +++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 63c77a7a5cb..12e4bc5784f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -121,6 +121,9 @@ Trunk (unreleased changes) HDFS-3119. Overreplicated block is not deleted even after the replication factor is reduced after sync follwed by closing that file. (Ashish Singhi via umamahesh) + + HDFS-3235. MiniDFSClusterManager doesn't correctly support -format option. + (Henry Robinson via atm) Release 2.0.0 - UNRELEASED diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/MiniDFSClusterManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/MiniDFSClusterManager.java index 4622b4cd5c5..bd6e524f7c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/MiniDFSClusterManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/MiniDFSClusterManager.java @@ -68,6 +68,7 @@ public class MiniDFSClusterManager { private StartupOption dfsOpts; private String writeConfig; private Configuration conf; + private boolean format; private static final long SLEEP_INTERVAL_MS = 1000 * 60; @@ -138,6 +139,7 @@ public class MiniDFSClusterManager { dfs = new MiniDFSCluster.Builder(conf).nameNodePort(nameNodePort) .numDataNodes(numDataNodes) .startupOption(dfsOpts) + .format(format) .build(); dfs.waitActive(); @@ -196,8 +198,13 @@ public class MiniDFSClusterManager { // HDFS numDataNodes = intArgument(cli, "datanodes", 1); nameNodePort = intArgument(cli, "nnport", 0); - dfsOpts = cli.hasOption("format") ? - StartupOption.FORMAT : StartupOption.REGULAR; + if (cli.hasOption("format")) { + dfsOpts = StartupOption.FORMAT; + format = true; + } else { + dfsOpts = StartupOption.REGULAR; + format = false; + } // Runner writeDetails = cli.getOptionValue("writeDetails"); From df3e1a31582653f1c4e187ae7466aee6cb27e4d4 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Apr 2012 05:12:00 +0000 Subject: [PATCH 12/29] HDFS-3240. Drop log level of "heartbeat: ..." in BPServiceActor to DEBUG. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311577 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../apache/hadoop/hdfs/server/datanode/BPServiceActor.java | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 12e4bc5784f..78ccde43a83 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -350,6 +350,9 @@ Release 2.0.0 - UNRELEASED HDFS-3211. Add fence(..) and replace NamenodeRegistration with JournalInfo and epoch in JournalProtocol. (suresh via szetszwo) + HDFS-3240. Drop log level of "heartbeat: ..." 
in BPServiceActor to DEBUG + (todd) + OPTIMIZATIONS HDFS-3024. Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index a0850f8b4ee..25e3a6781ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -417,7 +417,9 @@ class BPServiceActor implements Runnable { HeartbeatResponse sendHeartBeat() throws IOException { - LOG.info("heartbeat: " + this); + if (LOG.isDebugEnabled()) { + LOG.debug("Sending heartbeat from service actor: " + this); + } // reports number of failed volumes StorageReport[] report = { new StorageReport(bpRegistration.getStorageID(), false, From 482e0638e85be223fa6b8a6e229a9e9ab59f8904 Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Tue, 10 Apr 2012 14:12:02 +0000 Subject: [PATCH 13/29] HADOOP-8264. Remove irritating double double quotes in front of hostname (Bernd Fondermann via bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311738 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../src/main/java/org/apache/hadoop/net/NetUtils.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7f5f1bc0897..a4c4993dd66 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -340,6 +340,9 @@ Release 2.0.0 - UNRELEASED HADOOP-8263. Stringification of IPC calls not useful (todd) + HADOOP-8264. Remove irritating double double quotes in front of hostname + (Bernd Fondermann via bobby) + BREAKDOWN OF HADOOP-7454 SUBTASKS HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java index b84bcb6a2a4..b6a9eac1084 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java @@ -782,7 +782,7 @@ public class NetUtils { hostDetails.append("local host is: ") .append(quoteHost(localHost)) .append("; "); - hostDetails.append("destination host is: \"").append(quoteHost(destHost)) + hostDetails.append("destination host is: ").append(quoteHost(destHost)) .append(":") .append(destPort).append("; "); return hostDetails.toString(); From 926f0a5ae0777bd95dac966eac1c13ffe000245c Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Tue, 10 Apr 2012 15:03:17 +0000 Subject: [PATCH 14/29] HDFS-3238. ServerCommand and friends don't need to be writables. 
Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311774 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../protocol/BalancerBandwidthCommand.java | 39 ------------- .../hdfs/server/protocol/BlockCommand.java | 56 ------------------- .../server/protocol/BlockRecoveryCommand.java | 54 ------------------ .../server/protocol/CheckpointCommand.java | 30 ---------- .../hdfs/server/protocol/DatanodeCommand.java | 5 +- .../hdfs/server/protocol/FinalizeCommand.java | 26 --------- .../server/protocol/KeyUpdateCommand.java | 32 ----------- .../hdfs/server/protocol/NamenodeCommand.java | 14 ----- .../hdfs/server/protocol/RegisterCommand.java | 22 -------- .../hdfs/server/protocol/ServerCommand.java | 27 +-------- .../hdfs/server/protocol/UpgradeCommand.java | 34 ----------- 12 files changed, 4 insertions(+), 337 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 78ccde43a83..5f9cb1ae49e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -353,6 +353,8 @@ Release 2.0.0 - UNRELEASED HDFS-3240. Drop log level of "heartbeat: ..." in BPServiceActor to DEBUG (todd) + HDFS-3238. ServerCommand and friends don't need to be writables. (eli) + OPTIMIZATIONS HDFS-3024. Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BalancerBandwidthCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BalancerBandwidthCommand.java index ca8a5fb84f7..1a6682ad4a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BalancerBandwidthCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BalancerBandwidthCommand.java @@ -25,14 +25,6 @@ package org.apache.hadoop.hdfs.server.protocol; * each datanode. */ -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; - /** * Balancer bandwidth command instructs each datanode to change its value for * the max amount of network bandwidth it may use during the block balancing @@ -71,35 +63,4 @@ public class BalancerBandwidthCommand extends DatanodeCommand { public long getBalancerBandwidthValue() { return this.bandwidth; } - - // /////////////////////////////////////////////// - // Writable - // /////////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory(BalancerBandwidthCommand.class, new WritableFactory() { - public Writable newInstance() { - return new BalancerBandwidthCommand(); - } - }); - } - - /** - * Writes the bandwidth payload to the Balancer Bandwidth Command packet. - * @param out DataOutput stream used for writing commands to the datanode. - * @throws IOException - */ - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeLong(this.bandwidth); - } - - /** - * Reads the bandwidth payload from the Balancer Bandwidth Command packet. - * @param in DataInput stream used for reading commands to the datanode. 
- * @throws IOException - */ - public void readFields(DataInput in) throws IOException { - super.readFields(in); - this.bandwidth = in.readLong(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockCommand.java index cfbfb0a5360..7a58c6162fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockCommand.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; @@ -27,11 +24,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTargetPair; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; - /**************************************************** * A BlockCommand is an instruction to a datanode @@ -58,8 +50,6 @@ public class BlockCommand extends DatanodeCommand { Block blocks[]; DatanodeInfo targets[][]; - public BlockCommand() {} - /** * Create BlockCommand for transferring blocks to another datanode * @param blocktargetlist blocks to be transferred @@ -110,50 +100,4 @@ public class BlockCommand extends DatanodeCommand { public DatanodeInfo[][] getTargets() { return targets; } - - /////////////////////////////////////////// - // Writable - /////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (BlockCommand.class, - new WritableFactory() { - public Writable newInstance() { return new BlockCommand(); } - }); - } - - public void write(DataOutput out) throws IOException { - super.write(out); - Text.writeString(out, poolId); - out.writeInt(blocks.length); - for (int i = 0; i < blocks.length; i++) { - blocks[i].write(out); - } - out.writeInt(targets.length); - for (int i = 0; i < targets.length; i++) { - out.writeInt(targets[i].length); - for (int j = 0; j < targets[i].length; j++) { - targets[i][j].write(out); - } - } - } - - public void readFields(DataInput in) throws IOException { - super.readFields(in); - this.poolId = Text.readString(in); - this.blocks = new Block[in.readInt()]; - for (int i = 0; i < blocks.length; i++) { - blocks[i] = new Block(); - blocks[i].readFields(in); - } - - this.targets = new DatanodeInfo[in.readInt()][]; - for (int i = 0; i < targets.length; i++) { - this.targets[i] = new DatanodeInfo[in.readInt()]; - for (int j = 0; j < targets[i].length; j++) { - targets[i][j] = new DatanodeInfo(); - targets[i][j].readFields(in); - } - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java index 5f2ae8eb8d8..1d3f7b41389 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java @@ -28,9 +28,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; import com.google.common.base.Joiner; @@ -84,27 +81,6 @@ public class BlockRecoveryCommand extends DatanodeCommand { public long getNewGenerationStamp() { return newGenerationStamp; } - - /////////////////////////////////////////// - // Writable - /////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (RecoveringBlock.class, - new WritableFactory() { - public Writable newInstance() { return new RecoveringBlock(); } - }); - } - - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeLong(newGenerationStamp); - } - - public void readFields(DataInput in) throws IOException { - super.readFields(in); - newGenerationStamp = in.readLong(); - } } /** @@ -149,34 +125,4 @@ public class BlockRecoveryCommand extends DatanodeCommand { sb.append("\n)"); return sb.toString(); } - - /////////////////////////////////////////// - // Writable - /////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (BlockRecoveryCommand.class, - new WritableFactory() { - public Writable newInstance() { return new BlockRecoveryCommand(); } - }); - } - - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeInt(recoveringBlocks.size()); - for(RecoveringBlock block : recoveringBlocks) { - block.write(out); - } - } - - public void readFields(DataInput in) throws IOException { - super.readFields(in); - int numBlocks = in.readInt(); - recoveringBlocks = new ArrayList(numBlocks); - for(int i = 0; i < numBlocks; i++) { - RecoveringBlock b = new RecoveringBlock(); - b.readFields(in); - add(b); - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/CheckpointCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/CheckpointCommand.java index 889d635043f..36947b84bf6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/CheckpointCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/CheckpointCommand.java @@ -17,13 +17,6 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature; @@ -77,27 +70,4 @@ public class CheckpointCommand extends NamenodeCommand { public boolean needToReturnImage() { return needToReturnImage; } - - /////////////////////////////////////////// - // Writable - /////////////////////////////////////////// - static { - WritableFactories.setFactory(CheckpointCommand.class, - new WritableFactory() { - public Writable newInstance() {return new CheckpointCommand();} - }); - } - - public void 
write(DataOutput out) throws IOException { - super.write(out); - cSig.write(out); - out.writeBoolean(needToReturnImage); - } - - public void readFields(DataInput in) throws IOException { - super.readFields(in); - cSig = new CheckpointSignature(); - cSig.readFields(in); - needToReturnImage = in.readBoolean(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java index 6d59274efd2..b231f75809d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java @@ -27,10 +27,7 @@ import org.apache.hadoop.classification.InterfaceStability; @InterfaceAudience.Private @InterfaceStability.Evolving public abstract class DatanodeCommand extends ServerCommand { - public DatanodeCommand() { - super(); - } - + DatanodeCommand(int action) { super(action); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/FinalizeCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/FinalizeCommand.java index 3bc8b117c2c..86fb6a5dadd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/FinalizeCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/FinalizeCommand.java @@ -17,16 +17,8 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; -import org.apache.hadoop.io.WritableUtils; /** * A BlockCommand is an instruction to a datanode to register with the namenode. 
@@ -34,17 +26,6 @@ import org.apache.hadoop.io.WritableUtils; @InterfaceAudience.Private @InterfaceStability.Evolving public class FinalizeCommand extends DatanodeCommand { - // ///////////////////////////////////////// - // Writable - // ///////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory(FinalizeCommand.class, new WritableFactory() { - public Writable newInstance() { - return new FinalizeCommand(); - } - }); - } - String blockPoolId; private FinalizeCommand() { super(DatanodeProtocol.DNA_FINALIZE); @@ -58,11 +39,4 @@ public class FinalizeCommand extends DatanodeCommand { public String getBlockPoolId() { return blockPoolId; } - - public void readFields(DataInput in) throws IOException { - blockPoolId = WritableUtils.readString(in); - } - public void write(DataOutput out) throws IOException { - WritableUtils.writeString(out, blockPoolId); - } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/KeyUpdateCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/KeyUpdateCommand.java index 9598736a3bc..fb1d2189da2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/KeyUpdateCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/KeyUpdateCommand.java @@ -17,16 +17,9 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; @InterfaceAudience.Private @InterfaceStability.Evolving @@ -45,29 +38,4 @@ public class KeyUpdateCommand extends DatanodeCommand { public ExportedBlockKeys getExportedKeys() { return this.keys; } - - // /////////////////////////////////////////////// - // Writable - // /////////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory(KeyUpdateCommand.class, new WritableFactory() { - public Writable newInstance() { - return new KeyUpdateCommand(); - } - }); - } - - /** - */ - public void write(DataOutput out) throws IOException { - super.write(out); - keys.write(out); - } - - /** - */ - public void readFields(DataInput in) throws IOException { - super.readFields(in); - keys.readFields(in); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeCommand.java index c5ed290d951..7cd592c8730 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeCommand.java @@ -19,9 +19,6 @@ package org.apache.hadoop.hdfs.server.protocol; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; /** * Base class for name-node command. 
@@ -30,17 +27,6 @@ import org.apache.hadoop.io.WritableFactory; @InterfaceAudience.Private @InterfaceStability.Evolving public class NamenodeCommand extends ServerCommand { - static { - WritableFactories.setFactory(NamenodeCommand.class, - new WritableFactory() { - public Writable newInstance() {return new NamenodeCommand();} - }); - } - - public NamenodeCommand() { - super(); - } - public NamenodeCommand(int action) { super(action); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java index 05843475f65..a6cd4498fc9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/RegisterCommand.java @@ -17,14 +17,8 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; /** * A BlockCommand is an instruction to a datanode to register with the namenode. @@ -32,26 +26,10 @@ import org.apache.hadoop.io.WritableFactory; @InterfaceAudience.Private @InterfaceStability.Evolving public class RegisterCommand extends DatanodeCommand { - // ///////////////////////////////////////// - // Writable - // ///////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory(RegisterCommand.class, new WritableFactory() { - public Writable newInstance() { - return new RegisterCommand(); - } - }); - } public static final DatanodeCommand REGISTER = new RegisterCommand(); public RegisterCommand() { super(DatanodeProtocol.DNA_REGISTER); } - - @Override - public void readFields(DataInput in) { } - - @Override - public void write(DataOutput out) { } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ServerCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ServerCommand.java index 7880683eac5..193839cd37f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ServerCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ServerCommand.java @@ -17,11 +17,8 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.*; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; /** * Base class for a server command. @@ -33,20 +30,9 @@ import org.apache.hadoop.io.Writable; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public abstract class ServerCommand implements Writable { +public abstract class ServerCommand { private int action; - /** - * Unknown server command constructor. - * Creates a command with action 0. - * - * @see NamenodeProtocol#ACT_UNKNOWN - * @see DatanodeProtocol#DNA_UNKNOWN - */ - public ServerCommand() { - this(0); - } - /** * Create a command for the specified action. * Actions are protocol specific. 
@@ -66,15 +52,4 @@ public abstract class ServerCommand implements Writable { public int getAction() { return this.action; } - - /////////////////////////////////////////// - // Writable - /////////////////////////////////////////// - public void write(DataOutput out) throws IOException { - out.writeInt(this.action); - } - - public void readFields(DataInput in) throws IOException { - this.action = in.readInt(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java index a6de55d843c..fc9656a8f39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java @@ -17,15 +17,8 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; /** * This as a generic distributed upgrade command. @@ -68,31 +61,4 @@ public class UpgradeCommand extends DatanodeCommand { public short getCurrentStatus() { return this.upgradeStatus; } - - ///////////////////////////////////////////////// - // Writable - ///////////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (UpgradeCommand.class, - new WritableFactory() { - public Writable newInstance() { return new UpgradeCommand(); } - }); - } - - /** - */ - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeInt(this.version); - out.writeShort(this.upgradeStatus); - } - - /** - */ - public void readFields(DataInput in) throws IOException { - super.readFields(in); - this.version = in.readInt(); - this.upgradeStatus = in.readShort(); - } } From 000859a534f4cc6a57524a676805d8af6ad199de Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Tue, 10 Apr 2012 16:13:07 +0000 Subject: [PATCH 15/29] MAPREDUCE-3983. TestTTResourceReporting can fail, and should just be deleted git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311818 13f79535-47bb-0310-9956-ffa450edef68 --- .../mapred/org/apache/hadoop/mapred/TestTTResourceReporting.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestTTResourceReporting.java diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestTTResourceReporting.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestTTResourceReporting.java deleted file mode 100644 index e69de29bb2d..00000000000 From cbb5f6109097a77f18f5fb0ba62ac132b8fa980f Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Tue, 10 Apr 2012 18:11:26 +0000 Subject: [PATCH 16/29] MAPREDUCE-4059. The history server should have a separate pluggable storage/query interface. (Robert Evans via tgraves). 
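With this change the history server reads job data through the new HistoryStorage
interface; CachedHistoryStorage is the default in-memory implementation, and the
implementation class is named by the new JHAdminConfig.MR_HISTORY_STORAGE key. As a
rough illustration only (a sketch inferred from the methods CachedHistoryStorage
overrides below, not code contained in this patch), a minimal no-op store would look
something like:

    // Assumes the usual imports: java.util.*, org.apache.hadoop.mapreduce.v2.api.records.*,
    // org.apache.hadoop.mapreduce.v2.app.job.Job, org.apache.hadoop.mapreduce.v2.hs.*,
    // and org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo.
    public class NullHistoryStorage implements HistoryStorage {
      public void setHistoryFileManager(HistoryFileManager hsManager) { }
      public Job getFullJob(JobId jobId) { return null; }
      public Map<JobId, Job> getAllPartialJobs() { return new TreeMap<JobId, Job>(); }
      public JobsInfo getPartialJobs(Long offset, Long count, String user, String queue,
          Long sBegin, Long sEnd, Long fBegin, Long fEnd, JobState jobState) {
        return new JobsInfo();
      }
      public void jobRemovedFromHDFS(JobId jobId) { }
    }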
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311896 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/jobhistory/JHAdminConfig.java | 18 + .../v2/jobhistory/JobHistoryUtils.java | 4 + .../mapreduce/v2/hs/CachedHistoryStorage.java | 217 ++++ .../mapreduce/v2/hs/HistoryContext.java | 5 + .../mapreduce/v2/hs/HistoryFileManager.java | 763 ++++++++++++ .../mapreduce/v2/hs/HistoryStorage.java | 80 ++ .../hadoop/mapreduce/v2/hs/JobHistory.java | 1031 +++-------------- .../hadoop/mapreduce/v2/hs/PartialJob.java | 1 + .../mapreduce/v2/hs/webapp/HsWebApp.java | 1 + .../mapreduce/v2/hs/webapp/HsWebServices.java | 112 +- .../v2/hs/TestJobHistoryParsing.java | 10 +- .../v2/hs/webapp/TestHsWebServices.java | 19 +- .../hs/webapp/TestHsWebServicesAttempts.java | 20 +- .../hs/webapp/TestHsWebServicesJobConf.java | 20 +- .../v2/hs/webapp/TestHsWebServicesJobs.java | 21 +- .../hs/webapp/TestHsWebServicesJobsQuery.java | 20 +- .../v2/hs/webapp/TestHsWebServicesTasks.java | 20 +- 18 files changed, 1399 insertions(+), 966 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CachedHistoryStorage.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryStorage.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 7c4c352c58a..44dd8abda13 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -235,6 +235,9 @@ Release 0.23.3 - UNRELEASED IMPROVEMENTS + MAPREDUCE-4059. The history server should have a separate pluggable + storage/query interface. 
(Robert Evans via tgraves) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java index a89f70c901d..22cc7fcc64e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java @@ -44,6 +44,9 @@ public class JHAdminConfig { /** Run the History Cleaner every X ms.*/ public static final String MR_HISTORY_CLEANER_INTERVAL_MS = MR_HISTORY_PREFIX + "cleaner.interval-ms"; + public static final long DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS = + 1 * 24 * 60 * 60 * 1000l; //1 day + /** The number of threads to handle client API requests.*/ public static final String MR_HISTORY_CLIENT_THREAD_COUNT = @@ -56,7 +59,9 @@ public class JHAdminConfig { */ public static final String MR_HISTORY_DATESTRING_CACHE_SIZE = MR_HISTORY_PREFIX + "datestring.cache.size"; + public static final int DEFAULT_MR_HISTORY_DATESTRING_CACHE_SIZE = 200000; + //TODO REMOVE debug-mode /** Equivalent to 0.20 mapreduce.jobhistory.debug.mode */ public static final String MR_HISTORY_DEBUG_MODE = MR_HISTORY_PREFIX + "debug-mode"; @@ -75,6 +80,7 @@ public class JHAdminConfig { /** Size of the job list cache.*/ public static final String MR_HISTORY_JOBLIST_CACHE_SIZE = MR_HISTORY_PREFIX + "joblist.cache.size"; + public static final int DEFAULT_MR_HISTORY_JOBLIST_CACHE_SIZE = 20000; /** The location of the Kerberos keytab file.*/ public static final String MR_HISTORY_KEYTAB = MR_HISTORY_PREFIX + "keytab"; @@ -82,6 +88,7 @@ public class JHAdminConfig { /** Size of the loaded job cache.*/ public static final String MR_HISTORY_LOADED_JOB_CACHE_SIZE = MR_HISTORY_PREFIX + "loadedjobs.cache.size"; + public static final int DEFAULT_MR_HISTORY_LOADED_JOB_CACHE_SIZE = 5; /** * The maximum age of a job history file before it is deleted from the history @@ -89,6 +96,8 @@ public class JHAdminConfig { */ public static final String MR_HISTORY_MAX_AGE_MS = MR_HISTORY_PREFIX + "max-age-ms"; + public static final long DEFAULT_MR_HISTORY_MAX_AGE = + 7 * 24 * 60 * 60 * 1000L; //1 week /** * Scan for history files to more from intermediate done dir to done dir @@ -96,10 +105,13 @@ public class JHAdminConfig { */ public static final String MR_HISTORY_MOVE_INTERVAL_MS = MR_HISTORY_PREFIX + "move.interval-ms"; + public static final long DEFAULT_MR_HISTORY_MOVE_INTERVAL_MS = + 3 * 60 * 1000l; //3 minutes /** The number of threads used to move files.*/ public static final String MR_HISTORY_MOVE_THREAD_COUNT = MR_HISTORY_PREFIX + "move.thread-count"; + public static final int DEFAULT_MR_HISTORY_MOVE_THREAD_COUNT = 3; /** The Kerberos principal for the history server.*/ public static final String MR_HISTORY_PRINCIPAL = @@ -116,4 +128,10 @@ public class JHAdminConfig { */ public static final String MR_HS_SECURITY_SERVICE_AUTHORIZATION = "security.mrhs.client.protocol.acl"; + + /** + * The HistoryStorage class to use to cache history data. 
+ */ + public static final String MR_HISTORY_STORAGE = + MR_HISTORY_PREFIX + ".store.class"; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index d7e191b0eaf..494431614d7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -31,6 +31,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileStatus; @@ -50,6 +52,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import com.google.common.base.Joiner; import com.google.common.base.Splitter; +@InterfaceAudience.Private +@InterfaceStability.Unstable public class JobHistoryUtils { /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CachedHistoryStorage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CachedHistoryStorage.java new file mode 100644 index 00000000000..5a4da68e6fd --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CachedHistoryStorage.java @@ -0,0 +1,217 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.hs; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobReport; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; +import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.MetaInfo; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobInfo; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.service.AbstractService; + +/** + * Manages an in memory cache of parsed Job History files. + */ +public class CachedHistoryStorage extends AbstractService implements + HistoryStorage { + private static final Log LOG = LogFactory.getLog(CachedHistoryStorage.class); + + private Map loadedJobCache = null; + // The number of loaded jobs. + private int loadedJobCacheSize; + + private HistoryFileManager hsManager; + + @Override + public void setHistoryFileManager(HistoryFileManager hsManager) { + this.hsManager = hsManager; + } + + @SuppressWarnings("serial") + @Override + public void init(Configuration conf) throws YarnException { + LOG.info("CachedHistoryStorage Init"); + + loadedJobCacheSize = conf.getInt( + JHAdminConfig.MR_HISTORY_LOADED_JOB_CACHE_SIZE, + JHAdminConfig.DEFAULT_MR_HISTORY_LOADED_JOB_CACHE_SIZE); + + loadedJobCache = Collections.synchronizedMap(new LinkedHashMap( + loadedJobCacheSize + 1, 0.75f, true) { + @Override + public boolean removeEldestEntry(final Map.Entry eldest) { + return super.size() > loadedJobCacheSize; + } + }); + + super.init(conf); + } + + public CachedHistoryStorage() { + super(CachedHistoryStorage.class.getName()); + } + + private Job loadJob(MetaInfo metaInfo) { + try { + Job job = hsManager.loadJob(metaInfo); + if (LOG.isDebugEnabled()) { + LOG.debug("Adding " + job.getID() + " to loaded job cache"); + } + loadedJobCache.put(job.getID(), job); + return job; + } catch (IOException e) { + throw new YarnException( + "Could not find/load job: " + metaInfo.getJobId(), e); + } + } + + @Override + public synchronized Job getFullJob(JobId jobId) { + if (LOG.isDebugEnabled()) { + LOG.debug("Looking for Job " + jobId); + } + try { + Job result = loadedJobCache.get(jobId); + if (result == null) { + MetaInfo metaInfo = hsManager.getMetaInfo(jobId); + if (metaInfo != null) { + result = loadJob(metaInfo); + } + } + return result; + } catch (IOException e) { + throw new YarnException(e); + } + } + + @Override + public Map getAllPartialJobs() { + LOG.debug("Called getAllPartialJobs()"); + SortedMap result = new TreeMap(); + try { + for (MetaInfo mi : hsManager.getAllMetaInfo()) { + if (mi != null) { + JobId id = mi.getJobId(); + result.put(id, new PartialJob(mi.getJobIndexInfo(), id)); + } + } + } catch (IOException e) { + LOG.warn("Error trying to scan for all MetaInfos", e); + throw new YarnException(e); + } + return result; + } + + @Override + public void jobRemovedFromHDFS(JobId jobId) { + loadedJobCache.remove(jobId); + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, 
Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + return getPartialJobs(getAllPartialJobs().values(), offset, count, user, + queue, sBegin, sEnd, fBegin, fEnd, jobState); + } + + public static JobsInfo getPartialJobs(Collection jobs, Long offset, + Long count, String user, String queue, Long sBegin, Long sEnd, + Long fBegin, Long fEnd, JobState jobState) { + JobsInfo allJobs = new JobsInfo(); + + if (sBegin == null || sBegin < 0) + sBegin = 0l; + if (sEnd == null) + sEnd = Long.MAX_VALUE; + if (fBegin == null || fBegin < 0) + fBegin = 0l; + if (fEnd == null) + fEnd = Long.MAX_VALUE; + if (offset == null || offset < 0) + offset = 0l; + if (count == null) + count = Long.MAX_VALUE; + + if (offset > jobs.size()) { + return allJobs; + } + + long at = 0; + long end = offset + count - 1; + if (end < 0) { // due to overflow + end = Long.MAX_VALUE; + } + for (Job job : jobs) { + if (at > end) { + break; + } + + // can't really validate queue is a valid one since queues could change + if (queue != null && !queue.isEmpty()) { + if (!job.getQueueName().equals(queue)) { + continue; + } + } + + if (user != null && !user.isEmpty()) { + if (!job.getUserName().equals(user)) { + continue; + } + } + + JobReport report = job.getReport(); + + if (report.getStartTime() < sBegin || report.getStartTime() > sEnd) { + continue; + } + if (report.getFinishTime() < fBegin || report.getFinishTime() > fEnd) { + continue; + } + if (jobState != null && jobState != report.getJobState()) { + continue; + } + + at++; + if ((at - 1) < offset) { + continue; + } + + JobInfo jobInfo = new JobInfo(job); + + allJobs.add(jobInfo); + } + return allJobs; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryContext.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryContext.java index 0dfebf85143..881c6c2dbf7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryContext.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryContext.java @@ -24,8 +24,13 @@ import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; public interface HistoryContext extends AppContext { Map getAllJobs(ApplicationId appID); + + JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, JobState jobState); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java new file mode 100644 index 00000000000..07b078f50aa --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java @@ -0,0 +1,763 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.v2.hs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentSkipListMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.mapred.JobACLsManager; +import org.apache.hadoop.mapreduce.jobhistory.JobSummary; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo; +import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.service.AbstractService; + +/** + * This class provides a way to interact with history files in a thread safe + * manor. 
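Two pieces of CachedHistoryStorage above are worth isolating. First, the loadedJobCache built in init() gets LRU eviction from an access-ordered LinkedHashMap whose removeEldestEntry override trips once the configured size is exceeded, wrapped in Collections.synchronizedMap for thread safety. A minimal, self-contained sketch of that idiom (the class and method names here are illustrative, not part of the patch):

    import java.util.Collections;
    import java.util.LinkedHashMap;
    import java.util.Map;

    /** Size-bounded LRU cache sketch; capacity plays the role of loadedJobCacheSize. */
    public class BoundedLruCache<K, V> {
      private final Map<K, V> map;

      @SuppressWarnings("serial")
      public BoundedLruCache(final int capacity) {
        // accessOrder=true makes get() refresh an entry's position; the eldest
        // (least recently used) entry is dropped once the map grows past capacity.
        this.map = Collections.synchronizedMap(
            new LinkedHashMap<K, V>(capacity + 1, 0.75f, true) {
              @Override
              protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                return size() > capacity;
              }
            });
      }

      public V get(K key) { return map.get(key); }

      public void put(K key, V value) { map.put(key, value); }
    }

Note that only individual map operations are synchronized; compound check-then-load sequences still need external locking, which is why getFullJob() above is a synchronized method. Second, the static getPartialJobs() normalizes its Long parameters, guards the offset + count sum against overflow, and pages through the job collection in a single pass after applying the user/queue/time/state filters. The same windowing logic on a plain list, with a Predicate standing in for those filters (again purely illustrative):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.Predicate;

    public final class Paging {
      /** Returns the [offset, offset+count) window of the elements that pass the filter. */
      public static <T> List<T> window(List<T> items, Long offset, Long count,
          Predicate<T> filter) {
        List<T> page = new ArrayList<T>();
        if (offset == null || offset < 0) offset = 0L;
        if (count == null) count = Long.MAX_VALUE;
        if (offset > items.size()) return page;

        long end = offset + count - 1;
        if (end < 0) {                    // offset + count overflowed a signed long
          end = Long.MAX_VALUE;
        }

        long at = 0;                      // position among elements that pass the filter
        for (T item : items) {
          if (at > end) break;
          if (!filter.test(item)) continue;
          at++;
          if (at - 1 < offset) continue;  // still before the requested window
          page.add(item);
        }
        return page;
      }
    }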
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class HistoryFileManager extends AbstractService { + private static final Log LOG = LogFactory.getLog(HistoryFileManager.class); + private static final Log SUMMARY_LOG = LogFactory.getLog(JobSummary.class); + + private static String DONE_BEFORE_SERIAL_TAIL = JobHistoryUtils + .doneSubdirsBeforeSerialTail(); + + public static class MetaInfo { + private Path historyFile; + private Path confFile; + private Path summaryFile; + private JobIndexInfo jobIndexInfo; + + public MetaInfo(Path historyFile, Path confFile, Path summaryFile, + JobIndexInfo jobIndexInfo) { + this.historyFile = historyFile; + this.confFile = confFile; + this.summaryFile = summaryFile; + this.jobIndexInfo = jobIndexInfo; + } + + private Path getHistoryFile() { + return historyFile; + } + + private Path getConfFile() { + return confFile; + } + + private Path getSummaryFile() { + return summaryFile; + } + + public JobIndexInfo getJobIndexInfo() { + return jobIndexInfo; + } + + public JobId getJobId() { + return jobIndexInfo.getJobId(); + } + + private void setHistoryFile(Path historyFile) { + this.historyFile = historyFile; + } + + private void setConfFile(Path confFile) { + this.confFile = confFile; + } + + private void setSummaryFile(Path summaryFile) { + this.summaryFile = summaryFile; + } + } + + /** + * Maps between a serial number (generated based on jobId) and the timestamp + * component(s) to which it belongs. Facilitates jobId based searches. If a + * jobId is not found in this list - it will not be found. + */ + private final SortedMap> idToDateString = + new TreeMap>(); + // The number of entries in idToDateString + private int dateStringCacheSize; + + // Maintains minimal details for recent jobs (parsed from history file name). + // Sorted on Job Completion Time. + private final SortedMap jobListCache = + new ConcurrentSkipListMap(); + // The number of jobs to maintain in the job list cache. + private int jobListCacheSize; + + // Re-use existing MetaInfo objects if they exist for the specific JobId. + // (synchronization on MetaInfo) + // Check for existence of the object when using iterators. + private final SortedMap intermediateListCache = + new ConcurrentSkipListMap(); + + // Maintains a list of known done subdirectories. + private final Set existingDoneSubdirs = new HashSet(); + + /** + * Maintains a mapping between intermediate user directories and the last + * known modification time. + */ + private Map userDirModificationTimeMap = + new HashMap(); + + private JobACLsManager aclsMgr; + + private Configuration conf; + + // TODO Remove me!!!! + private boolean debugMode; + private String serialNumberFormat; + + private Path doneDirPrefixPath = null; // folder for completed jobs + private FileContext doneDirFc; // done Dir FileContext + + private Path intermediateDoneDirPath = null; // Intermediate Done Dir Path + private FileContext intermediateDoneDirFc; // Intermediate Done Dir + // FileContext + + public HistoryFileManager() { + super(HistoryFileManager.class.getName()); + } + + @Override + public void init(Configuration conf) { + this.conf = conf; + + debugMode = conf.getBoolean(JHAdminConfig.MR_HISTORY_DEBUG_MODE, false); + int serialNumberLowDigits = debugMode ? 
1 : 3; + serialNumberFormat = ("%0" + + (JobHistoryUtils.SERIAL_NUMBER_DIRECTORY_DIGITS + serialNumberLowDigits) + + "d"); + + String doneDirPrefix = null; + doneDirPrefix = JobHistoryUtils + .getConfiguredHistoryServerDoneDirPrefix(conf); + try { + doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified( + new Path(doneDirPrefix)); + doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf); + mkdir(doneDirFc, doneDirPrefixPath, new FsPermission( + JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION)); + } catch (IOException e) { + throw new YarnException("Error creating done directory: [" + + doneDirPrefixPath + "]", e); + } + + String intermediateDoneDirPrefix = null; + intermediateDoneDirPrefix = JobHistoryUtils + .getConfiguredHistoryIntermediateDoneDirPrefix(conf); + try { + intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified( + new Path(intermediateDoneDirPrefix)); + intermediateDoneDirFc = FileContext.getFileContext( + intermediateDoneDirPath.toUri(), conf); + mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission( + JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); + } catch (IOException e) { + LOG.info("error creating done directory on dfs " + e); + throw new YarnException("Error creating intermediate done directory: [" + + intermediateDoneDirPath + "]", e); + } + + this.aclsMgr = new JobACLsManager(conf); + + jobListCacheSize = conf.getInt(JHAdminConfig.MR_HISTORY_JOBLIST_CACHE_SIZE, + JHAdminConfig.DEFAULT_MR_HISTORY_JOBLIST_CACHE_SIZE); + + dateStringCacheSize = conf.getInt( + JHAdminConfig.MR_HISTORY_DATESTRING_CACHE_SIZE, + JHAdminConfig.DEFAULT_MR_HISTORY_DATESTRING_CACHE_SIZE); + + super.init(conf); + } + + private void mkdir(FileContext fc, Path path, FsPermission fsp) + throws IOException { + if (!fc.util().exists(path)) { + try { + fc.mkdir(path, fsp, true); + + FileStatus fsStatus = fc.getFileStatus(path); + LOG.info("Perms after creating " + fsStatus.getPermission().toShort() + + ", Expected: " + fsp.toShort()); + if (fsStatus.getPermission().toShort() != fsp.toShort()) { + LOG.info("Explicitly setting permissions to : " + fsp.toShort() + + ", " + fsp); + fc.setPermission(path, fsp); + } + } catch (FileAlreadyExistsException e) { + LOG.info("Directory: [" + path + "] already exists."); + } + } + } + + /** + * Populates index data structures. Should only be called at initialization + * times. + */ + @SuppressWarnings("unchecked") + void initExisting() throws IOException { + LOG.info("Initializing Existing Jobs..."); + List timestampedDirList = findTimestampedDirectories(); + Collections.sort(timestampedDirList); + for (FileStatus fs : timestampedDirList) { + // TODO Could verify the correct format for these directories. + addDirectoryToSerialNumberIndex(fs.getPath()); + addDirectoryToJobListCache(fs.getPath()); + } + } + + private void removeDirectoryFromSerialNumberIndex(Path serialDirPath) { + String serialPart = serialDirPath.getName(); + String timeStampPart = JobHistoryUtils + .getTimestampPartFromPath(serialDirPath.toString()); + if (timeStampPart == null) { + LOG.warn("Could not find timestamp portion from path: " + + serialDirPath.toString() + ". Continuing with next"); + return; + } + if (serialPart == null) { + LOG.warn("Could not find serial portion from path: " + + serialDirPath.toString() + ". 
Continuing with next"); + return; + } + synchronized (idToDateString) { + // TODO make this thread safe without the synchronize + if (idToDateString.containsKey(serialPart)) { + Set set = idToDateString.get(serialPart); + set.remove(timeStampPart); + if (set.isEmpty()) { + idToDateString.remove(serialPart); + } + } + } + } + + private void addDirectoryToSerialNumberIndex(Path serialDirPath) { + if (LOG.isDebugEnabled()) { + LOG.debug("Adding " + serialDirPath + " to serial index"); + } + String serialPart = serialDirPath.getName(); + String timestampPart = JobHistoryUtils + .getTimestampPartFromPath(serialDirPath.toString()); + if (timestampPart == null) { + LOG.warn("Could not find timestamp portion from path: " + serialDirPath + + ". Continuing with next"); + return; + } + if (serialPart == null) { + LOG.warn("Could not find serial portion from path: " + + serialDirPath.toString() + ". Continuing with next"); + } + addToSerialNumberIndex(serialPart, timestampPart); + } + + private void addToSerialNumberIndex(String serialPart, String timestampPart) { + synchronized (idToDateString) { + // TODO make this thread safe without the synchronize + if (!idToDateString.containsKey(serialPart)) { + idToDateString.put(serialPart, new HashSet()); + if (idToDateString.size() > dateStringCacheSize) { + idToDateString.remove(idToDateString.firstKey()); + } + Set datePartSet = idToDateString.get(serialPart); + datePartSet.add(timestampPart); + } + } + } + + private void addDirectoryToJobListCache(Path path) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Adding " + path + " to job list cache."); + } + List historyFileList = scanDirectoryForHistoryFiles(path, + doneDirFc); + for (FileStatus fs : historyFileList) { + if (LOG.isDebugEnabled()) { + LOG.debug("Adding in history for " + fs.getPath()); + } + JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath() + .getName()); + String confFileName = JobHistoryUtils + .getIntermediateConfFileName(jobIndexInfo.getJobId()); + String summaryFileName = JobHistoryUtils + .getIntermediateSummaryFileName(jobIndexInfo.getJobId()); + MetaInfo metaInfo = new MetaInfo(fs.getPath(), new Path(fs.getPath() + .getParent(), confFileName), new Path(fs.getPath().getParent(), + summaryFileName), jobIndexInfo); + addToJobListCache(metaInfo); + } + } + + private static List scanDirectory(Path path, FileContext fc, + PathFilter pathFilter) throws IOException { + path = fc.makeQualified(path); + List jhStatusList = new ArrayList(); + RemoteIterator fileStatusIter = fc.listStatus(path); + while (fileStatusIter.hasNext()) { + FileStatus fileStatus = fileStatusIter.next(); + Path filePath = fileStatus.getPath(); + if (fileStatus.isFile() && pathFilter.accept(filePath)) { + jhStatusList.add(fileStatus); + } + } + return jhStatusList; + } + + private static List scanDirectoryForHistoryFiles(Path path, + FileContext fc) throws IOException { + return scanDirectory(path, fc, JobHistoryUtils.getHistoryFileFilter()); + } + + /** + * Finds all history directories with a timestamp component by scanning the + * filesystem. Used when the JobHistory server is started. 
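The scanDirectory() helper above is the one place directory listings happen: it qualifies the path, walks FileContext's RemoteIterator, and keeps only plain files accepted by a PathFilter. A small usage-style sketch of the same pattern; the ".jhist" suffix check is only an illustration, since the history server itself relies on JobHistoryUtils.getHistoryFileFilter():

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileContext;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;
    import org.apache.hadoop.fs.RemoteIterator;

    public class ListHistoryFiles {
      /** Lists the plain files under dir whose names end with ".jhist". */
      public static List<FileStatus> listMatching(Configuration conf, Path dir)
          throws IOException {
        FileContext fc = FileContext.getFileContext(conf);
        PathFilter filter = new PathFilter() {
          @Override
          public boolean accept(Path p) {
            return p.getName().endsWith(".jhist");
          }
        };
        List<FileStatus> matches = new ArrayList<FileStatus>();
        RemoteIterator<FileStatus> it = fc.listStatus(fc.makeQualified(dir));
        while (it.hasNext()) {
          FileStatus status = it.next();
          // Keep regular files only; directories never match the filter here.
          if (status.isFile() && filter.accept(status.getPath())) {
            matches.add(status);
          }
        }
        return matches;
      }
    }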
+ * + * @return + */ + private List findTimestampedDirectories() throws IOException { + List fsList = JobHistoryUtils.localGlobber(doneDirFc, + doneDirPrefixPath, DONE_BEFORE_SERIAL_TAIL); + return fsList; + } + + private void addToJobListCache(MetaInfo metaInfo) { + JobId jobId = metaInfo.getJobIndexInfo().getJobId(); + if (LOG.isDebugEnabled()) { + LOG.debug("Adding " + jobId + " to job list cache with " + + metaInfo.getJobIndexInfo()); + } + jobListCache.put(jobId, metaInfo); + if (jobListCache.size() > jobListCacheSize) { + jobListCache.remove(jobListCache.firstKey()); + } + } + + /** + * Scans the intermediate directory to find user directories. Scans these for + * history files if the modification time for the directory has changed. + * + * @throws IOException + */ + private void scanIntermediateDirectory() throws IOException { + List userDirList = JobHistoryUtils.localGlobber( + intermediateDoneDirFc, intermediateDoneDirPath, ""); + + for (FileStatus userDir : userDirList) { + String name = userDir.getPath().getName(); + long newModificationTime = userDir.getModificationTime(); + boolean shouldScan = false; + synchronized (userDirModificationTimeMap) { + if (!userDirModificationTimeMap.containsKey(name) + || newModificationTime > userDirModificationTimeMap.get(name)) { + shouldScan = true; + userDirModificationTimeMap.put(name, newModificationTime); + } + } + if (shouldScan) { + scanIntermediateDirectory(userDir.getPath()); + } + } + } + + /** + * Scans the specified path and populates the intermediate cache. + * + * @param absPath + * @throws IOException + */ + private void scanIntermediateDirectory(final Path absPath) throws IOException { + List fileStatusList = scanDirectoryForHistoryFiles(absPath, + intermediateDoneDirFc); + for (FileStatus fs : fileStatusList) { + JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath() + .getName()); + String confFileName = JobHistoryUtils + .getIntermediateConfFileName(jobIndexInfo.getJobId()); + String summaryFileName = JobHistoryUtils + .getIntermediateSummaryFileName(jobIndexInfo.getJobId()); + MetaInfo metaInfo = new MetaInfo(fs.getPath(), new Path(fs.getPath() + .getParent(), confFileName), new Path(fs.getPath().getParent(), + summaryFileName), jobIndexInfo); + if (!intermediateListCache.containsKey(jobIndexInfo.getJobId())) { + intermediateListCache.put(jobIndexInfo.getJobId(), metaInfo); + } + } + } + + /** + * Searches the job history file FileStatus list for the specified JobId. + * + * @param fileStatusList + * fileStatus list of Job History Files. + * @param jobId + * The JobId to find. + * @return A MetaInfo object for the jobId, null if not found. + * @throws IOException + */ + private MetaInfo getJobMetaInfo(List fileStatusList, JobId jobId) + throws IOException { + for (FileStatus fs : fileStatusList) { + JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath() + .getName()); + if (jobIndexInfo.getJobId().equals(jobId)) { + String confFileName = JobHistoryUtils + .getIntermediateConfFileName(jobIndexInfo.getJobId()); + String summaryFileName = JobHistoryUtils + .getIntermediateSummaryFileName(jobIndexInfo.getJobId()); + MetaInfo metaInfo = new MetaInfo(fs.getPath(), new Path(fs.getPath() + .getParent(), confFileName), new Path(fs.getPath().getParent(), + summaryFileName), jobIndexInfo); + return metaInfo; + } + } + return null; + } + + /** + * Scans old directories known by the idToDateString map for the specified + * jobId. 
If the number of directories is higher than the supported size of + * the idToDateString cache, the jobId will not be found. + * + * @param jobId + * the jobId. + * @return + * @throws IOException + */ + private MetaInfo scanOldDirsForJob(JobId jobId) throws IOException { + int jobSerialNumber = JobHistoryUtils.jobSerialNumber(jobId); + String boxedSerialNumber = String.valueOf(jobSerialNumber); + Set dateStringSet; + synchronized (idToDateString) { + Set found = idToDateString.get(boxedSerialNumber); + if (found == null) { + return null; + } else { + dateStringSet = new HashSet(found); + } + } + for (String timestampPart : dateStringSet) { + Path logDir = canonicalHistoryLogPath(jobId, timestampPart); + List fileStatusList = scanDirectoryForHistoryFiles(logDir, + doneDirFc); + MetaInfo metaInfo = getJobMetaInfo(fileStatusList, jobId); + if (metaInfo != null) { + return metaInfo; + } + } + return null; + } + + /** + * Checks for the existence of the job history file in the intermediate + * directory. + * + * @param jobId + * @return + * @throws IOException + */ + private MetaInfo scanIntermediateForJob(JobId jobId) throws IOException { + scanIntermediateDirectory(); + return intermediateListCache.get(jobId); + } + + /** + * Parse a job from the JobHistoryFile, if the underlying file is not going to + * be deleted. + * + * @param metaInfo + * the where the JobHistory is stored. + * @return the Job or null if the underlying file was deleted. + * @throws IOException + * if there is an error trying to read the file. + */ + public Job loadJob(MetaInfo metaInfo) throws IOException { + return new CompletedJob(conf, metaInfo.getJobIndexInfo().getJobId(), + metaInfo.getHistoryFile(), false, metaInfo.getJobIndexInfo().getUser(), + metaInfo.getConfFile(), aclsMgr); + } + + public Collection getAllMetaInfo() throws IOException { + scanIntermediateDirectory(); + ArrayList result = new ArrayList(); + result.addAll(intermediateListCache.values()); + result.addAll(jobListCache.values()); + return result; + } + + Collection getIntermediateMetaInfos() throws IOException { + scanIntermediateDirectory(); + return intermediateListCache.values(); + } + + public MetaInfo getMetaInfo(JobId jobId) throws IOException { + // MetaInfo available in cache. + MetaInfo metaInfo = null; + if (jobListCache.containsKey(jobId)) { + metaInfo = jobListCache.get(jobId); + } + + if (metaInfo != null) { + return metaInfo; + } + + // MetaInfo not available. Check intermediate directory for meta info. + metaInfo = scanIntermediateForJob(jobId); + if (metaInfo != null) { + return metaInfo; + } + + // Intermediate directory does not contain job. Search through older ones. 
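scanIntermediateDirectory() above avoids re-listing every user directory on every pass: it remembers the last modification time seen per directory and only rescans when that time advances. The same bookkeeping in isolation (class and method names are illustrative, not from the patch):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.fs.FileStatus;

    /** Tracks per-directory modification times so unchanged directories can be skipped. */
    public class DirChangeTracker {
      private final Map<String, Long> lastSeen = new HashMap<String, Long>();

      public synchronized boolean shouldRescan(FileStatus userDir) {
        String name = userDir.getPath().getName();
        long mtime = userDir.getModificationTime();
        Long previous = lastSeen.get(name);
        if (previous == null || mtime > previous) {
          lastSeen.put(name, mtime);   // remember the newer timestamp
          return true;                 // new or changed directory: rescan it
        }
        return false;                  // unchanged since the last pass: skip it
      }
    }

The patch keeps the equivalent map (userDirModificationTimeMap) inside HistoryFileManager and synchronizes on it directly; the synchronized method here is the same idea in a smaller package.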
+ metaInfo = scanOldDirsForJob(jobId); + if (metaInfo != null) { + return metaInfo; + } + return null; + } + + void moveToDone(MetaInfo metaInfo) throws IOException { + long completeTime = metaInfo.getJobIndexInfo().getFinishTime(); + if (completeTime == 0) + completeTime = System.currentTimeMillis(); + JobId jobId = metaInfo.getJobIndexInfo().getJobId(); + + List paths = new ArrayList(); + Path historyFile = metaInfo.getHistoryFile(); + if (historyFile == null) { + LOG.info("No file for job-history with " + jobId + " found in cache!"); + } else { + paths.add(historyFile); + } + + Path confFile = metaInfo.getConfFile(); + if (confFile == null) { + LOG.info("No file for jobConf with " + jobId + " found in cache!"); + } else { + paths.add(confFile); + } + + // TODO Check all mi getters and setters for the conf path + Path summaryFile = metaInfo.getSummaryFile(); + if (summaryFile == null) { + LOG.info("No summary file for job: " + jobId); + } else { + try { + String jobSummaryString = getJobSummary(intermediateDoneDirFc, + summaryFile); + SUMMARY_LOG.info(jobSummaryString); + LOG.info("Deleting JobSummary file: [" + summaryFile + "]"); + intermediateDoneDirFc.delete(summaryFile, false); + metaInfo.setSummaryFile(null); + } catch (IOException e) { + LOG.warn("Failed to process summary file: [" + summaryFile + "]"); + throw e; + } + } + + Path targetDir = canonicalHistoryLogPath(jobId, completeTime); + addDirectoryToSerialNumberIndex(targetDir); + try { + makeDoneSubdir(targetDir); + } catch (IOException e) { + LOG.warn("Failed creating subdirectory: " + targetDir + + " while attempting to move files for jobId: " + jobId); + throw e; + } + synchronized (metaInfo) { + if (historyFile != null) { + Path toPath = doneDirFc.makeQualified(new Path(targetDir, historyFile + .getName())); + try { + moveToDoneNow(historyFile, toPath); + } catch (IOException e) { + LOG.warn("Failed to move file: " + historyFile + " for jobId: " + + jobId); + throw e; + } + metaInfo.setHistoryFile(toPath); + } + if (confFile != null) { + Path toPath = doneDirFc.makeQualified(new Path(targetDir, confFile + .getName())); + try { + moveToDoneNow(confFile, toPath); + } catch (IOException e) { + LOG.warn("Failed to move file: " + historyFile + " for jobId: " + + jobId); + throw e; + } + metaInfo.setConfFile(toPath); + } + } + addToJobListCache(metaInfo); + intermediateListCache.remove(jobId); + } + + private void moveToDoneNow(final Path src, final Path target) + throws IOException { + LOG.info("Moving " + src.toString() + " to " + target.toString()); + intermediateDoneDirFc.rename(src, target, Options.Rename.NONE); + } + + private String getJobSummary(FileContext fc, Path path) throws IOException { + Path qPath = fc.makeQualified(path); + FSDataInputStream in = fc.open(qPath); + String jobSummaryString = in.readUTF(); + in.close(); + return jobSummaryString; + } + + private void makeDoneSubdir(Path path) throws IOException { + boolean existsInExistingCache = false; + synchronized (existingDoneSubdirs) { + if (existingDoneSubdirs.contains(path)) + existsInExistingCache = true; + } + try { + doneDirFc.getFileStatus(path); + if (!existsInExistingCache) { + existingDoneSubdirs.add(path); + if (LOG.isDebugEnabled()) { + LOG.debug("JobHistory.maybeMakeSubdirectory -- We believed " + path + + " already existed, but it didn't."); + } + } + } catch (FileNotFoundException fnfE) { + try { + FsPermission fsp = new FsPermission( + JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION); + doneDirFc.mkdir(path, fsp, true); + FileStatus fsStatus = 
doneDirFc.getFileStatus(path); + LOG.info("Perms after creating " + fsStatus.getPermission().toShort() + + ", Expected: " + fsp.toShort()); + if (fsStatus.getPermission().toShort() != fsp.toShort()) { + LOG.info("Explicitly setting permissions to : " + fsp.toShort() + + ", " + fsp); + doneDirFc.setPermission(path, fsp); + } + synchronized (existingDoneSubdirs) { + existingDoneSubdirs.add(path); + } + } catch (FileAlreadyExistsException faeE) { + // Nothing to do. + } + } + } + + private Path canonicalHistoryLogPath(JobId id, String timestampComponent) { + return new Path(doneDirPrefixPath, JobHistoryUtils.historyLogSubdirectory( + id, timestampComponent, serialNumberFormat)); + } + + private Path canonicalHistoryLogPath(JobId id, long millisecondTime) { + String timestampComponent = JobHistoryUtils.timestampDirectoryComponent( + millisecondTime, debugMode); + return new Path(doneDirPrefixPath, JobHistoryUtils.historyLogSubdirectory( + id, timestampComponent, serialNumberFormat)); + } + + private long getEffectiveTimestamp(long finishTime, FileStatus fileStatus) { + if (finishTime == 0) { + return fileStatus.getModificationTime(); + } + return finishTime; + } + + private void deleteJobFromDone(MetaInfo metaInfo) throws IOException { + jobListCache.remove(metaInfo.getJobId()); + doneDirFc.delete(doneDirFc.makeQualified(metaInfo.getHistoryFile()), false); + doneDirFc.delete(doneDirFc.makeQualified(metaInfo.getConfFile()), false); + } + + @SuppressWarnings("unchecked") + void clean(long cutoff, HistoryStorage storage) throws IOException { + // TODO this should be replaced by something that knows about the directory + // structure and will put less of a load on HDFS. + boolean halted = false; + // TODO Delete YYYY/MM/DD directories. + List serialDirList = findTimestampedDirectories(); + // Sort in ascending order. Relies on YYYY/MM/DD/Serial + Collections.sort(serialDirList); + for (FileStatus serialDir : serialDirList) { + List historyFileList = scanDirectoryForHistoryFiles( + serialDir.getPath(), doneDirFc); + for (FileStatus historyFile : historyFileList) { + JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(historyFile + .getPath().getName()); + long effectiveTimestamp = getEffectiveTimestamp( + jobIndexInfo.getFinishTime(), historyFile); + if (effectiveTimestamp <= cutoff) { + String confFileName = JobHistoryUtils + .getIntermediateConfFileName(jobIndexInfo.getJobId()); + MetaInfo metaInfo = new MetaInfo(historyFile.getPath(), new Path( + historyFile.getPath().getParent(), confFileName), null, + jobIndexInfo); + storage.jobRemovedFromHDFS(metaInfo.getJobId()); + deleteJobFromDone(metaInfo); + } else { + halted = true; + break; + } + } + if (!halted) { + doneDirFc.delete(doneDirFc.makeQualified(serialDir.getPath()), true); + removeDirectoryFromSerialNumberIndex(serialDir.getPath()); + synchronized (existingDoneSubdirs) { + existingDoneSubdirs.remove(serialDir.getPath()); + } + } else { + break; // Don't scan any more directories. 
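Both mkdir() and makeDoneSubdir() above follow the same create-if-missing discipline: probe for the directory, create it while tolerating a concurrent creator, and re-apply the intended permissions if the filesystem umask weakened them. A condensed sketch of that pattern; the helper name and the 0750 example permission are not from the patch:

    import java.io.FileNotFoundException;
    import java.io.IOException;

    import org.apache.hadoop.fs.FileAlreadyExistsException;
    import org.apache.hadoop.fs.FileContext;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    public class EnsureDir {
      public static void ensure(FileContext fc, Path dir, FsPermission perm)
          throws IOException {
        try {
          fc.getFileStatus(dir);                  // already there: nothing to do
        } catch (FileNotFoundException notFound) {
          try {
            fc.mkdir(dir, perm, true);            // create parent directories as needed
          } catch (FileAlreadyExistsException raced) {
            // another mover created it between the probe and the mkdir: fine
          }
          FileStatus status = fc.getFileStatus(dir);
          if (status.getPermission().toShort() != perm.toShort()) {
            fc.setPermission(dir, perm);          // undo any umask interference
          }
        }
      }
    }

    // Example call: EnsureDir.ensure(fc, new Path("/tmp/history/done"), new FsPermission((short) 0750));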
+ } + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryStorage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryStorage.java new file mode 100644 index 00000000000..bbdf9feabc6 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryStorage.java @@ -0,0 +1,80 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.mapreduce.v2.hs; + +import java.util.Map; + +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Provides an API to query jobs that have finished. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface HistoryStorage { + + /** + * Give the Storage a reference to a class that can be used to interact with + * history files. + * @param hsManager the class that is used to interact with history files. + */ + void setHistoryFileManager(HistoryFileManager hsManager); + + /** + * Look for a set of partial jobs. + * @param offset the offset into the list of jobs. + * @param count the maximum number of jobs to return. + * @param user only return jobs for the given user. + * @param queue only return jobs for in the given queue. + * @param sBegin only return Jobs that started on or after the given time. + * @param sEnd only return Jobs that started on or before the given time. + * @param fBegin only return Jobs that ended on or after the given time. + * @param fEnd only return Jobs that ended on or before the given time. + * @param jobState only return Jobs that are in the given job state. + * @return The list of filtered jobs. + */ + JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState); + + /** + * Get all of the cached jobs. This only returns partial jobs and is here for + * legacy reasons. + * @return all of the cached jobs + */ + Map getAllPartialJobs(); + + /** + * Get a fully parsed job. + * @param jobId the id of the job + * @return the job, or null if it is not found. + */ + Job getFullJob(JobId jobId); + + /** + * Informs the Storage that a job has been removed from HDFS + * @param jobId the ID of the job that was removed. 
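Back in HistoryFileManager.clean() above: completed jobs are visited in ascending time order (the serial directories are sorted first), each job's effective timestamp falls back to the file's modification time when the recorded finish time is zero, and the scan stops at the first job that is still young enough. A reduced sketch of that retention pass; Entry, purge(), and the cutoff derivation are illustrative stand-ins rather than API from the patch:

    import java.util.List;

    public class RetentionSweep {
      /** One unit of history to keep or purge; stands in for a job's MetaInfo. */
      public interface Entry {
        long effectiveTimestamp();   // finish time, or file mtime when unset
        void purge();                // delete the history/conf files, drop cache entries
      }

      /** Removes every entry at least maxAgeMs old; the list must be sorted ascending. */
      public static void sweep(List<? extends Entry> sortedAscending, long maxAgeMs) {
        long cutoff = System.currentTimeMillis() - maxAgeMs;
        for (Entry entry : sortedAscending) {
          if (entry.effectiveTimestamp() <= cutoff) {
            entry.purge();
          } else {
            break;                   // sorted input: everything after this is newer
          }
        }
      }
    }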
+ */ + void jobRemovedFromHDFS(JobId jobId); +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java index c0581655597..54ffec6924d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java @@ -1,36 +1,26 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.mapreduce.v2.hs; -import java.io.FileNotFoundException; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadFactory; @@ -41,26 +31,16 @@ import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileAlreadyExistsException; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Options; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.mapred.JobACLsManager; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; -import org.apache.hadoop.mapreduce.jobhistory.JobSummary; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.job.Job; -import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils; +import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.MetaInfo; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; -import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; -import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; import org.apache.hadoop.yarn.YarnException; @@ -69,106 +49,36 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.service.AbstractService; +import org.apache.hadoop.yarn.service.Service; import com.google.common.util.concurrent.ThreadFactoryBuilder; - -/* +/** * Loads and manages the Job history cache. 
*/ -public class JobHistory extends AbstractService implements HistoryContext { - - private static final int DEFAULT_JOBLIST_CACHE_SIZE = 20000; - private static final int DEFAULT_LOADEDJOB_CACHE_SIZE = 5; - private static final int DEFAULT_DATESTRING_CACHE_SIZE = 200000; - private static final long DEFAULT_MOVE_THREAD_INTERVAL = 3 * 60 * 1000l; //3 minutes - private static final int DEFAULT_MOVE_THREAD_COUNT = 3; - - static final long DEFAULT_HISTORY_MAX_AGE = 7 * 24 * 60 * 60 * 1000L; //1 week - static final long DEFAULT_RUN_INTERVAL = 1 * 24 * 60 * 60 * 1000l; //1 day - +public class JobHistory extends AbstractService implements HistoryContext { private static final Log LOG = LogFactory.getLog(JobHistory.class); - private static final Log SUMMARY_LOG = LogFactory.getLog(JobSummary.class); - public static final Pattern CONF_FILENAME_REGEX = - Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf.xml(?:\\.[0-9]+\\.old)?"); + public static final Pattern CONF_FILENAME_REGEX = Pattern.compile("(" + + JobID.JOBID_REGEX + ")_conf.xml(?:\\.[0-9]+\\.old)?"); public static final String OLD_SUFFIX = ".old"; - private static String DONE_BEFORE_SERIAL_TAIL = - JobHistoryUtils.doneSubdirsBeforeSerialTail(); - - /** - * Maps between a serial number (generated based on jobId) and the timestamp - * component(s) to which it belongs. - * Facilitates jobId based searches. - * If a jobId is not found in this list - it will not be found. - */ - private final SortedMap> idToDateString = - new ConcurrentSkipListMap>(); - - //Maintains minimal details for recent jobs (parsed from history file name). - //Sorted on Job Completion Time. - private final SortedMap jobListCache = - new ConcurrentSkipListMap(); - - - // Re-use exisiting MetaInfo objects if they exist for the specific JobId. (synchronization on MetaInfo) - // Check for existance of the object when using iterators. - private final SortedMap intermediateListCache = - new ConcurrentSkipListMap(); - - //Maintains a list of known done subdirectories. Not currently used. - private final Set existingDoneSubdirs = new HashSet(); - - private Map loadedJobCache = null; - - /** - * Maintains a mapping between intermediate user directories and the last - * known modification time. - */ - private Map userDirModificationTimeMap = - new HashMap(); - - //The number of jobs to maintain in the job list cache. - private int jobListCacheSize; - - private JobACLsManager aclsMgr; - - //The number of loaded jobs. - private int loadedJobCacheSize; - - //The number of entries in idToDateString - private int dateStringCacheSize; - - //Time interval for the move thread. + // Time interval for the move thread. private long moveThreadInterval; - - //Number of move threads. + + // Number of move threads. 
private int numMoveThreads; - + private Configuration conf; - private boolean debugMode; - private int serialNumberLowDigits; - private String serialNumberFormat; - - - private Path doneDirPrefixPath = null; // folder for completed jobs - private FileContext doneDirFc; // done Dir FileContext - - private Path intermediateDoneDirPath = null; //Intermediate Done Dir Path - private FileContext intermediateDoneDirFc; //Intermediate Done Dir FileContext - private Thread moveIntermediateToDoneThread = null; private MoveIntermediateToDoneRunnable moveIntermediateToDoneRunnable = null; - private ScheduledThreadPoolExecutor cleanerScheduledExecutor = null; - - /** - * Writes out files to the path - * .....${DONE_DIR}/VERSION_STRING/YYYY/MM/DD/HH/SERIAL_NUM/jh{index_entries}.jhist - */ - @SuppressWarnings("serial") + private ScheduledThreadPoolExecutor cleanerScheduledExecutor = null; + + private HistoryStorage storage = null; + private HistoryFileManager hsManager = null; + @Override public void init(Configuration conf) throws YarnException { LOG.info("JobHistory Init"); @@ -176,121 +86,66 @@ public class JobHistory extends AbstractService implements HistoryContext { this.appID = RecordFactoryProvider.getRecordFactory(conf) .newRecordInstance(ApplicationId.class); this.appAttemptID = RecordFactoryProvider.getRecordFactory(conf) - .newRecordInstance(ApplicationAttemptId.class); + .newRecordInstance(ApplicationAttemptId.class); - debugMode = conf.getBoolean(JHAdminConfig.MR_HISTORY_DEBUG_MODE, false); - serialNumberLowDigits = debugMode ? 1 : 3; - serialNumberFormat = ("%0" - + (JobHistoryUtils.SERIAL_NUMBER_DIRECTORY_DIGITS - + serialNumberLowDigits) + "d"); - - String doneDirPrefix = null; - doneDirPrefix = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf); - try { - doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified( - new Path(doneDirPrefix)); - doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf); - mkdir(doneDirFc, doneDirPrefixPath, new FsPermission( - JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION)); - } catch (IOException e) { - throw new YarnException("Error creating done directory: [" + - doneDirPrefixPath + "]", e); - } - - String intermediateDoneDirPrefix = null; - intermediateDoneDirPrefix = JobHistoryUtils - .getConfiguredHistoryIntermediateDoneDirPrefix(conf); - try { - intermediateDoneDirPath = FileContext.getFileContext(conf) - .makeQualified(new Path(intermediateDoneDirPrefix)); - intermediateDoneDirFc = FileContext.getFileContext( - intermediateDoneDirPath.toUri(), conf); - mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission( - JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); - } catch (IOException e) { - LOG.info("error creating done directory on dfs " + e); - throw new YarnException("Error creating intermediate done directory: [" - + intermediateDoneDirPath + "]", e); - } - - this.aclsMgr = new JobACLsManager(conf); - - jobListCacheSize = conf.getInt(JHAdminConfig.MR_HISTORY_JOBLIST_CACHE_SIZE, - DEFAULT_JOBLIST_CACHE_SIZE); - loadedJobCacheSize = conf.getInt(JHAdminConfig.MR_HISTORY_LOADED_JOB_CACHE_SIZE, - DEFAULT_LOADEDJOB_CACHE_SIZE); - dateStringCacheSize = conf.getInt(JHAdminConfig.MR_HISTORY_DATESTRING_CACHE_SIZE, - DEFAULT_DATESTRING_CACHE_SIZE); - moveThreadInterval = - conf.getLong(JHAdminConfig.MR_HISTORY_MOVE_INTERVAL_MS, - DEFAULT_MOVE_THREAD_INTERVAL); + moveThreadInterval = conf.getLong( + JHAdminConfig.MR_HISTORY_MOVE_INTERVAL_MS, + 
JHAdminConfig.DEFAULT_MR_HISTORY_MOVE_INTERVAL_MS); numMoveThreads = conf.getInt(JHAdminConfig.MR_HISTORY_MOVE_THREAD_COUNT, - DEFAULT_MOVE_THREAD_COUNT); - - loadedJobCache = - Collections.synchronizedMap(new LinkedHashMap( - loadedJobCacheSize + 1, 0.75f, true) { - @Override - public boolean removeEldestEntry(final Map.Entry eldest) { - return super.size() > loadedJobCacheSize; - } - }); - + JHAdminConfig.DEFAULT_MR_HISTORY_MOVE_THREAD_COUNT); + + hsManager = new HistoryFileManager(); + hsManager.init(conf); try { - initExisting(); + hsManager.initExisting(); } catch (IOException e) { throw new YarnException("Failed to intialize existing directories", e); } - super.init(conf); - } - - private void mkdir(FileContext fc, Path path, FsPermission fsp) - throws IOException { - if (!fc.util().exists(path)) { - try { - fc.mkdir(path, fsp, true); - FileStatus fsStatus = fc.getFileStatus(path); - LOG.info("Perms after creating " + fsStatus.getPermission().toShort() - + ", Expected: " + fsp.toShort()); - if (fsStatus.getPermission().toShort() != fsp.toShort()) { - LOG.info("Explicitly setting permissions to : " + fsp.toShort() - + ", " + fsp); - fc.setPermission(path, fsp); - } - } catch (FileAlreadyExistsException e) { - LOG.info("Directory: [" + path + "] already exists."); - } + storage = ReflectionUtils.newInstance(conf.getClass( + JHAdminConfig.MR_HISTORY_STORAGE, CachedHistoryStorage.class, + HistoryStorage.class), conf); + if (storage instanceof Service) { + ((Service) storage).init(conf); } + storage.setHistoryFileManager(hsManager); + + super.init(conf); } @Override public void start() { - //Start moveIntermediatToDoneThread - moveIntermediateToDoneRunnable = - new MoveIntermediateToDoneRunnable(moveThreadInterval, numMoveThreads); + hsManager.start(); + if (storage instanceof Service) { + ((Service) storage).start(); + } + + // Start moveIntermediatToDoneThread + moveIntermediateToDoneRunnable = new MoveIntermediateToDoneRunnable( + moveThreadInterval, numMoveThreads); moveIntermediateToDoneThread = new Thread(moveIntermediateToDoneRunnable); moveIntermediateToDoneThread.setName("MoveIntermediateToDoneScanner"); moveIntermediateToDoneThread.start(); - - //Start historyCleaner + + // Start historyCleaner boolean startCleanerService = conf.getBoolean( JHAdminConfig.MR_HISTORY_CLEANER_ENABLE, true); if (startCleanerService) { long maxAgeOfHistoryFiles = conf.getLong( - JHAdminConfig.MR_HISTORY_MAX_AGE_MS, DEFAULT_HISTORY_MAX_AGE); + JHAdminConfig.MR_HISTORY_MAX_AGE_MS, + JHAdminConfig.DEFAULT_MR_HISTORY_MAX_AGE); cleanerScheduledExecutor = new ScheduledThreadPoolExecutor(1, - new ThreadFactoryBuilder().setNameFormat("LogCleaner").build() - ); + new ThreadFactoryBuilder().setNameFormat("LogCleaner").build()); long runInterval = conf.getLong( - JHAdminConfig.MR_HISTORY_CLEANER_INTERVAL_MS, DEFAULT_RUN_INTERVAL); + JHAdminConfig.MR_HISTORY_CLEANER_INTERVAL_MS, + JHAdminConfig.DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS); cleanerScheduledExecutor .scheduleAtFixedRate(new HistoryCleaner(maxAgeOfHistoryFiles), 30 * 1000l, runInterval, TimeUnit.MILLISECONDS); } super.start(); } - + @Override public void stop() { LOG.info("Stopping JobHistory"); @@ -323,281 +178,16 @@ public class JobHistory extends AbstractService implements HistoryContext { LOG.warn("HistoryCleanerService shutdown may not have succeeded"); } } + if (storage instanceof Service) { + ((Service) storage).stop(); + } + hsManager.stop(); super.stop(); } - + public JobHistory() { super(JobHistory.class.getName()); } - - /** - * Populates 
index data structures. - * Should only be called at initialization times. - */ - @SuppressWarnings("unchecked") - private void initExisting() throws IOException { - LOG.info("Initializing Existing Jobs..."); - List timestampedDirList = findTimestampedDirectories(); - Collections.sort(timestampedDirList); - for (FileStatus fs : timestampedDirList) { - //TODO Could verify the correct format for these directories. - addDirectoryToSerialNumberIndex(fs.getPath()); - addDirectoryToJobListCache(fs.getPath()); - } - } - - private void removeDirectoryFromSerialNumberIndex(Path serialDirPath) { - String serialPart = serialDirPath.getName(); - String timeStampPart = - JobHistoryUtils.getTimestampPartFromPath(serialDirPath.toString()); - if (timeStampPart == null) { - LOG.warn("Could not find timestamp portion from path: " + - serialDirPath.toString() +". Continuing with next"); - return; - } - if (serialPart == null) { - LOG.warn("Could not find serial portion from path: " + - serialDirPath.toString() + ". Continuing with next"); - return; - } - if (idToDateString.containsKey(serialPart)) { - Set set = idToDateString.get(serialPart); - set.remove(timeStampPart); - if (set.isEmpty()) { - idToDateString.remove(serialPart); - } - } - - } - - private void addDirectoryToSerialNumberIndex(Path serialDirPath) { - if(LOG.isDebugEnabled()) { - LOG.debug("Adding "+serialDirPath+" to serial index"); - } - String serialPart = serialDirPath.getName(); - String timestampPart = - JobHistoryUtils.getTimestampPartFromPath(serialDirPath.toString()); - if (timestampPart == null) { - LOG.warn("Could not find timestamp portion from path: " + - serialDirPath.toString() +". Continuing with next"); - return; - } - if (serialPart == null) { - LOG.warn("Could not find serial portion from path: " + - serialDirPath.toString() + ". 
Continuing with next"); - } - addToSerialNumberIndex(serialPart, timestampPart); - } - - private void addToSerialNumberIndex(String serialPart, String timestampPart) { - if (!idToDateString.containsKey(serialPart)) { - idToDateString.put(serialPart, new HashSet()); - if (idToDateString.size() > dateStringCacheSize) { - idToDateString.remove(idToDateString.firstKey()); - } - Set datePartSet = idToDateString.get(serialPart); - datePartSet.add(timestampPart); - } - } - - private void addDirectoryToJobListCache(Path path) throws IOException { - if(LOG.isDebugEnabled()) { - LOG.debug("Adding "+path+" to job list cache."); - } - List historyFileList = scanDirectoryForHistoryFiles(path, - doneDirFc); - for (FileStatus fs : historyFileList) { - if(LOG.isDebugEnabled()) { - LOG.debug("Adding in history for "+fs.getPath()); - } - JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath() - .getName()); - String confFileName = JobHistoryUtils - .getIntermediateConfFileName(jobIndexInfo.getJobId()); - String summaryFileName = JobHistoryUtils - .getIntermediateSummaryFileName(jobIndexInfo.getJobId()); - MetaInfo metaInfo = new MetaInfo(fs.getPath(), new Path(fs.getPath() - .getParent(), confFileName), new Path(fs.getPath().getParent(), - summaryFileName), jobIndexInfo); - addToJobListCache(jobIndexInfo.getJobId(), metaInfo); - } - } - - private static List scanDirectory(Path path, FileContext fc, - PathFilter pathFilter) throws IOException { - path = fc.makeQualified(path); - List jhStatusList = new ArrayList(); - RemoteIterator fileStatusIter = fc.listStatus(path); - while (fileStatusIter.hasNext()) { - FileStatus fileStatus = fileStatusIter.next(); - Path filePath = fileStatus.getPath(); - if (fileStatus.isFile() && pathFilter.accept(filePath)) { - jhStatusList.add(fileStatus); - } - } - return jhStatusList; - } - - private static List scanDirectoryForHistoryFiles(Path path, - FileContext fc) throws IOException { - return scanDirectory(path, fc, JobHistoryUtils.getHistoryFileFilter()); - } - - /** - * Finds all history directories with a timestamp component by scanning - * the filesystem. - * Used when the JobHistory server is started. - * @return - */ - private List findTimestampedDirectories() throws IOException { - List fsList = JobHistoryUtils.localGlobber(doneDirFc, - doneDirPrefixPath, DONE_BEFORE_SERIAL_TAIL); - return fsList; - } - - /** - * Adds an entry to the job list cache. Maintains the size. - */ - private void addToJobListCache(JobId jobId, MetaInfo metaInfo) { - if(LOG.isDebugEnabled()) { - LOG.debug("Adding "+jobId+" to job list cache with " - +metaInfo.getJobIndexInfo()); - } - jobListCache.put(jobId, metaInfo); - if (jobListCache.size() > jobListCacheSize) { - jobListCache.remove(jobListCache.firstKey()); - } - } - - /** - * Adds an entry to the loaded job cache. Maintains the size. - */ - private void addToLoadedJobCache(Job job) { - if(LOG.isDebugEnabled()) { - LOG.debug("Adding "+job.getID()+" to loaded job cache"); - } - loadedJobCache.put(job.getID(), job); - } - - - /** - * Scans the intermediate directory to find user directories. Scans these - * for history files if the modification time for the directory has changed. 
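Returning to the reworked JobHistory.init() a little above: the storage layer is now pluggable. The concrete HistoryStorage class is read from configuration (JHAdminConfig.MR_HISTORY_STORAGE) with CachedHistoryStorage as the default, instantiated through ReflectionUtils, run through the Service lifecycle when it implements Service, and handed the HistoryFileManager. The same wiring pulled into a standalone factory, purely as an illustration of the pattern:

    package org.apache.hadoop.mapreduce.v2.hs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
    import org.apache.hadoop.util.ReflectionUtils;
    import org.apache.hadoop.yarn.service.Service;

    /** Illustrative factory mirroring the wiring done inline in JobHistory.init(). */
    public class HistoryStorageFactory {
      public static HistoryStorage create(Configuration conf,
          HistoryFileManager fileManager) {
        Class<? extends HistoryStorage> clazz = conf.getClass(
            JHAdminConfig.MR_HISTORY_STORAGE,  // user-configured class, if any
            CachedHistoryStorage.class,        // default implementation
            HistoryStorage.class);             // must implement this interface
        HistoryStorage storage = ReflectionUtils.newInstance(clazz, conf);
        if (storage instanceof Service) {
          ((Service) storage).init(conf);      // respect the service lifecycle
        }
        storage.setHistoryFileManager(fileManager);
        return storage;
      }
    }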
- * @throws IOException - */ - private void scanIntermediateDirectory() throws IOException { - List userDirList = - JobHistoryUtils.localGlobber(intermediateDoneDirFc, intermediateDoneDirPath, ""); - - for (FileStatus userDir : userDirList) { - String name = userDir.getPath().getName(); - long newModificationTime = userDir.getModificationTime(); - boolean shouldScan = false; - synchronized (userDirModificationTimeMap) { - if (!userDirModificationTimeMap.containsKey(name) || newModificationTime - > userDirModificationTimeMap.get(name)) { - shouldScan = true; - userDirModificationTimeMap.put(name, newModificationTime); - } - } - if (shouldScan) { - scanIntermediateDirectory(userDir.getPath()); - } - } - } - - /** - * Scans the specified path and populates the intermediate cache. - * @param absPath - * @throws IOException - */ - private void scanIntermediateDirectory(final Path absPath) - throws IOException { - List fileStatusList = scanDirectoryForHistoryFiles(absPath, - intermediateDoneDirFc); - for (FileStatus fs : fileStatusList) { - JobIndexInfo jobIndexInfo = FileNameIndexUtils.getIndexInfo(fs.getPath() - .getName()); - String confFileName = JobHistoryUtils - .getIntermediateConfFileName(jobIndexInfo.getJobId()); - String summaryFileName = JobHistoryUtils - .getIntermediateSummaryFileName(jobIndexInfo.getJobId()); - MetaInfo metaInfo = new MetaInfo(fs.getPath(), new Path(fs.getPath() - .getParent(), confFileName), new Path(fs.getPath().getParent(), - summaryFileName), jobIndexInfo); - if (!intermediateListCache.containsKey(jobIndexInfo.getJobId())) { - intermediateListCache.put(jobIndexInfo.getJobId(), metaInfo); - } - } - } - - /** - * Searches the job history file FileStatus list for the specified JobId. - * - * @param fileStatusList fileStatus list of Job History Files. - * @param jobId The JobId to find. - * @param checkForDoneFile whether to check for the existance of a done file. - * @return A MetaInfo object for the jobId, null if not found. - * @throws IOException - */ - private MetaInfo getJobMetaInfo(List fileStatusList, JobId jobId) - throws IOException { - for (FileStatus fs : fileStatusList) { - JobIndexInfo jobIndexInfo = - FileNameIndexUtils.getIndexInfo(fs.getPath().getName()); - if (jobIndexInfo.getJobId().equals(jobId)) { - String confFileName = JobHistoryUtils - .getIntermediateConfFileName(jobIndexInfo.getJobId()); - String summaryFileName = JobHistoryUtils - .getIntermediateSummaryFileName(jobIndexInfo.getJobId()); - MetaInfo metaInfo = new MetaInfo(fs.getPath(), new Path(fs.getPath() - .getParent(), confFileName), new Path(fs.getPath().getParent(), - summaryFileName), jobIndexInfo); - return metaInfo; - } - } - return null; - } - - /** - * Scans old directories known by the idToDateString map for the specified - * jobId. - * If the number of directories is higher than the supported size of the - * idToDateString cache, the jobId will not be found. - * @param jobId the jobId. 
- * @return - * @throws IOException - */ - private MetaInfo scanOldDirsForJob(JobId jobId) throws IOException { - int jobSerialNumber = JobHistoryUtils.jobSerialNumber(jobId); - String boxedSerialNumber = String.valueOf(jobSerialNumber); - Set dateStringSet = idToDateString.get(boxedSerialNumber); - if (dateStringSet == null) { - return null; - } - for (String timestampPart : dateStringSet) { - Path logDir = canonicalHistoryLogPath(jobId, timestampPart); - List fileStatusList = scanDirectoryForHistoryFiles(logDir, - doneDirFc); - MetaInfo metaInfo = getJobMetaInfo(fileStatusList, jobId); - if (metaInfo != null) { - return metaInfo; - } - } - return null; - } - - /** - * Checks for the existence of the job history file in the intermediate - * directory. - * @param jobId - * @return - * @throws IOException - */ - private MetaInfo scanIntermediateForJob(JobId jobId) throws IOException { - scanIntermediateDirectory(); - return intermediateListCache.get(jobId); - } @Override public String getApplicationName() { @@ -609,486 +199,167 @@ public class JobHistory extends AbstractService implements HistoryContext { private long sleepTime; private ThreadPoolExecutor moveToDoneExecutor = null; private boolean running = false; - - public void stop() { + + public synchronized void stop() { running = false; + notify(); } - + MoveIntermediateToDoneRunnable(long sleepTime, int numMoveThreads) { this.sleepTime = sleepTime; - ThreadFactory tf = new ThreadFactoryBuilder() - .setNameFormat("MoveIntermediateToDone Thread #%d") - .build(); - moveToDoneExecutor = new ThreadPoolExecutor(1, numMoveThreads, 1, + ThreadFactory tf = new ThreadFactoryBuilder().setNameFormat( + "MoveIntermediateToDone Thread #%d").build(); + moveToDoneExecutor = new ThreadPoolExecutor(1, numMoveThreads, 1, TimeUnit.HOURS, new LinkedBlockingQueue(), tf); running = true; } - - @Override + + @Override public void run() { Thread.currentThread().setName("IntermediateHistoryScanner"); try { - while (running) { + while (true) { LOG.info("Starting scan to move intermediate done files"); - scanIntermediateDirectory(); - for (final MetaInfo metaInfo : intermediateListCache.values()) { + for (final MetaInfo metaInfo : hsManager.getIntermediateMetaInfos()) { moveToDoneExecutor.execute(new Runnable() { @Override public void run() { try { - moveToDone(metaInfo); + hsManager.moveToDone(metaInfo); } catch (IOException e) { - LOG.info("Failed to process metaInfo for job: " + - metaInfo.jobIndexInfo.getJobId(), e); + LOG.info( + "Failed to process metaInfo for job: " + + metaInfo.getJobId(), e); } } }); - } - synchronized (this) { // TODO Is this really required. + synchronized (this) { try { this.wait(sleepTime); } catch (InterruptedException e) { LOG.info("IntermediateHistoryScannerThread interrupted"); } + if (!running) { + break; + } } } } catch (IOException e) { - LOG.warn("Unable to get a list of intermediate files to be moved from: " - + intermediateDoneDirPath); + LOG.warn("Unable to get a list of intermediate files to be moved"); + // TODO Shut down the entire process!!!! 
} } } - - private Job loadJob(MetaInfo metaInfo) { - synchronized(metaInfo) { - try { - Job job = new CompletedJob(conf, metaInfo.getJobIndexInfo().getJobId(), - metaInfo.getHistoryFile(), false, metaInfo.getJobIndexInfo().getUser(), - metaInfo.getConfFile(), this.aclsMgr); - addToLoadedJobCache(job); - return job; - } catch (IOException e) { - throw new YarnException("Could not find/load job: " + - metaInfo.getJobIndexInfo().getJobId(), e); - } - } - } - - private Map getAllJobsInternal() { - //TODO This should ideally be using getAllJobsMetaInfo - // or get rid of that method once Job has APIs for user, finishTime etc. - SortedMap result = new TreeMap(); - try { - scanIntermediateDirectory(); - } catch (IOException e) { - LOG.warn("Failed to scan intermediate directory", e); - throw new YarnException(e); - } - for (JobId jobId : intermediateListCache.keySet()) { - MetaInfo mi = intermediateListCache.get(jobId); - if (mi != null) { - result.put(jobId, new PartialJob(mi.getJobIndexInfo(), mi - .getJobIndexInfo().getJobId())); - } - } - for (JobId jobId : jobListCache.keySet()) { - MetaInfo mi = jobListCache.get(jobId); - if (mi != null) { - result.put(jobId, new PartialJob(mi.getJobIndexInfo(), mi - .getJobIndexInfo().getJobId())); - } - } - return result; - } /** * Helper method for test cases. */ MetaInfo getJobMetaInfo(JobId jobId) throws IOException { - //MetaInfo available in cache. - MetaInfo metaInfo = null; - if (jobListCache.containsKey(jobId)) { - metaInfo = jobListCache.get(jobId); - } - - if (metaInfo != null) { - return metaInfo; - } - - //MetaInfo not available. Check intermediate directory for meta info. - metaInfo = scanIntermediateForJob(jobId); - if (metaInfo != null) { - return metaInfo; - } - - //Intermediate directory does not contain job. Search through older ones. - metaInfo = scanOldDirsForJob(jobId); - if (metaInfo != null) { - return metaInfo; - } - return null; + return hsManager.getMetaInfo(jobId); } - - private Job findJob(JobId jobId) throws IOException { - //Job already loaded. - if (loadedJobCache.containsKey(jobId)) { - return loadedJobCache.get(jobId); - } - - //MetaInfo available in cache. - MetaInfo metaInfo = null; - if (jobListCache.containsKey(jobId)) { - metaInfo = jobListCache.get(jobId); - } - - if (metaInfo != null) { - return loadJob(metaInfo); - } - - //MetaInfo not available. Check intermediate directory for meta info. - metaInfo = scanIntermediateForJob(jobId); - if (metaInfo != null) { - return loadJob(metaInfo); - } - - //Intermediate directory does not contain job. Search through older ones. 
- metaInfo = scanOldDirsForJob(jobId); - if (metaInfo != null) { - return loadJob(metaInfo); - } - return null; - } - - private void moveToDone(MetaInfo metaInfo) throws IOException { - long completeTime = metaInfo.getJobIndexInfo().getFinishTime(); - if (completeTime == 0) completeTime = System.currentTimeMillis(); - JobId jobId = metaInfo.getJobIndexInfo().getJobId(); - - List paths = new ArrayList(); - Path historyFile = metaInfo.getHistoryFile(); - if (historyFile == null) { - LOG.info("No file for job-history with " + jobId + " found in cache!"); - } else { - paths.add(historyFile); - } - - Path confFile = metaInfo.getConfFile(); - if (confFile == null) { - LOG.info("No file for jobConf with " + jobId + " found in cache!"); - } else { - paths.add(confFile); - } - - //TODO Check all mi getters and setters for the conf path - Path summaryFile = metaInfo.getSummaryFile(); - if (summaryFile == null) { - LOG.info("No summary file for job: " + jobId); - } else { - try { - String jobSummaryString = getJobSummary(intermediateDoneDirFc, summaryFile); - SUMMARY_LOG.info(jobSummaryString); - LOG.info("Deleting JobSummary file: [" + summaryFile + "]"); - intermediateDoneDirFc.delete(summaryFile, false); - metaInfo.setSummaryFile(null); - } catch (IOException e) { - LOG.warn("Failed to process summary file: [" + summaryFile + "]"); - throw e; - } - } - - Path targetDir = canonicalHistoryLogPath(jobId, completeTime); - addDirectoryToSerialNumberIndex(targetDir); - try { - maybeMakeSubdirectory(targetDir); - } catch (IOException e) { - LOG.warn("Failed creating subdirectory: " + targetDir + - " while attempting to move files for jobId: " + jobId); - throw e; - } - synchronized (metaInfo) { - if (historyFile != null) { - Path toPath = doneDirFc.makeQualified(new Path(targetDir, - historyFile.getName())); - try { - moveToDoneNow(historyFile, toPath); - } catch (IOException e) { - LOG.warn("Failed to move file: " + historyFile + " for jobId: " - + jobId); - throw e; - } - metaInfo.setHistoryFile(toPath); - } - if (confFile != null) { - Path toPath = doneDirFc.makeQualified(new Path(targetDir, - confFile.getName())); - try { - moveToDoneNow(confFile, toPath); - } catch (IOException e) { - LOG.warn("Failed to move file: " + historyFile + " for jobId: " - + jobId); - throw e; - } - metaInfo.setConfFile(toPath); - } - } - addToJobListCache(jobId, metaInfo); - intermediateListCache.remove(jobId); - } - - private void moveToDoneNow(final Path src, final Path target) - throws IOException { - LOG.info("Moving " + src.toString() + " to " + target.toString()); - intermediateDoneDirFc.rename(src, target, Options.Rename.NONE); - // fc.util().copy(src, target); - //fc.delete(src, false); - //intermediateDoneDirFc.setPermission(target, new FsPermission( - //JobHistoryUtils.HISTORY_DONE_FILE_PERMISSION)); - } - - String getJobSummary(FileContext fc, Path path) throws IOException { - Path qPath = fc.makeQualified(path); - FSDataInputStream in = fc.open(qPath); - String jobSummaryString = in.readUTF(); - in.close(); - return jobSummaryString; - } - - private void maybeMakeSubdirectory(Path path) throws IOException { - boolean existsInExistingCache = false; - synchronized(existingDoneSubdirs) { - if (existingDoneSubdirs.contains(path)) existsInExistingCache = true; - } - try { - doneDirFc.getFileStatus(path); - if (!existsInExistingCache) { - existingDoneSubdirs.add(path); - if (debugMode) { - LOG.info("JobHistory.maybeMakeSubdirectory -- We believed " - + path + " already existed, but it didn't."); - } - } - } catch 
(FileNotFoundException fnfE) { - try { - FsPermission fsp = - new FsPermission(JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION); - doneDirFc.mkdir(path, fsp, true); - FileStatus fsStatus = doneDirFc.getFileStatus(path); - LOG.info("Perms after creating " + fsStatus.getPermission().toShort() - + ", Expected: " + fsp.toShort()); - if (fsStatus.getPermission().toShort() != fsp.toShort()) { - LOG.info("Explicitly setting permissions to : " + fsp.toShort() - + ", " + fsp); - doneDirFc.setPermission(path, fsp); - } - synchronized(existingDoneSubdirs) { - existingDoneSubdirs.add(path); - } - } catch (FileAlreadyExistsException faeE) { //Nothing to do. - } - } - } - - private Path canonicalHistoryLogPath(JobId id, String timestampComponent) { - return new Path(doneDirPrefixPath, - JobHistoryUtils.historyLogSubdirectory(id, timestampComponent, serialNumberFormat)); - } - - private Path canonicalHistoryLogPath(JobId id, long millisecondTime) { - String timestampComponent = - JobHistoryUtils.timestampDirectoryComponent(millisecondTime, debugMode); - return new Path(doneDirPrefixPath, - JobHistoryUtils.historyLogSubdirectory(id, timestampComponent, serialNumberFormat)); - } - @Override - public synchronized Job getJob(JobId jobId) { - if(LOG.isDebugEnabled()) { - LOG.debug("Looking for Job "+jobId); - } - Job job = null; - try { - job = findJob(jobId); - //This could return a null job. - } catch (IOException e) { - throw new YarnException(e); - } - return job; + public Job getJob(JobId jobId) { + return storage.getFullJob(jobId); } @Override public Map getAllJobs(ApplicationId appID) { - if(LOG.isDebugEnabled()) { + if (LOG.isDebugEnabled()) { LOG.debug("Called getAllJobs(AppId): " + appID); } -// currently there is 1 to 1 mapping between app and job id + // currently there is 1 to 1 mapping between app and job id org.apache.hadoop.mapreduce.JobID oldJobID = TypeConverter.fromYarn(appID); Map jobs = new HashMap(); JobId jobID = TypeConverter.toYarn(oldJobID); jobs.put(jobID, getJob(jobID)); return jobs; -// return getAllJobs(); } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.v2.hs.HistoryContext#getAllJobs() - * - * Returns a recent list of jobs. This may not be the complete set. - * If a previous jobId is known - it can be queries via the getJob(JobId) - * method. - * Size of this list is determined by the size of the job list cache. - * This can be fixed when pagination is implemented - return the first set of - * jobs via the cache, go to DFS only when an attempt is made to navigate - * past the cached list. - * This does involve a DFS oepration of scanning the intermediate directory. 
- */ + + @Override public Map getAllJobs() { - LOG.debug("Called getAllJobs()"); - return getAllJobsInternal(); + return storage.getAllPartialJobs(); } - static class MetaInfo { - private Path historyFile; - private Path confFile; - private Path summaryFile; - JobIndexInfo jobIndexInfo; - - MetaInfo(Path historyFile, Path confFile, Path summaryFile, - JobIndexInfo jobIndexInfo) { - this.historyFile = historyFile; - this.confFile = confFile; - this.summaryFile = summaryFile; - this.jobIndexInfo = jobIndexInfo; - } - - Path getHistoryFile() { return historyFile; } - Path getConfFile() { return confFile; } - Path getSummaryFile() { return summaryFile; } - JobIndexInfo getJobIndexInfo() { return jobIndexInfo; } - - void setHistoryFile(Path historyFile) { this.historyFile = historyFile; } - void setConfFile(Path confFile) {this.confFile = confFile; } - void setSummaryFile(Path summaryFile) { this.summaryFile = summaryFile; } + /** + * Look for a set of partial jobs. + * + * @param offset + * the offset into the list of jobs. + * @param count + * the maximum number of jobs to return. + * @param user + * only return jobs for the given user. + * @param queue + * only return jobs for in the given queue. + * @param sBegin + * only return Jobs that started on or after the given time. + * @param sEnd + * only return Jobs that started on or before the given time. + * @param fBegin + * only return Jobs that ended on or after the given time. + * @param fEnd + * only return Jobs that ended on or before the given time. + * @param jobState + * only return jobs that are in the give job state. + * @return The list of filtered jobs. + */ + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + return storage.getPartialJobs(offset, count, user, queue, sBegin, sEnd, + fBegin, fEnd, jobState); } - public class HistoryCleaner implements Runnable { - private long currentTime; - long maxAgeMillis; - long filesDeleted = 0; - long dirsDeleted = 0; - + public HistoryCleaner(long maxAge) { this.maxAgeMillis = maxAge; } - - @SuppressWarnings("unchecked") + public void run() { LOG.info("History Cleaner started"); - currentTime = System.currentTimeMillis(); - boolean halted = false; - //TODO Delete YYYY/MM/DD directories. + long cutoff = System.currentTimeMillis() - maxAgeMillis; try { - List serialDirList = findTimestampedDirectories(); - //Sort in ascending order. Relies on YYYY/MM/DD/Serial - Collections.sort(serialDirList); - for (FileStatus serialDir : serialDirList) { - List historyFileList = - scanDirectoryForHistoryFiles(serialDir.getPath(), doneDirFc); - for (FileStatus historyFile : historyFileList) { - JobIndexInfo jobIndexInfo = - FileNameIndexUtils.getIndexInfo(historyFile.getPath().getName()); - long effectiveTimestamp = - getEffectiveTimestamp(jobIndexInfo.getFinishTime(), historyFile); - if (shouldDelete(effectiveTimestamp)) { - String confFileName = - JobHistoryUtils.getIntermediateConfFileName(jobIndexInfo.getJobId()); - MetaInfo metaInfo = new MetaInfo(historyFile.getPath(), - new Path(historyFile.getPath().getParent(), confFileName), - null, jobIndexInfo); - delete(metaInfo); - } else { - halted = true; - break; - } - } - if (!halted) { - deleteDir(serialDir.getPath()); - removeDirectoryFromSerialNumberIndex(serialDir.getPath()); - synchronized (existingDoneSubdirs) { - existingDoneSubdirs.remove(serialDir.getPath()); - } - - } else { - break; //Don't scan any more directories. 
- } - } + hsManager.clean(cutoff, storage); } catch (IOException e) { - LOG.warn("Error in History cleaner run", e); + LOG.warn("Error trying to clean up ", e); } LOG.info("History Cleaner complete"); - LOG.info("FilesDeleted: " + filesDeleted); - LOG.info("Directories Deleted: " + dirsDeleted); } - - private boolean shouldDelete(long ts) { - return ((ts + maxAgeMillis) <= currentTime); - } - - private long getEffectiveTimestamp(long finishTime, FileStatus fileStatus) { - if (finishTime == 0) { - return fileStatus.getModificationTime(); - } - return finishTime; - } - - private void delete(MetaInfo metaInfo) throws IOException { - deleteFile(metaInfo.getHistoryFile()); - deleteFile(metaInfo.getConfFile()); - jobListCache.remove(metaInfo.getJobIndexInfo().getJobId()); - loadedJobCache.remove(metaInfo.getJobIndexInfo().getJobId()); - } - - private void deleteFile(final Path path) throws IOException { - doneDirFc.delete(doneDirFc.makeQualified(path), false); - filesDeleted++; - } - - private void deleteDir(Path path) throws IOException { - doneDirFc.delete(doneDirFc.makeQualified(path), true); - dirsDeleted++; - } - } - - - - //TODO AppContext - Not Required - private ApplicationAttemptId appAttemptID; + } + + // TODO AppContext - Not Required + private ApplicationAttemptId appAttemptID; + @Override public ApplicationAttemptId getApplicationAttemptId() { - //TODO fixme - bogus appAttemptID for now + // TODO fixme - bogus appAttemptID for now return appAttemptID; - } - - //TODO AppContext - Not Required + } + + // TODO AppContext - Not Required private ApplicationId appID; + @Override public ApplicationId getApplicationID() { - //TODO fixme - bogus appID for now + // TODO fixme - bogus appID for now return appID; } - - //TODO AppContext - Not Required + + // TODO AppContext - Not Required @Override public EventHandler getEventHandler() { // TODO Auto-generated method stub return null; } - - //TODO AppContext - Not Required + + // TODO AppContext - Not Required private String userName; + @Override public CharSequence getUser() { if (userName != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/PartialJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/PartialJob.java index 83380ea5f87..f2acbe48a9c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/PartialJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/PartialJob.java @@ -51,6 +51,7 @@ public class PartialJob implements org.apache.hadoop.mapreduce.v2.app.job.Job { jobReport = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(JobReport.class); jobReport.setStartTime(jobIndexInfo.getSubmitTime()); jobReport.setFinishTime(jobIndexInfo.getFinishTime()); + jobReport.setJobState(getState()); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java index 71f1e30a280..76991a27cb9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebApp.java @@ -44,6 +44,7 @@ public class HsWebApp extends WebApp implements AMParams { bind(JAXBContextResolver.class); bind(GenericExceptionHandler.class); bind(AppContext.class).toInstance(history); + bind(HistoryContext.class).toInstance(history); route("/", HsController.class); route("/app", HsController.class); route(pajoin("/job", JOB_ID), HsController.class, "job"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java index 404cfbb22cb..71ad89f028e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java @@ -32,10 +32,8 @@ import javax.ws.rs.core.UriInfo; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobState; -import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; -import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; @@ -49,6 +47,7 @@ import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptsInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TasksInfo; +import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.AMAttemptInfo; import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.AMAttemptsInfo; import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.HistoryInfo; @@ -64,7 +63,7 @@ import com.google.inject.Inject; @Path("/ws/v1/history") public class HsWebServices { - private final AppContext appCtx; + private final HistoryContext ctx; private WebApp webapp; private final Configuration conf; @@ -72,9 +71,9 @@ public class HsWebServices { UriInfo uriInfo; @Inject - public HsWebServices(final AppContext appCtx, final Configuration conf, + public HsWebServices(final HistoryContext ctx, final Configuration conf, final WebApp webapp) { - this.appCtx = appCtx; + this.ctx = ctx; this.conf = conf; this.webapp = webapp; } @@ -103,33 +102,22 @@ public class HsWebServices { @QueryParam("startedTimeEnd") String startedEnd, @QueryParam("finishedTimeBegin") String finishBegin, @QueryParam("finishedTimeEnd") String finishEnd) { - JobsInfo allJobs = new JobsInfo(); - long num = 0; - boolean checkCount = false; - boolean checkStart = false; - boolean checkEnd = false; - long countNum = 0; - - // set values suitable in case both of begin/end not specified - long sBegin = 0; - long sEnd = Long.MAX_VALUE; - long fBegin = 0; - long fEnd = Long.MAX_VALUE; + Long countParam = null; + if (count != null && !count.isEmpty()) { - checkCount = true; try { - countNum = 
Long.parseLong(count); + countParam = Long.parseLong(count); } catch (NumberFormatException e) { throw new BadRequestException(e.getMessage()); } - if (countNum <= 0) { + if (countParam <= 0) { throw new BadRequestException("limit value must be greater then 0"); } } + Long sBegin = null; if (startedBegin != null && !startedBegin.isEmpty()) { - checkStart = true; try { sBegin = Long.parseLong(startedBegin); } catch (NumberFormatException e) { @@ -139,8 +127,9 @@ public class HsWebServices { throw new BadRequestException("startedTimeBegin must be greater than 0"); } } + + Long sEnd = null; if (startedEnd != null && !startedEnd.isEmpty()) { - checkStart = true; try { sEnd = Long.parseLong(startedEnd); } catch (NumberFormatException e) { @@ -150,13 +139,13 @@ public class HsWebServices { throw new BadRequestException("startedTimeEnd must be greater than 0"); } } - if (sBegin > sEnd) { + if (sBegin != null && sEnd != null && sBegin > sEnd) { throw new BadRequestException( "startedTimeEnd must be greater than startTimeBegin"); } + Long fBegin = null; if (finishBegin != null && !finishBegin.isEmpty()) { - checkEnd = true; try { fBegin = Long.parseLong(finishBegin); } catch (NumberFormatException e) { @@ -166,8 +155,8 @@ public class HsWebServices { throw new BadRequestException("finishedTimeBegin must be greater than 0"); } } + Long fEnd = null; if (finishEnd != null && !finishEnd.isEmpty()) { - checkEnd = true; try { fEnd = Long.parseLong(finishEnd); } catch (NumberFormatException e) { @@ -177,53 +166,18 @@ public class HsWebServices { throw new BadRequestException("finishedTimeEnd must be greater than 0"); } } - if (fBegin > fEnd) { + if (fBegin != null && fEnd != null && fBegin > fEnd) { throw new BadRequestException( "finishedTimeEnd must be greater than finishedTimeBegin"); } - - for (Job job : appCtx.getAllJobs().values()) { - if (checkCount && num == countNum) { - break; - } - - if (stateQuery != null && !stateQuery.isEmpty()) { - JobState.valueOf(stateQuery); - if (!job.getState().toString().equalsIgnoreCase(stateQuery)) { - continue; - } - } - - // can't really validate queue is a valid one since queues could change - if (queueQuery != null && !queueQuery.isEmpty()) { - if (!job.getQueueName().equals(queueQuery)) { - continue; - } - } - - if (userQuery != null && !userQuery.isEmpty()) { - if (!job.getUserName().equals(userQuery)) { - continue; - } - } - - JobReport report = job.getReport(); - - if (checkStart - && (report.getStartTime() < sBegin || report.getStartTime() > sEnd)) { - continue; - } - if (checkEnd - && (report.getFinishTime() < fBegin || report.getFinishTime() > fEnd)) { - continue; - } - - JobInfo jobInfo = new JobInfo(job); - - allJobs.add(jobInfo); - num++; + + JobState jobState = null; + if (stateQuery != null) { + jobState = JobState.valueOf(stateQuery); } - return allJobs; + + return ctx.getPartialJobs(0l, countParam, userQuery, queueQuery, + sBegin, sEnd, fBegin, fEnd, jobState); } @GET @@ -231,7 +185,7 @@ public class HsWebServices { @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public JobInfo getJob(@PathParam("jobid") String jid) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); return new JobInfo(job); } @@ -240,7 +194,7 @@ public class HsWebServices { @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public AMAttemptsInfo getJobAttempts(@PathParam("jobid") String jid) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = 
AMWebServices.getJobFromJobIdString(jid, ctx); AMAttemptsInfo amAttempts = new AMAttemptsInfo(); for (AMInfo amInfo : job.getAMInfos()) { AMAttemptInfo attempt = new AMAttemptInfo(amInfo, MRApps.toString(job @@ -256,8 +210,8 @@ public class HsWebServices { @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public JobCounterInfo getJobCounters(@PathParam("jobid") String jid) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); - return new JobCounterInfo(this.appCtx, job); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); + return new JobCounterInfo(this.ctx, job); } @GET @@ -265,7 +219,7 @@ public class HsWebServices { @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public ConfInfo getJobConf(@PathParam("jobid") String jid) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); ConfInfo info; try { info = new ConfInfo(job, this.conf); @@ -282,7 +236,7 @@ public class HsWebServices { public TasksInfo getJobTasks(@PathParam("jobid") String jid, @QueryParam("type") String type) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); TasksInfo allTasks = new TasksInfo(); for (Task task : job.getTasks().values()) { TaskType ttype = null; @@ -307,7 +261,7 @@ public class HsWebServices { public TaskInfo getJobTask(@PathParam("jobid") String jid, @PathParam("taskid") String tid) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); Task task = AMWebServices.getTaskFromTaskIdString(tid, job); return new TaskInfo(task); @@ -319,7 +273,7 @@ public class HsWebServices { public JobTaskCounterInfo getSingleTaskCounters( @PathParam("jobid") String jid, @PathParam("taskid") String tid) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); TaskId taskID = MRApps.toTaskID(tid); if (taskID == null) { throw new NotFoundException("taskid " + tid + " not found or invalid"); @@ -338,7 +292,7 @@ public class HsWebServices { @PathParam("taskid") String tid) { TaskAttemptsInfo attempts = new TaskAttemptsInfo(); - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); Task task = AMWebServices.getTaskFromTaskIdString(tid, job); for (TaskAttempt ta : task.getAttempts().values()) { if (ta != null) { @@ -358,7 +312,7 @@ public class HsWebServices { public TaskAttemptInfo getJobTaskAttemptId(@PathParam("jobid") String jid, @PathParam("taskid") String tid, @PathParam("attemptid") String attId) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); Task task = AMWebServices.getTaskFromTaskIdString(tid, job); TaskAttempt ta = AMWebServices.getTaskAttemptFromTaskAttemptString(attId, task); @@ -376,7 +330,7 @@ public class HsWebServices { @PathParam("jobid") String jid, @PathParam("taskid") String tid, @PathParam("attemptid") String attId) { - Job job = AMWebServices.getJobFromJobIdString(jid, appCtx); + Job job = AMWebServices.getJobFromJobIdString(jid, ctx); Task task = AMWebServices.getTaskFromTaskIdString(tid, job); TaskAttempt ta = AMWebServices.getTaskAttemptFromTaskAttemptString(attId, task); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java index d737cd23766..2b0d8965ec8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java @@ -92,6 +92,14 @@ public class TestJobHistoryParsing { checkHistoryParsing(3, 0, 2); } + private static String getJobSummary(FileContext fc, Path path) throws IOException { + Path qPath = fc.makeQualified(path); + FSDataInputStream in = fc.open(qPath); + String jobSummaryString = in.readUTF(); + in.close(); + return jobSummaryString; + } + private void checkHistoryParsing(final int numMaps, final int numReduces, final int numSuccessfulMaps) throws Exception { @@ -244,7 +252,7 @@ public class TestJobHistoryParsing { String summaryFileName = JobHistoryUtils .getIntermediateSummaryFileName(jobId); Path summaryFile = new Path(jobhistoryDir, summaryFileName); - String jobSummaryString = jobHistory.getJobSummary(fc, summaryFile); + String jobSummaryString = getJobSummary(fc, summaryFile); Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100")); Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100")); Assert.assertNotNull(jobSummaryString); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServices.java index e14b28c993b..ce6a31bc727 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServices.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServices.java @@ -30,11 +30,13 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; import org.apache.hadoop.mapreduce.v2.hs.JobHistory; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; @@ -77,7 +79,7 @@ public class TestHsWebServices extends JerseyTest { private static TestAppContext appContext; private static HsWebApp webApp; - static class TestAppContext implements AppContext { + static class TestAppContext implements HistoryContext { final ApplicationAttemptId appAttemptID; final ApplicationId appID; final String user = MockJobs.newUserName(); @@ -144,6 +146,20 @@ public class TestHsWebServices extends JerseyTest { public ClusterInfo getClusterInfo() { return null; } + + @Override + public Map getAllJobs(ApplicationId appID) { + // TODO Auto-generated method stub + return null; + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String 
queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + // TODO Auto-generated method stub + return null; + } } private Injector injector = Guice.createInjector(new ServletModule() { @@ -160,6 +176,7 @@ public class TestHsWebServices extends JerseyTest { bind(GenericExceptionHandler.class); bind(WebApp.class).toInstance(webApp); bind(AppContext.class).toInstance(appContext); + bind(HistoryContext.class).toInstance(appContext); bind(Configuration.class).toInstance(conf); serve("/*").with(GuiceContainer.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java index 79e66af7245..a584987d427 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java @@ -35,6 +35,7 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; @@ -42,6 +43,8 @@ import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; @@ -89,7 +92,7 @@ public class TestHsWebServicesAttempts extends JerseyTest { private static TestAppContext appContext; private static HsWebApp webApp; - static class TestAppContext implements AppContext { + static class TestAppContext implements HistoryContext { final ApplicationAttemptId appAttemptID; final ApplicationId appID; final String user = MockJobs.newUserName(); @@ -156,6 +159,20 @@ public class TestHsWebServicesAttempts extends JerseyTest { public ClusterInfo getClusterInfo() { return null; } + + @Override + public Map getAllJobs(ApplicationId appID) { + // TODO Auto-generated method stub + return null; + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + // TODO Auto-generated method stub + return null; + } } private Injector injector = Guice.createInjector(new ServletModule() { @@ -171,6 +188,7 @@ public class TestHsWebServicesAttempts extends JerseyTest { bind(GenericExceptionHandler.class); bind(WebApp.class).toInstance(webApp); bind(AppContext.class).toInstance(appContext); + bind(HistoryContext.class).toInstance(appContext); bind(Configuration.class).toInstance(conf); serve("/*").with(GuiceContainer.class); diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobConf.java index 57999658edc..d19a6468a0b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobConf.java @@ -41,9 +41,12 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; @@ -90,7 +93,7 @@ public class TestHsWebServicesJobConf extends JerseyTest { private static File testConfDir = new File("target", TestHsWebServicesJobConf.class.getSimpleName() + "confDir"); - static class TestAppContext implements AppContext { + static class TestAppContext implements HistoryContext { final ApplicationAttemptId appAttemptID; final ApplicationId appID; final String user = MockJobs.newUserName(); @@ -156,6 +159,20 @@ public class TestHsWebServicesJobConf extends JerseyTest { public ClusterInfo getClusterInfo() { return null; } + + @Override + public Map getAllJobs(ApplicationId appID) { + // TODO Auto-generated method stub + return null; + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + // TODO Auto-generated method stub + return null; + } } private Injector injector = Guice.createInjector(new ServletModule() { @@ -195,6 +212,7 @@ public class TestHsWebServicesJobConf extends JerseyTest { bind(GenericExceptionHandler.class); bind(WebApp.class).toInstance(webApp); bind(AppContext.class).toInstance(appContext); + bind(HistoryContext.class).toInstance(appContext); bind(Configuration.class).toInstance(conf); serve("/*").with(GuiceContainer.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobs.java index 3404e71e539..04524062317 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobs.java @@ -38,11 +38,15 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import 
org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.hs.CachedHistoryStorage; +import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; import org.apache.hadoop.mapreduce.v2.hs.MockHistoryJobs; import org.apache.hadoop.mapreduce.v2.hs.MockHistoryJobs.JobsPair; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; @@ -90,7 +94,7 @@ public class TestHsWebServicesJobs extends JerseyTest { private static TestAppContext appContext; private static HsWebApp webApp; - static class TestAppContext implements AppContext { + static class TestAppContext implements HistoryContext { final ApplicationAttemptId appAttemptID; final ApplicationId appID; final String user = MockJobs.newUserName(); @@ -169,6 +173,20 @@ public class TestHsWebServicesJobs extends JerseyTest { public ClusterInfo getClusterInfo() { return null; } + + @Override + public Map getAllJobs(ApplicationId appID) { + // TODO Auto-generated method stub + return null; + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + return CachedHistoryStorage.getPartialJobs(this.partialJobs.values(), + offset, count, user, queue, sBegin, sEnd, fBegin, fEnd, jobState); + } } private Injector injector = Guice.createInjector(new ServletModule() { @@ -184,6 +202,7 @@ public class TestHsWebServicesJobs extends JerseyTest { bind(GenericExceptionHandler.class); bind(WebApp.class).toInstance(webApp); bind(AppContext.class).toInstance(appContext); + bind(HistoryContext.class).toInstance(appContext); bind(Configuration.class).toInstance(conf); serve("/*").with(GuiceContainer.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobsQuery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobsQuery.java index c0110dcd087..5d5da9d551d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobsQuery.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesJobsQuery.java @@ -36,8 +36,11 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.hs.CachedHistoryStorage; +import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; import org.apache.hadoop.mapreduce.v2.hs.MockHistoryJobs; import org.apache.hadoop.mapreduce.v2.hs.MockHistoryJobs.JobsPair; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; @@ -77,7 +80,7 @@ public class TestHsWebServicesJobsQuery extends JerseyTest { private static 
TestAppContext appContext; private static HsWebApp webApp; - static class TestAppContext implements AppContext { + static class TestAppContext implements HistoryContext { final String user = MockJobs.newUserName(); final Map fullJobs; final Map partialJobs; @@ -152,6 +155,20 @@ public class TestHsWebServicesJobsQuery extends JerseyTest { public ClusterInfo getClusterInfo() { return null; } + + @Override + public Map getAllJobs(ApplicationId appID) { + // TODO Auto-generated method stub + return null; + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + return CachedHistoryStorage.getPartialJobs(this.partialJobs.values(), + offset, count, user, queue, sBegin, sEnd, fBegin, fEnd, jobState); + } } private Injector injector = Guice.createInjector(new ServletModule() { @@ -167,6 +184,7 @@ public class TestHsWebServicesJobsQuery extends JerseyTest { bind(GenericExceptionHandler.class); bind(WebApp.class).toInstance(webApp); bind(AppContext.class).toInstance(appContext); + bind(HistoryContext.class).toInstance(appContext); bind(Configuration.class).toInstance(conf); serve("/*").with(GuiceContainer.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesTasks.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesTasks.java index 471acb5b289..c8cdb7c22e3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesTasks.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesTasks.java @@ -34,12 +34,15 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskReport; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.hs.HistoryContext; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.ClusterInfo; @@ -85,7 +88,7 @@ public class TestHsWebServicesTasks extends JerseyTest { private static TestAppContext appContext; private static HsWebApp webApp; - static class TestAppContext implements AppContext { + static class TestAppContext implements HistoryContext { final ApplicationAttemptId appAttemptID; final ApplicationId appID; final String user = MockJobs.newUserName(); @@ -152,6 +155,20 @@ public class TestHsWebServicesTasks extends JerseyTest { public ClusterInfo getClusterInfo() { return null; } + + @Override + public Map getAllJobs(ApplicationId appID) { + // TODO Auto-generated method stub + return null; + } + + @Override + public JobsInfo getPartialJobs(Long offset, Long count, String user, + String queue, Long sBegin, Long sEnd, Long fBegin, Long fEnd, + JobState jobState) { + // 
TODO Auto-generated method stub + return null; + } } private Injector injector = Guice.createInjector(new ServletModule() { @@ -167,6 +184,7 @@ public class TestHsWebServicesTasks extends JerseyTest { bind(GenericExceptionHandler.class); bind(WebApp.class).toInstance(webApp); bind(AppContext.class).toInstance(appContext); + bind(HistoryContext.class).toInstance(appContext); bind(Configuration.class).toInstance(conf); serve("/*").with(GuiceContainer.class); From 793746870b704a30fa0595d09da3d176ada75c35 Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Tue, 10 Apr 2012 18:57:34 +0000 Subject: [PATCH 17/29] MAPREDUCE-4099. ApplicationMaster may fail to remove staging directory (Jason Lowe via bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311926 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../hadoop/mapreduce/v2/app/MRAppMaster.java | 15 ++-- .../hadoop/mapreduce/v2/app/TestMRApp.java | 70 +++++++++++++++++++ 3 files changed, 81 insertions(+), 7 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 44dd8abda13..4b539aaf1c4 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -292,6 +292,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4117. mapred job -status throws NullPointerException (Devaraj K via bobby) + MAPREDUCE-4099. ApplicationMaster may fail to remove staging directory + (Jason Lowe via bobby) + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 6fc8fab10fc..680872c9866 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -405,6 +405,14 @@ public class MRAppMaster extends CompositeService { } catch (InterruptedException e) { e.printStackTrace(); } + + // Cleanup staging directory + try { + cleanupStagingDir(); + } catch(IOException io) { + LOG.warn("Failed to delete staging dir", io); + } + try { // Stop all services // This will also send the final report to the ResourceManager @@ -415,13 +423,6 @@ public class MRAppMaster extends CompositeService { LOG.warn("Graceful stop failed ", t); } - // Cleanup staging directory - try { - cleanupStagingDir(); - } catch(IOException io) { - LOG.warn("Failed to delete staging dir"); - } - //Bring the process down by force. 
//Not needed after HADOOP-7140 LOG.info("Exiting MR AppMaster..GoodBye!"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java index 9d1b8cc30bc..68d07a7ef2e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java @@ -22,6 +22,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.spy; +import java.io.IOException; import java.util.Iterator; import junit.framework.Assert; @@ -35,11 +36,14 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.YarnException; import org.junit.Test; /** @@ -233,6 +237,71 @@ public class TestMRApp { } } + private final class MRAppTestCleanup extends MRApp { + boolean hasStopped; + boolean cleanedBeforeStopped; + + public MRAppTestCleanup(int maps, int reduces, boolean autoComplete, + String testName, boolean cleanOnStart) { + super(maps, reduces, autoComplete, testName, cleanOnStart); + hasStopped = false; + cleanedBeforeStopped = false; + } + + @Override + protected Job createJob(Configuration conf) { + UserGroupInformation currentUser = null; + try { + currentUser = UserGroupInformation.getCurrentUser(); + } catch (IOException e) { + throw new YarnException(e); + } + Job newJob = new TestJob(getJobId(), getAttemptID(), conf, + getDispatcher().getEventHandler(), + getTaskAttemptListener(), getContext().getClock(), + getCommitter(), isNewApiCommitter(), + currentUser.getUserName(), getContext()); + ((AppContext) getContext()).getAllJobs().put(newJob.getID(), newJob); + + getDispatcher().register(JobFinishEvent.Type.class, + createJobFinishEventHandler()); + + return newJob; + } + + @Override + public void cleanupStagingDir() throws IOException { + cleanedBeforeStopped = !hasStopped; + } + + @Override + public synchronized void stop() { + hasStopped = true; + super.stop(); + } + + @Override + protected void sysexit() { + } + } + + @Test + public void testStagingCleanupOrder() throws Exception { + MRAppTestCleanup app = new MRAppTestCleanup(1, 1, true, + this.getClass().getName(), true); + JobImpl job = (JobImpl)app.submit(new Configuration()); + app.waitForState(job, JobState.SUCCEEDED); + app.verifyCompleted(); + + int waitTime = 20 * 1000; + while (waitTime > 0 && !app.cleanedBeforeStopped) { + Thread.sleep(100); + waitTime -= 100; + } + Assert.assertTrue("Staging directory not cleaned before notifying RM", + app.cleanedBeforeStopped); + } + public static void 
main(String[] args) throws Exception { TestMRApp t = new TestMRApp(); t.testMapReduce(); @@ -241,5 +310,6 @@ public class TestMRApp { t.testCompletedMapsForReduceSlowstart(); t.testJobError(); t.testCountersOnJobFinish(); + t.testStagingCleanupOrder(); } } From d9b740cb1413de7a6631275c2ecb19d11ef1f0c5 Mon Sep 17 00:00:00 2001 From: Tsz-wo Sze Date: Tue, 10 Apr 2012 19:46:44 +0000 Subject: [PATCH 18/29] MAPREDUCE-4057. Update RAID for the HA and fsdataset changes. Contributed by Devaraj K git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311959 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../hadoop/hdfs/server/datanode/RaidBlockSender.java | 5 +++-- .../hadoop/hdfs/server/namenode/NameNodeRaidUtil.java | 6 ++++-- .../src/java/org/apache/hadoop/raid/BlockFixer.java | 10 +++++----- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 4b539aaf1c4..55778b5cf51 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -224,6 +224,9 @@ Release 2.0.0 - UNRELEASED MAPREDUCE-3869. Fix classpath for DistributedShell application. (Devaraj K via sseth) + MAPREDUCE-4057. Update RAID for the HA and fsdataset changes. (Devaraj K + via szetszwo) + Release 0.23.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/datanode/RaidBlockSender.java b/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/datanode/RaidBlockSender.java index c1fc998471d..a29a3ca1b12 100644 --- a/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/datanode/RaidBlockSender.java +++ b/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/datanode/RaidBlockSender.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.SocketOutputStream; import org.apache.hadoop.util.DataChecksum; @@ -441,9 +442,9 @@ public class RaidBlockSender implements java.io.Closeable { private static class BlockInputStreamFactory implements InputStreamFactory { private final ExtendedBlock block; - private final FSDatasetInterface data; + private final FsDatasetSpi data; - private BlockInputStreamFactory(ExtendedBlock block, FSDatasetInterface data) { + private BlockInputStreamFactory(ExtendedBlock block, FsDatasetSpi data) { this.block = block; this.data = data; } diff --git a/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRaidUtil.java b/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRaidUtil.java index 6b2c32da9df..531a0f238e9 100644 --- a/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRaidUtil.java +++ b/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRaidUtil.java @@ -22,6 +22,7 @@ import java.io.*; import org.apache.hadoop.classification.*; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.protocol.*; +import org.apache.hadoop.ipc.StandbyException; import 
org.apache.hadoop.security.AccessControlException; /** Utilities used by RAID for accessing NameNode. */ @@ -35,10 +36,11 @@ public class NameNodeRaidUtil { return dir.getFileInfo(src, resolveLink); } - /** Accessing FSNamesystem.getFileInfo(..) */ + /** Accessing FSNamesystem.getFileInfo(..) + * @throws StandbyException */ public static HdfsFileStatus getFileInfo(final FSNamesystem namesystem, final String src, final boolean resolveLink - ) throws AccessControlException, UnresolvedLinkException { + ) throws AccessControlException, UnresolvedLinkException, StandbyException { return namesystem.getFileInfo(src, resolveLink); } diff --git a/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/raid/BlockFixer.java b/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/raid/BlockFixer.java index dabb73564a3..6e1d7f79175 100644 --- a/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/raid/BlockFixer.java +++ b/hadoop-mapreduce-project/src/contrib/raid/src/java/org/apache/hadoop/raid/BlockFixer.java @@ -622,8 +622,8 @@ public abstract class BlockFixer extends Configured implements Runnable { int idx = rand.nextInt(live.length); chosen = live[idx]; for (DatanodeInfo avoid: locationsToAvoid) { - if (chosen.name.equals(avoid.name)) { - LOG.info("Avoiding " + avoid.name); + if (chosen.getName().equals(avoid.getName())) { + LOG.info("Avoiding " + avoid.getName()); chosen = null; break; } @@ -632,7 +632,7 @@ public abstract class BlockFixer extends Configured implements Runnable { if (chosen == null) { throw new IOException("Could not choose datanode"); } - LOG.info("Choosing datanode " + chosen.name); + LOG.info("Choosing datanode " + chosen.getName()); return chosen; } @@ -736,7 +736,7 @@ public abstract class BlockFixer extends Configured implements Runnable { DataInputStream metadataIn, LocatedBlock block, long blockSize) throws IOException { - InetSocketAddress target = NetUtils.createSocketAddr(datanode.name); + InetSocketAddress target = NetUtils.createSocketAddr(datanode.getName()); Socket sock = SocketChannel.open().socket(); int readTimeout = @@ -785,7 +785,7 @@ public abstract class BlockFixer extends Configured implements Runnable { 1, 0L, blockSize, 0L, DataChecksum.newDataChecksum(metadataIn)); blockSender.sendBlock(out, baseStream); - LOG.info("Sent block " + block.getBlock() + " to " + datanode.name); + LOG.info("Sent block " + block.getBlock() + " to " + datanode.getName()); } finally { out.close(); } From 2accda38a1e8d658ed1f6da4a583a81a151e17b4 Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Tue, 10 Apr 2012 20:36:09 +0000 Subject: [PATCH 19/29] MAPREDUCE-4017. Add jobname to jobsummary log (tgraves and Koji Noguchi via bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1311972 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../mapreduce/jobhistory/JobHistoryEventHandler.java | 1 + .../hadoop/mapreduce/jobhistory/JobSummary.java | 12 +++++++++++- .../mapreduce/v2/hs/TestJobHistoryParsing.java | 2 ++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 55778b5cf51..b73e16431b5 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -298,6 +298,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4099. ApplicationMaster may fail to remove staging directory (Jason Lowe via bobby) + MAPREDUCE-4017. 
Add jobname to jobsummary log (tgraves and Koji Noguchi + via bobby) + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java index 953ef447b66..01073cb34fb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java @@ -551,6 +551,7 @@ public class JobHistoryEventHandler extends AbstractService summary.setUser(jse.getUserName()); summary.setQueue(jse.getJobQueueName()); summary.setJobSubmitTime(jse.getSubmitTime()); + summary.setJobName(jse.getJobName()); break; case NORMALIZED_RESOURCE: NormalizedResourceEvent normalizedResourceEvent = diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSummary.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSummary.java index 691c7ee4e13..5adfed15a9c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSummary.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSummary.java @@ -45,6 +45,7 @@ public class JobSummary { private long mapSlotSeconds; // TODO Not generated yet in MRV2 private long reduceSlotSeconds; // TODO Not generated yet MRV2 // private int clusterSlotCapacity; + private String jobName; JobSummary() { } @@ -185,6 +186,14 @@ public class JobSummary { this.reduceSlotSeconds = reduceSlotSeconds; } + public String getJobName() { + return jobName; + } + + public void setJobName(String jobName) { + this.jobName = jobName; + } + public String getJobSummaryString() { SummaryBuilder summary = new SummaryBuilder() .add("jobId", jobId) @@ -201,7 +210,8 @@ public class JobSummary { .add("queue", queue) .add("status", jobStatus) .add("mapSlotSeconds", mapSlotSeconds) - .add("reduceSlotSeconds", reduceSlotSeconds); + .add("reduceSlotSeconds", reduceSlotSeconds) + .add("jobName", jobName); return summary.toString(); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java index 2b0d8965ec8..7aab4de0dc2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java @@ -267,6 +267,8 @@ public class TestJobHistoryParsing { Assert.assertEquals("JobId does not match", jobId.toString(), jobSummaryElements.get("jobId")); + Assert.assertEquals("JobName does not match", "test", + 
jobSummaryElements.get("jobName")); Assert.assertTrue("submitTime should not be 0", Long.parseLong(jobSummaryElements.get("submitTime")) != 0); Assert.assertTrue("launchTime should not be 0", From eb74ff0c3b6de7561fe2c1cee66fda0ca4544808 Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Tue, 10 Apr 2012 21:30:05 +0000 Subject: [PATCH 20/29] MAPREDUCE-4076. Stream job fails with ZipException when use yarn jar command (Devaraj K via bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312003 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ hadoop-mapreduce-project/hadoop-yarn/bin/yarn | 1 + 2 files changed, 4 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index b73e16431b5..bb2aa74b5a8 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -227,6 +227,9 @@ Release 2.0.0 - UNRELEASED MAPREDUCE-4057. Update RAID for the HA and fsdataset changes. (Devaraj K via szetszwo) + MAPREDUCE-4076. Stream job fails with ZipException when use yarn jar + command (Devaraj K via bobby) + Release 0.23.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index f3b0eeac370..01687b0f710 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -225,6 +225,7 @@ YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR" YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE" YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE" YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_HOME" +YARN_OPTS="$YARN_OPTS -Dhadoop.home.dir=$YARN_HOME" YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}" if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then From 94843b848ac468ee4758124088467d43ddc73c89 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Tue, 10 Apr 2012 22:09:00 +0000 Subject: [PATCH 21/29] MAPREDUCE-4108. Fix tests in org.apache.hadoop.util.TestRunJar (Devaraj K via tgraves) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312018 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../apache/hadoop/util}/Hello.java | 13 ++-- .../org/apache/hadoop/util/TestRunJar.java | 59 ++++++++++++++----- 3 files changed, 53 insertions(+), 22 deletions(-) rename hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/{testjar => org/apache/hadoop/util}/Hello.java (85%) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index bb2aa74b5a8..63b397a305f 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -229,6 +229,9 @@ Release 2.0.0 - UNRELEASED MAPREDUCE-4076. Stream job fails with ZipException when use yarn jar command (Devaraj K via bobby) + + MAPREDUCE-4108. 
Fix tests in org.apache.hadoop.util.TestRunJar + (Devaraj K via tgraves) Release 0.23.3 - UNRELEASED diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/testjar/Hello.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/Hello.java similarity index 85% rename from hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/testjar/Hello.java rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/Hello.java index dc9b98d2d06..47e9efaf509 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/testjar/Hello.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/Hello.java @@ -16,25 +16,24 @@ * limitations under the License. */ -package testjar; +package org.apache.hadoop.util; import java.io.FileOutputStream; import java.io.IOException; /** - * A simple Hello class that is called from TestRunJar - * + * A simple Hello class that is called from TestRunJar + * */ public class Hello { - public static void main(String[] args){ + public static void main(String[] args) { try { System.out.println("Creating file" + args[0]); FileOutputStream fstream = new FileOutputStream(args[0]); fstream.write("Hello Hadoopers".getBytes()); fstream.close(); - } - catch (IOException e) { - //do nothing + } catch (IOException e) { + // do nothing } } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/TestRunJar.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/TestRunJar.java index 472f82bfddc..6d19b214f56 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/TestRunJar.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/util/TestRunJar.java @@ -18,34 +18,63 @@ package org.apache.hadoop.util; +import java.io.BufferedInputStream; import java.io.File; -import org.apache.hadoop.fs.Path; -import org.junit.Ignore; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.jar.JarOutputStream; +import java.util.zip.ZipEntry; -import junit.framework.TestCase; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Test; /** * A test to rest the RunJar class. */ -@Ignore -public class TestRunJar extends TestCase { - +public class TestRunJar { + private static String TEST_ROOT_DIR = new Path(System.getProperty( "test.build.data", "/tmp")).toString(); - + + private static final String TEST_JAR_NAME = "testjar.jar"; + private static final String CLASS_NAME = "Hello.class"; + + @Test public void testRunjar() throws Throwable { - - File outFile = new File(TEST_ROOT_DIR, "out"); - // delete if output file already exists. + File outFile = new File(TEST_ROOT_DIR, "out"); + // delete if output file already exists. 
if (outFile.exists()) { outFile.delete(); } - + File makeTestJar = makeTestJar(); + String[] args = new String[3]; - args[0] = "build/test/mapred/testjar/testjob.jar"; - args[1] = "testjar.Hello"; + args[0] = makeTestJar.getAbsolutePath(); + args[1] = "org.apache.hadoop.util.Hello"; args[2] = outFile.toString(); RunJar.main(args); - assertTrue("RunJar failed", outFile.exists()); + Assert.assertTrue("RunJar failed", outFile.exists()); } -} + + private File makeTestJar() throws IOException { + File jarFile = new File(TEST_ROOT_DIR, TEST_JAR_NAME); + JarOutputStream jstream = new JarOutputStream(new FileOutputStream(jarFile)); + InputStream entryInputStream = this.getClass().getResourceAsStream( + CLASS_NAME); + ZipEntry entry = new ZipEntry("org/apache/hadoop/util/" + CLASS_NAME); + jstream.putNextEntry(entry); + BufferedInputStream bufInputStream = new BufferedInputStream( + entryInputStream, 2048); + int count; + byte[] data = new byte[2048]; + while ((count = bufInputStream.read(data, 0, 2048)) != -1) { + jstream.write(data, 0, count); + } + jstream.closeEntry(); + jstream.close(); + + return jarFile; + } +} \ No newline at end of file From 3ffdb9152e780f4fd80ac7ea7b1a45ec583edc36 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Apr 2012 22:22:38 +0000 Subject: [PATCH 22/29] HDFS-3094. add -nonInteractive and -force option to namenode -format command. Contributed by Arpit Gupta. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312025 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/common/HdfsServerConstants.java | 23 +- .../hadoop/hdfs/server/namenode/NameNode.java | 51 ++- .../hdfs/server/namenode/TestClusterId.java | 401 +++++++++++++++++- 4 files changed, 447 insertions(+), 31 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5f9cb1ae49e..41735667696 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -355,6 +355,9 @@ Release 2.0.0 - UNRELEASED HDFS-3238. ServerCommand and friends don't need to be writables. (eli) + HDFS-3094. add -nonInteractive and -force option to namenode -format + command (Arpit Gupta via todd) + OPTIMIZATIONS HDFS-3024. 
Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index 00275c5917c..8e0b9927373 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -57,13 +57,18 @@ public final class HdfsServerConstants { BOOTSTRAPSTANDBY("-bootstrapStandby"), INITIALIZESHAREDEDITS("-initializeSharedEdits"), RECOVER ("-recover"), - FORCE("-force"); + FORCE("-force"), + NONINTERACTIVE("-nonInteractive"); private String name = null; // Used only with format and upgrade options private String clusterId = null; + // Used only with format option + private boolean isForceFormat = false; + private boolean isInteractiveFormat = true; + // Used only with recovery option private int force = 0; @@ -101,6 +106,22 @@ public final class HdfsServerConstants { public int getForce() { return this.force; } + + public boolean getForceFormat() { + return isForceFormat; + } + + public void setForceFormat(boolean force) { + isForceFormat = force; + } + + public boolean getInteractiveFormat() { + return isInteractiveFormat; + } + + public void setInteractiveFormat(boolean interactive) { + isInteractiveFormat = interactive; + } } // Timeouts for communicating with DataNode for streaming writes/reads diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 0c7c4cd863c..9bd18338924 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -215,7 +215,7 @@ public class NameNode { /** Format a new filesystem. Destroys any filesystem that may already * exist at this location. 
**/ public static void format(Configuration conf) throws IOException { - format(conf, true); + format(conf, true, true); } static NameNodeMetrics metrics; @@ -658,9 +658,8 @@ public class NameNode { * @return true if formatting was aborted, false otherwise * @throws IOException */ - private static boolean format(Configuration conf, - boolean force) - throws IOException { + private static boolean format(Configuration conf, boolean force, + boolean isInteractive) throws IOException { String nsId = DFSUtil.getNamenodeNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); initializeGenericKeys(conf, nsId, namenodeId); @@ -669,7 +668,7 @@ public class NameNode { Collection dirsToFormat = FSNamesystem.getNamespaceDirs(conf); List editDirsToFormat = FSNamesystem.getNamespaceEditsDirs(conf); - if (!confirmFormat(dirsToFormat, force, true)) { + if (!confirmFormat(dirsToFormat, force, isInteractive)) { return true; // aborted } @@ -830,8 +829,9 @@ public class NameNode { "Usage: java NameNode [" + StartupOption.BACKUP.getName() + "] | [" + StartupOption.CHECKPOINT.getName() + "] | [" + - StartupOption.FORMAT.getName() + "[" + StartupOption.CLUSTERID.getName() + - " cid ]] | [" + + StartupOption.FORMAT.getName() + " [" + StartupOption.CLUSTERID.getName() + + " cid ] [" + StartupOption.FORCE.getName() + "] [" + + StartupOption.NONINTERACTIVE.getName() + "] ] | [" + StartupOption.UPGRADE.getName() + "] | [" + StartupOption.ROLLBACK.getName() + "] | [" + StartupOption.FINALIZE.getName() + "] | [" + @@ -850,11 +850,35 @@ public class NameNode { String cmd = args[i]; if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.FORMAT; - // might be followed by two args - if (i + 2 < argsLen - && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { - i += 2; - startOpt.setClusterId(args[i]); + for (i = i + 1; i < argsLen; i++) { + if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { + i++; + if (i >= argsLen) { + // if no cluster id specified, return null + LOG.fatal("Must specify a valid cluster ID after the " + + StartupOption.CLUSTERID.getName() + " flag"); + return null; + } + String clusterId = args[i]; + // Make sure an id is specified and not another flag + if (clusterId.isEmpty() || + clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || + clusterId.equalsIgnoreCase( + StartupOption.NONINTERACTIVE.getName())) { + LOG.fatal("Must specify a valid cluster ID after the " + + StartupOption.CLUSTERID.getName() + " flag"); + return null; + } + startOpt.setClusterId(clusterId); + } + + if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { + startOpt.setForceFormat(true); + } + + if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { + startOpt.setInteractiveFormat(false); + } } } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.GENCLUSTERID; @@ -997,7 +1021,8 @@ public class NameNode { switch (startOpt) { case FORMAT: { - boolean aborted = format(conf, false); + boolean aborted = format(conf, startOpt.getForceFormat(), + startOpt.getInteractiveFormat()); System.exit(aborted ? 
1 : 0); return null; // avoid javac warning } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java index 98c17a7b4d1..9b5a5bd2944 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java @@ -18,12 +18,19 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; import java.net.URI; -import java.util.ArrayList; +import java.security.Permission; import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -40,11 +47,11 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; - public class TestClusterId { private static final Log LOG = LogFactory.getLog(TestClusterId.class); File hdfsDir; - + Configuration config; + private String getClusterId(Configuration config) throws IOException { // see if cluster id not empty. Collection dirsToFormat = FSNamesystem.getNamespaceDirs(config); @@ -59,33 +66,41 @@ public class TestClusterId { LOG.info("successfully formated : sd="+sd.getCurrentDir() + ";cid="+cid); return cid; } - + @Before public void setUp() throws IOException { + System.setSecurityManager(new NoExitSecurityManager()); + String baseDir = System.getProperty("test.build.data", "build/test/data"); - hdfsDir = new File(baseDir, "dfs"); - if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) { - throw new IOException("Could not delete test directory '" + - hdfsDir + "'"); + hdfsDir = new File(baseDir, "dfs/name"); + if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) { + throw new IOException("Could not delete test directory '" + hdfsDir + "'"); } LOG.info("hdfsdir is " + hdfsDir.getAbsolutePath()); + + // as some tests might change these values we reset them to defaults before + // every test + StartupOption.FORMAT.setForceFormat(false); + StartupOption.FORMAT.setInteractiveFormat(true); + + config = new Configuration(); + config.set(DFS_NAMENODE_NAME_DIR_KEY, hdfsDir.getPath()); } - + @After public void tearDown() throws IOException { - if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) { - throw new IOException("Could not tearDown test directory '" + - hdfsDir + "'"); + System.setSecurityManager(null); + + if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) { + throw new IOException("Could not tearDown test directory '" + hdfsDir + + "'"); } } - + @Test public void testFormatClusterIdOption() throws IOException { - Configuration config = new Configuration(); - config.set(DFS_NAMENODE_NAME_DIR_KEY, new File(hdfsDir, "name").getPath()); - // 1. should format without cluster id //StartupOption.FORMAT.setClusterId(""); NameNode.format(config); @@ -107,4 +122,356 @@ public class TestClusterId { String newCid = getClusterId(config); assertFalse("ClusterId should not be the same", newCid.equals(cid)); } -} + + /** + * Test namenode format with -format option. Format should succeed. 
+ * + * @throws IOException + */ + @Test + public void testFormat() throws IOException { + String[] argv = { "-format" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + String cid = getClusterId(config); + assertTrue("Didn't get new ClusterId", (cid != null && !cid.equals(""))); + } + + /** + * Test namenode format with -format option when an empty name directory + * exists. Format should succeed. + * + * @throws IOException + */ + @Test + public void testFormatWithEmptyDir() throws IOException { + + if (!hdfsDir.mkdirs()) { + fail("Failed to create dir " + hdfsDir.getPath()); + } + + String[] argv = { "-format" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + String cid = getClusterId(config); + assertTrue("Didn't get new ClusterId", (cid != null && !cid.equals(""))); + } + + /** + * Test namenode format with -format -force options when name directory + * exists. Format should succeed. + * + * @throws IOException + */ + @Test + public void testFormatWithForce() throws IOException { + + if (!hdfsDir.mkdirs()) { + fail("Failed to create dir " + hdfsDir.getPath()); + } + + String[] argv = { "-format", "-force" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + String cid = getClusterId(config); + assertTrue("Didn't get new ClusterId", (cid != null && !cid.equals(""))); + } + + /** + * Test namenode format with -format -force -clusterid option when name + * directory exists. Format should succeed. + * + * @throws IOException + */ + @Test + public void testFormatWithForceAndClusterId() throws IOException { + + if (!hdfsDir.mkdirs()) { + fail("Failed to create dir " + hdfsDir.getPath()); + } + + String myId = "testFormatWithForceAndClusterId"; + String[] argv = { "-format", "-force", "-clusterid", myId }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + String cId = getClusterId(config); + assertEquals("ClusterIds do not match", myId, cId); + } + + /** + * Test namenode format with -clusterid -force option. Format command should + * fail as no cluster id was provided. + * + * @throws IOException + */ + @Test + public void testFormatWithInvalidClusterIdOption() throws IOException { + + String[] argv = { "-format", "-clusterid", "-force" }; + PrintStream origErr = System.err; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream stdErr = new PrintStream(baos); + System.setErr(stdErr); + + NameNode.createNameNode(argv, config); + + // Check if usage is printed + assertTrue(baos.toString("UTF-8").contains("Usage: java NameNode")); + System.setErr(origErr); + + // check if the version file does not exists. + File version = new File(hdfsDir, "current/VERSION"); + assertFalse("Check version should not exist", version.exists()); + } + + /** + * Test namenode format with -format -clusterid options. Format should fail + * was no clusterid was sent. 
+ * + * @throws IOException + */ + @Test + public void testFormatWithNoClusterIdOption() throws IOException { + + String[] argv = { "-format", "-clusterid" }; + PrintStream origErr = System.err; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream stdErr = new PrintStream(baos); + System.setErr(stdErr); + + NameNode.createNameNode(argv, config); + + // Check if usage is printed + assertTrue(baos.toString("UTF-8").contains("Usage: java NameNode")); + System.setErr(origErr); + + // check if the version file does not exists. + File version = new File(hdfsDir, "current/VERSION"); + assertFalse("Check version should not exist", version.exists()); + } + + /** + * Test namenode format with -format -clusterid and empty clusterid. Format + * should fail as no valid if was provided. + * + * @throws IOException + */ + @Test + public void testFormatWithEmptyClusterIdOption() throws IOException { + + String[] argv = { "-format", "-clusterid", "" }; + + PrintStream origErr = System.err; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream stdErr = new PrintStream(baos); + System.setErr(stdErr); + + NameNode.createNameNode(argv, config); + + // Check if usage is printed + assertTrue(baos.toString("UTF-8").contains("Usage: java NameNode")); + System.setErr(origErr); + + // check if the version file does not exists. + File version = new File(hdfsDir, "current/VERSION"); + assertFalse("Check version should not exist", version.exists()); + } + + /** + * Test namenode format with -format -nonInteractive options when a non empty + * name directory exists. Format should not succeed. + * + * @throws IOException + */ + @Test + public void testFormatWithNonInteractive() throws IOException { + + // we check for a non empty dir, so create a child path + File data = new File(hdfsDir, "file"); + if (!data.mkdirs()) { + fail("Failed to create dir " + data.getPath()); + } + + String[] argv = { "-format", "-nonInteractive" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have been aborted with exit code 1", 1, + e.status); + } + + // check if the version file does not exists. + File version = new File(hdfsDir, "current/VERSION"); + assertFalse("Check version should not exist", version.exists()); + } + + /** + * Test namenode format with -format -nonInteractive options when name + * directory does not exist. Format should succeed. + * + * @throws IOException + */ + @Test + public void testFormatWithNonInteractiveNameDirDoesNotExit() + throws IOException { + + String[] argv = { "-format", "-nonInteractive" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + String cid = getClusterId(config); + assertTrue("Didn't get new ClusterId", (cid != null && !cid.equals(""))); + } + + /** + * Test namenode format with -force -nonInteractive -force option. Format + * should succeed. 
+ * + * @throws IOException + */ + @Test + public void testFormatWithNonInteractiveAndForce() throws IOException { + + if (!hdfsDir.mkdirs()) { + fail("Failed to create dir " + hdfsDir.getPath()); + } + + String[] argv = { "-format", "-nonInteractive", "-force" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + String cid = getClusterId(config); + assertTrue("Didn't get new ClusterId", (cid != null && !cid.equals(""))); + } + + /** + * Test namenode format with -format option when a non empty name directory + * exists. Enter Y when prompted and the format should succeed. + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public void testFormatWithoutForceEnterYes() throws IOException, + InterruptedException { + + // we check for a non empty dir, so create a child path + File data = new File(hdfsDir, "file"); + if (!data.mkdirs()) { + fail("Failed to create dir " + data.getPath()); + } + + // capture the input stream + InputStream origIn = System.in; + ByteArrayInputStream bins = new ByteArrayInputStream("Y\n".getBytes()); + System.setIn(bins); + + String[] argv = { "-format" }; + + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should have succeeded", 0, e.status); + } + + System.setIn(origIn); + + String cid = getClusterId(config); + assertTrue("Didn't get new ClusterId", (cid != null && !cid.equals(""))); + } + + /** + * Test namenode format with -format option when a non empty name directory + * exists. Enter N when prompted and format should be aborted. + * + * @throws IOException + * @throws InterruptedException + */ + @Test + public void testFormatWithoutForceEnterNo() throws IOException, + InterruptedException { + + // we check for a non empty dir, so create a child path + File data = new File(hdfsDir, "file"); + if (!data.mkdirs()) { + fail("Failed to create dir " + data.getPath()); + } + + // capture the input stream + InputStream origIn = System.in; + ByteArrayInputStream bins = new ByteArrayInputStream("N\n".getBytes()); + System.setIn(bins); + + String[] argv = { "-format" }; + try { + NameNode.createNameNode(argv, config); + fail("createNameNode() did not call System.exit()"); + } catch (ExitException e) { + assertEquals("Format should not have succeeded", 1, e.status); + } + + System.setIn(origIn); + + // check if the version file does not exists. + File version = new File(hdfsDir, "current/VERSION"); + assertFalse("Check version should not exist", version.exists()); + } + + private static class ExitException extends SecurityException { + private static final long serialVersionUID = 1L; + public final int status; + + public ExitException(int status) { + super("There is no escape!"); + this.status = status; + } + } + + private static class NoExitSecurityManager extends SecurityManager { + @Override + public void checkPermission(Permission perm) { + // allow anything. + } + + @Override + public void checkPermission(Permission perm, Object context) { + // allow anything. + } + + @Override + public void checkExit(int status) { + super.checkExit(status); + throw new ExitException(status); + } + } +} \ No newline at end of file From 7c2e10e0cd9f14ecbe25e1073b09db9403e42558 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Apr 2012 22:28:25 +0000 Subject: [PATCH 23/29] HDFS-3248. 
bootstrapStandby repeated twice in hdfs namenode usage message. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312028 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/hdfs/server/namenode/NameNode.java | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 41735667696..56e1502a855 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -478,6 +478,9 @@ Release 2.0.0 - UNRELEASED HDFS-3236. NameNode does not initialize generic conf keys when started with -initializeSharedEditsDir (atm) + HDFS-3248. bootstrapStandby repeated twice in hdfs namenode usage message + (Colin Patrick McCabe via todd) + BREAKDOWN OF HDFS-1623 SUBTASKS HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 9bd18338924..1549874f031 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -836,7 +836,6 @@ public class NameNode { StartupOption.ROLLBACK.getName() + "] | [" + StartupOption.FINALIZE.getName() + "] | [" + StartupOption.IMPORT.getName() + "] | [" + - StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | [" + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" + StartupOption.RECOVER.getName() + " [ " + From 58228e4e7bd20777976ca395bf99a2a55bd33676 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Apr 2012 22:31:18 +0000 Subject: [PATCH 24/29] HDFS-3243. TestParallelRead timing out on jenkins. Contributed by Henry Robinson. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312029 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 56e1502a855..6817533e26e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -124,6 +124,8 @@ Trunk (unreleased changes) HDFS-3235. MiniDFSClusterManager doesn't correctly support -format option. (Henry Robinson via atm) + + HDFS-3243. TestParallelRead timing out on jenkins. 
(Henry Robinson via todd) Release 2.0.0 - UNRELEASED diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java index 0a0fc67343a..8e218d637ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java @@ -179,7 +179,7 @@ public class TestParallelReadUtil { */ static class ReadWorker extends Thread { - static public final int N_ITERATIONS = 1024 * 4; + static public final int N_ITERATIONS = 1024; private static final double PROPORTION_NON_POSITIONAL_READ = 0.10; From 80447bd35a79ca58a03354d0552fbefd4edd7565 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Wed, 11 Apr 2012 01:10:38 +0000 Subject: [PATCH 25/29] HDFS-3244. Remove dead writable code from hdfs/protocol. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312061 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hdfs/protocol/BlockLocalPathInfo.java | 34 +----- .../hdfs/protocol/ClientDatanodeProtocol.java | 5 - .../hadoop/hdfs/protocol/ClientProtocol.java | 4 - .../hdfs/protocol/CorruptFileBlocks.java | 29 +---- .../hadoop/hdfs/protocol/DatanodeID.java | 38 +----- .../hadoop/hdfs/protocol/DatanodeInfo.java | 57 --------- .../hdfs/protocol/DirectoryListing.java | 58 +-------- .../hadoop/hdfs/protocol/ExtendedBlock.java | 40 +------ .../hadoop/hdfs/protocol/HdfsFileStatus.java | 70 +---------- .../hdfs/protocol/HdfsLocatedFileStatus.java | 28 ----- .../hadoop/hdfs/protocol/LocatedBlock.java | 48 +------- .../hadoop/hdfs/protocol/LocatedBlocks.java | 64 +--------- .../ClientDatanodeProtocolTranslatorPB.java | 2 - .../ClientNamenodeProtocolTranslatorPB.java | 13 +-- ...appingsProtocolClientSideTranslatorPB.java | 2 - .../NamenodeProtocolTranslatorPB.java | 2 - ...nPolicyProtocolClientSideTranslatorPB.java | 2 - ...appingsProtocolClientSideTranslatorPB.java | 2 - .../ProtocolSignatureWritable.java | 110 ------------------ .../hdfs/protocolR23Compatible/overview.html | 44 ------- .../blockmanagement/DatanodeDescriptor.java | 2 - .../hdfs/server/common/StorageInfo.java | 29 +---- .../server/common/UpgradeStatusReport.java | 40 +------ .../server/namenode/CheckpointSignature.java | 28 +---- .../server/protocol/BlockRecoveryCommand.java | 11 -- .../server/protocol/BlocksWithLocations.java | 58 +-------- .../server/protocol/DatanodeRegistration.java | 46 +------- .../server/protocol/HeartbeatResponse.java | 39 +------ .../protocol/InterDatanodeProtocol.java | 3 - .../server/protocol/NNHAStatusHeartbeat.java | 26 +---- .../server/protocol/NamenodeProtocol.java | 4 - .../server/protocol/NamenodeRegistration.java | 39 ------- .../hdfs/server/protocol/NamespaceInfo.java | 32 ----- .../protocol/ReceivedDeletedBlockInfo.java | 28 +---- .../server/protocol/ReplicaRecoveryInfo.java | 33 ------ .../org/apache/hadoop/hdfs/DFSTestUtil.java | 11 ++ .../org/apache/hadoop/hdfs/TestDFSUtil.java | 2 +- .../org/apache/hadoop/hdfs/TestGetBlocks.java | 6 +- .../hdfs/protocol/TestCorruptFileBlocks.java | 79 ------------- .../hadoop/hdfs/protocolPB/TestPBHelper.java | 7 +- .../server/blockmanagement/TestBlockInfo.java | 3 +- .../TestCorruptReplicaInfo.java | 5 +- .../TestDatanodeDescriptor.java | 5 +- .../hdfs/server/common/TestStorageInfo.java | 78 ------------- 45 files changed, 54 
insertions(+), 1214 deletions(-) delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ProtocolSignatureWritable.java delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/overview.html delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/TestCorruptFileBlocks.java delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestStorageInfo.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6817533e26e..1710957395f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -360,6 +360,8 @@ Release 2.0.0 - UNRELEASED HDFS-3094. add -nonInteractive and -force option to namenode -format command (Arpit Gupta via todd) + HDFS-3244. Remove dead writable code from hdfs/protocol. (eli) + OPTIMIZATIONS HDFS-3024. Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockLocalPathInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockLocalPathInfo.java index 6017c37148d..6bb850b952a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockLocalPathInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockLocalPathInfo.java @@ -17,16 +17,8 @@ */ package org.apache.hadoop.hdfs.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; /** * A block and the full path information to the block data file and @@ -34,20 +26,11 @@ import org.apache.hadoop.io.WritableFactory; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public class BlockLocalPathInfo implements Writable { - static final WritableFactory FACTORY = new WritableFactory() { - public Writable newInstance() { return new BlockLocalPathInfo(); } - }; - static { // register a ctor - WritableFactories.setFactory(BlockLocalPathInfo.class, FACTORY); - } - +public class BlockLocalPathInfo { private ExtendedBlock block; private String localBlockPath = ""; // local file storing the data private String localMetaPath = ""; // local file storing the checksum - public BlockLocalPathInfo() {} - /** * Constructs BlockLocalPathInfo. * @param b The block corresponding to this lock path info. @@ -77,21 +60,6 @@ public class BlockLocalPathInfo implements Writable { */ public String getMetaPath() {return localMetaPath;} - @Override - public void write(DataOutput out) throws IOException { - block.write(out); - Text.writeString(out, localBlockPath); - Text.writeString(out, localMetaPath); - } - - @Override - public void readFields(DataInput in) throws IOException { - block = new ExtendedBlock(); - block.readFields(in); - localBlockPath = Text.readString(in); - localMetaPath = Text.readString(in); - } - /** * Get number of bytes in the block. * @return Number of bytes in the block. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientDatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientDatanodeProtocol.java index 363e8dba3a1..38b13db3f5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientDatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientDatanodeProtocol.java @@ -24,7 +24,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSelector; -import org.apache.hadoop.ipc.VersionedProtocol; import org.apache.hadoop.security.KerberosInfo; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenInfo; @@ -42,9 +41,6 @@ public interface ClientDatanodeProtocol { * the client interface to the DN AND the RPC protocol used to * communicate with the NN. * - * Post version 10 (release 23 of Hadoop), the protocol is implemented in - * {@literal ../protocolR23Compatible/ClientDatanodeWireProtocol} - * * This class is used by both the DFSClient and the * DN server side to insulate from the protocol serialization. * @@ -60,7 +56,6 @@ public interface ClientDatanodeProtocol { * * 9 is the last version id when this class was used for protocols * serialization. DO not update this version any further. - * Changes are recorded in R23 classes. */ public static final long versionID = 9L; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 099fd284fff..26f309a28fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -66,9 +66,6 @@ public interface ClientProtocol { * the client interface to the NN AND the RPC protocol used to * communicate with the NN. * - * Post version 70 (release 23 of Hadoop), the protocol is implemented in - * {@literal ../protocolR23Compatible/ClientNamenodeWireProtocol} - * * This class is used by both the DFSClient and the * NN server side to insulate from the protocol serialization. * @@ -84,7 +81,6 @@ public interface ClientProtocol { * * 69L is the last version id when this class was used for protocols * serialization. DO not update this version any further. - * Changes are recorded in R23 classes. 
*/ public static final long versionID = 69L; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CorruptFileBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CorruptFileBlocks.java index 44fd387741a..16c7656aaf6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CorruptFileBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CorruptFileBlocks.java @@ -17,11 +17,6 @@ */ package org.apache.hadoop.hdfs.protocol; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.Text; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; import java.util.Arrays; /** @@ -29,7 +24,7 @@ import java.util.Arrays; * used for iterative calls to NameNode.listCorruptFileBlocks. * */ -public class CorruptFileBlocks implements Writable { +public class CorruptFileBlocks { // used for hashCode private static final int PRIME = 16777619; @@ -53,28 +48,6 @@ public class CorruptFileBlocks implements Writable { return cookie; } - - @Override - public void readFields(DataInput in) throws IOException { - int fileCount = in.readInt(); - files = new String[fileCount]; - for (int i = 0; i < fileCount; i++) { - files[i] = Text.readString(in); - } - cookie = Text.readString(in); - } - - - @Override - public void write(DataOutput out) throws IOException { - out.writeInt(files.length); - for (int i = 0; i < files.length; i++) { - Text.writeString(out, files[i]); - } - Text.writeString(out, cookie); - } - - @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java index e36818859e5..464fd614d23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java @@ -18,15 +18,9 @@ package org.apache.hadoop.hdfs.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; /** * This class represents the primary identifier for a Datanode. 
@@ -41,8 +35,8 @@ import org.apache.hadoop.io.WritableComparable; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public class DatanodeID implements WritableComparable { - public static final DatanodeID[] EMPTY_ARRAY = {}; +public class DatanodeID implements Comparable { + public static final DatanodeID[] EMPTY_ARRAY = {}; protected String ipAddr; // IP address protected String hostName; // hostname @@ -51,10 +45,6 @@ public class DatanodeID implements WritableComparable { protected int infoPort; // info server port protected int ipcPort; // IPC server port - public DatanodeID() { - this("", DFSConfigKeys.DFS_DATANODE_DEFAULT_PORT); - } - public DatanodeID(String ipAddr, int xferPort) { this(ipAddr, "", "", xferPort, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, @@ -234,28 +224,4 @@ public class DatanodeID implements WritableComparable { public int compareTo(DatanodeID that) { return getXferAddr().compareTo(that.getXferAddr()); } - - @Override - public void write(DataOutput out) throws IOException { - Text.writeString(out, ipAddr); - Text.writeString(out, hostName); - Text.writeString(out, storageID); - out.writeShort(xferPort); - out.writeShort(infoPort); - out.writeShort(ipcPort); - } - - @Override - public void readFields(DataInput in) throws IOException { - ipAddr = Text.readString(in); - hostName = Text.readString(in); - storageID = Text.readString(in); - // The port read could be negative, if the port is a large number (more - // than 15 bits in storage size (but less than 16 bits). - // So chop off the first two bytes (and hence the signed bits) before - // setting the field. - xferPort = in.readShort() & 0x0000ffff; - infoPort = in.readShort() & 0x0000ffff; - ipcPort = in.readShort() & 0x0000ffff; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java index 16ebc9fcc86..db7eaed6435 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java @@ -17,19 +17,11 @@ */ package org.apache.hadoop.hdfs.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; import java.util.Date; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; -import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; @@ -78,11 +70,6 @@ public class DatanodeInfo extends DatanodeID implements Node { protected AdminStates adminState; - public DatanodeInfo() { - super(); - adminState = null; - } - public DatanodeInfo(DatanodeInfo from) { super(from); this.capacity = from.getCapacity(); @@ -356,50 +343,6 @@ public class DatanodeInfo extends DatanodeID implements Node { public int getLevel() { return level; } public void setLevel(int level) {this.level = level;} - ///////////////////////////////////////////////// - // Writable - ///////////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (DatanodeInfo.class, - new WritableFactory() { - public 
Writable newInstance() { return new DatanodeInfo(); } - }); - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeLong(capacity); - out.writeLong(dfsUsed); - out.writeLong(remaining); - out.writeLong(blockPoolUsed); - out.writeLong(lastUpdate); - out.writeInt(xceiverCount); - Text.writeString(out, location); - WritableUtils.writeEnum(out, getAdminState()); - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - this.capacity = in.readLong(); - this.dfsUsed = in.readLong(); - this.remaining = in.readLong(); - this.blockPoolUsed = in.readLong(); - this.lastUpdate = in.readLong(); - this.xceiverCount = in.readInt(); - this.location = Text.readString(in); - setAdminState(WritableUtils.readEnum(in, AdminStates.class)); - } - - /** Read a DatanodeInfo */ - public static DatanodeInfo read(DataInput in) throws IOException { - final DatanodeInfo d = new DatanodeInfo(); - d.readFields(in); - return d; - } - @Override public int hashCode() { // Super implementation is sufficient diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DirectoryListing.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DirectoryListing.java index f129b70b236..169287b8560 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DirectoryListing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DirectoryListing.java @@ -16,15 +16,8 @@ */ package org.apache.hadoop.hdfs.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; /** * This class defines a partial listing of a directory to support @@ -32,24 +25,10 @@ import org.apache.hadoop.io.WritableFactory; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public class DirectoryListing implements Writable { - static { // register a ctor - WritableFactories.setFactory - (DirectoryListing.class, - new WritableFactory() { - public Writable newInstance() { return new DirectoryListing(); } - }); - } - +public class DirectoryListing { private HdfsFileStatus[] partialListing; private int remainingEntries; - /** - * default constructor - */ - public DirectoryListing() { - } - /** * constructor * @param partialListing a partial listing of a directory @@ -103,39 +82,4 @@ public class DirectoryListing implements Writable { } return partialListing[partialListing.length-1].getLocalNameInBytes(); } - - // Writable interface - @Override - public void readFields(DataInput in) throws IOException { - int numEntries = in.readInt(); - partialListing = new HdfsFileStatus[numEntries]; - if (numEntries !=0 ) { - boolean hasLocation = in.readBoolean(); - for (int i=0; i blocks; // array of blocks with prioritized locations private boolean underConstruction; @@ -167,61 +160,6 @@ public class LocatedBlocks implements Writable { return binSearchResult >= 0 ? 
binSearchResult : -(binSearchResult+1); } - ////////////////////////////////////////////////// - // Writable - ////////////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (LocatedBlocks.class, - new WritableFactory() { - public Writable newInstance() { return new LocatedBlocks(); } - }); - } - - public void write(DataOutput out) throws IOException { - out.writeLong(this.fileLength); - out.writeBoolean(underConstruction); - - //write the last located block - final boolean isNull = lastLocatedBlock == null; - out.writeBoolean(isNull); - if (!isNull) { - lastLocatedBlock.write(out); - } - out.writeBoolean(isLastBlockComplete); - - // write located blocks - int nrBlocks = locatedBlockCount(); - out.writeInt(nrBlocks); - if (nrBlocks == 0) { - return; - } - for (LocatedBlock blk : this.blocks) { - blk.write(out); - } - } - - public void readFields(DataInput in) throws IOException { - this.fileLength = in.readLong(); - underConstruction = in.readBoolean(); - - //read the last located block - final boolean isNull = in.readBoolean(); - if (!isNull) { - lastLocatedBlock = LocatedBlock.read(in); - } - isLastBlockComplete = in.readBoolean(); - - // read located blocks - int nrBlocks = in.readInt(); - this.blocks = new ArrayList(nrBlocks); - for (int idx = 0; idx < nrBlocks; idx++) { - LocatedBlock blk = new LocatedBlock(); - blk.readFields(in); - this.blocks.add(blk); - } - } - @Override public String toString() { final StringBuilder b = new StringBuilder(getClass().getSimpleName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java index 3bcc76474ed..80dbeb79cdd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java @@ -39,12 +39,10 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientDatanodeProtocolProtos.GetBlo import org.apache.hadoop.hdfs.protocol.proto.ClientDatanodeProtocolProtos.GetBlockLocalPathInfoResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientDatanodeProtocolProtos.GetReplicaVisibleLengthRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientDatanodeProtocolProtos.RefreshNamenodesRequestProto; -import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; -import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 46a3c825cd6..66b7c2f8687 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -40,7 
+40,6 @@ import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.UpgradeAction; @@ -102,24 +101,16 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.Update import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdatePipelineRequestProto; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import com.google.protobuf.ByteString; @@ -127,8 +118,8 @@ import com.google.protobuf.ServiceException; /** * This class forwards NN's ClientProtocol calls as RPC calls to the NN server - * while translating from the parameter types used in ClientProtocol to those - * used in protocolR23Compatile.*. + * while translating from the parameter types used in ClientProtocol to the + * new PB types. 
*/ @InterfaceAudience.Private @InterfaceStability.Stable diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java index 01bd88ebbff..b2c6dd2cb26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java @@ -22,10 +22,8 @@ import java.io.Closeable; import java.io.IOException; import org.apache.hadoop.hdfs.protocol.proto.GetUserMappingsProtocolProtos.GetGroupsForUserRequestProto; import org.apache.hadoop.hdfs.protocol.proto.GetUserMappingsProtocolProtos.GetGroupsForUserResponseProto; -import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtocolMetaInterface; -import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java index 7de2c0e4614..a8aba30c846 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java @@ -35,7 +35,6 @@ import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransacti import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RegisterRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RollEditLogRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.StartCheckpointRequestProto; -import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; @@ -46,7 +45,6 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtocolMetaInterface; -import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java index 96ba2cf7a06..ab06a887851 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java @@ -22,10 +22,8 @@ 
import java.io.Closeable; import java.io.IOException; import org.apache.hadoop.hdfs.protocol.proto.RefreshAuthorizationPolicyProtocolProtos.RefreshServiceAclRequestProto; -import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtocolMetaInterface; -import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java index 6f07617baba..1cb780e3d44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java @@ -23,10 +23,8 @@ import java.io.IOException; import org.apache.hadoop.hdfs.protocol.proto.RefreshUserMappingsProtocolProtos.RefreshSuperUserGroupsConfigurationRequestProto; import org.apache.hadoop.hdfs.protocol.proto.RefreshUserMappingsProtocolProtos.RefreshUserToGroupsMappingsRequestProto; -import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtocolMetaInterface; -import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ProtocolSignatureWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ProtocolSignatureWritable.java deleted file mode 100644 index 9dc929bf53e..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ProtocolSignatureWritable.java +++ /dev/null @@ -1,110 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hdfs.protocolR23Compatible; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; - -@InterfaceAudience.Private -@InterfaceStability.Evolving -public class ProtocolSignatureWritable implements Writable { - static { // register a ctor - WritableFactories.setFactory - (ProtocolSignatureWritable.class, - new WritableFactory() { - public Writable newInstance() { return new ProtocolSignatureWritable(); } - }); - } - - private long version; - private int[] methods = null; // an array of method hash codes - - public static org.apache.hadoop.ipc.ProtocolSignature convert( - final ProtocolSignatureWritable ps) { - if (ps == null) return null; - return new org.apache.hadoop.ipc.ProtocolSignature( - ps.getVersion(), ps.getMethods()); - } - - public static ProtocolSignatureWritable convert( - final org.apache.hadoop.ipc.ProtocolSignature ps) { - if (ps == null) return null; - return new ProtocolSignatureWritable(ps.getVersion(), ps.getMethods()); - } - - /** - * default constructor - */ - public ProtocolSignatureWritable() { - } - - /** - * Constructor - * - * @param version server version - * @param methodHashcodes hash codes of the methods supported by server - */ - public ProtocolSignatureWritable(long version, int[] methodHashcodes) { - this.version = version; - this.methods = methodHashcodes; - } - - public long getVersion() { - return version; - } - - public int[] getMethods() { - return methods; - } - - @Override - public void readFields(DataInput in) throws IOException { - version = in.readLong(); - boolean hasMethods = in.readBoolean(); - if (hasMethods) { - int numMethods = in.readInt(); - methods = new int[numMethods]; - for (int i=0; i - - - - Namenode Client Protocols Compatible with the version - of Hadoop Release 23 - - -

-This package is for ALL versions of HDFS protocols that use writable data types
-and are compatible with the version of the protocol that was
-shipped with Release 23 of Hadoop.
-
-Compatibility should be maintained:
-  • Do NOT delete any methods
-  • Do NOT change the signatures of any method:
-    do not change parameters, parameter types
-    or exceptions thrown by the method.
-
-You can add new methods and new types. If you need to change a method's
-signature, please add a new method instead.
-When you add new methods and new types do NOT change the version number.
-
-Version number is changed ONLY when compatibility is broken (which
-should be very rare and a big deal).
-

    \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index 2372d3c3da1..bf35af063f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -153,8 +153,6 @@ public class DatanodeDescriptor extends DatanodeInfo { */ private boolean disallowed = false; - public DatanodeDescriptor() {} - /** * DatanodeDescriptor constructor * @param nodeID id of the data node diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java index 95573f38f09..978f356abf1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java @@ -17,13 +17,7 @@ */ package org.apache.hadoop.hdfs.server.common; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableUtils; import com.google.common.base.Joiner; @@ -33,16 +27,16 @@ import com.google.common.base.Joiner; * TODO namespaceID should be long and computed as hash(address + port) */ @InterfaceAudience.Private -public class StorageInfo implements Writable { +public class StorageInfo { public int layoutVersion; // layout version of the storage data public int namespaceID; // id of the file system public String clusterID; // id of the cluster public long cTime; // creation time of the file system state - + public StorageInfo () { this(0, 0, "", 0L); } - + public StorageInfo(int layoutV, int nsID, String cid, long cT) { layoutVersion = layoutV; clusterID = cid; @@ -83,23 +77,6 @@ public class StorageInfo implements Writable { namespaceID = from.namespaceID; cTime = from.cTime; } - - ///////////////////////////////////////////////// - // Writable - ///////////////////////////////////////////////// - public void write(DataOutput out) throws IOException { - out.writeInt(getLayoutVersion()); - out.writeInt(getNamespaceID()); - WritableUtils.writeString(out, clusterID); - out.writeLong(getCTime()); - } - - public void readFields(DataInput in) throws IOException { - layoutVersion = in.readInt(); - namespaceID = in.readInt(); - clusterID = WritableUtils.readString(in); - cTime = in.readLong(); - } public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/UpgradeStatusReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/UpgradeStatusReport.java index 15d3b2178e7..3903cdb32d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/UpgradeStatusReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/UpgradeStatusReport.java @@ -17,14 +17,7 @@ */ package org.apache.hadoop.hdfs.server.common; -import java.io.DataInput; -import java.io.DataOutput; -import 
java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableFactories; -import org.apache.hadoop.io.WritableFactory; /** * Base upgrade upgradeStatus class. @@ -33,17 +26,11 @@ import org.apache.hadoop.io.WritableFactory; * Describes status of current upgrade. */ @InterfaceAudience.Private -public class UpgradeStatusReport implements Writable { +public class UpgradeStatusReport { protected int version; protected short upgradeStatus; protected boolean finalized; - public UpgradeStatusReport() { - this.version = 0; - this.upgradeStatus = 0; - this.finalized = false; - } - public UpgradeStatusReport(int version, short status, boolean isFinalized) { this.version = version; this.upgradeStatus = status; @@ -98,29 +85,4 @@ public class UpgradeStatusReport implements Writable { public String toString() { return getStatusText(false); } - - ///////////////////////////////////////////////// - // Writable - ///////////////////////////////////////////////// - static { // register a ctor - WritableFactories.setFactory - (UpgradeStatusReport.class, - new WritableFactory() { - public Writable newInstance() { return new UpgradeStatusReport(); } - }); - } - - /** - */ - public void write(DataOutput out) throws IOException { - out.writeInt(this.version); - out.writeShort(this.upgradeStatus); - } - - /** - */ - public void readFields(DataInput in) throws IOException { - this.version = in.readInt(); - this.upgradeStatus = in.readShort(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java index 5f5ebaf7481..f88e15f43b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointSignature.java @@ -17,15 +17,11 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import java.io.DataInput; -import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.namenode.FSImage; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.WritableUtils; import com.google.common.collect.ComparisonChain; @@ -33,16 +29,15 @@ import com.google.common.collect.ComparisonChain; * A unique signature intended to identify checkpoint transactions. 
*/ @InterfaceAudience.Private -public class CheckpointSignature extends StorageInfo - implements WritableComparable { +public class CheckpointSignature extends StorageInfo + implements Comparable { + private static final String FIELD_SEPARATOR = ":"; private static final int NUM_FIELDS = 7; String blockpoolID = ""; long mostRecentCheckpointTxId; long curSegmentTxId; - public CheckpointSignature() {} - CheckpointSignature(FSImage fsImage) { super(fsImage.getStorage()); blockpoolID = fsImage.getBlockPoolID(); @@ -162,21 +157,4 @@ public class CheckpointSignature extends StorageInfo (int)(cTime ^ mostRecentCheckpointTxId ^ curSegmentTxId) ^ clusterID.hashCode() ^ blockpoolID.hashCode(); } - - ///////////////////////////////////////////////// - // Writable - ///////////////////////////////////////////////// - public void write(DataOutput out) throws IOException { - super.write(out); - WritableUtils.writeString(out, blockpoolID); - out.writeLong(mostRecentCheckpointTxId); - out.writeLong(curSegmentTxId); - } - - public void readFields(DataInput in) throws IOException { - super.readFields(in); - blockpoolID = WritableUtils.readString(in); - mostRecentCheckpointTxId = in.readLong(); - curSegmentTxId = in.readLong(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java index 1d3f7b41389..43d1e3188be 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockRecoveryCommand.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; import java.util.Collection; import java.util.ArrayList; @@ -58,14 +55,6 @@ public class BlockRecoveryCommand extends DatanodeCommand { public static class RecoveringBlock extends LocatedBlock { private long newGenerationStamp; - /** - * Create empty RecoveringBlock. - */ - public RecoveringBlock() { - super(); - newGenerationStamp = -1L; - } - /** * Create RecoveringBlock. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java index 6f2bfbd2c41..da1c9bdd829 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksWithLocations.java @@ -17,16 +17,9 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.Block; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableUtils; /** A class to implement an array of BlockLocations * It provide efficient customized serialization/deserialization methods @@ -34,23 +27,17 @@ import org.apache.hadoop.io.WritableUtils; */ @InterfaceAudience.Private @InterfaceStability.Evolving -public class BlocksWithLocations implements Writable { +public class BlocksWithLocations { /** * A class to keep track of a block and its locations */ @InterfaceAudience.Private @InterfaceStability.Evolving - public static class BlockWithLocations implements Writable { + public static class BlockWithLocations { Block block; String datanodeIDs[]; - /** default constructor */ - public BlockWithLocations() { - block = new Block(); - datanodeIDs = null; - } - /** constructor */ public BlockWithLocations(Block b, String[] datanodes) { block = b; @@ -66,33 +53,10 @@ public class BlocksWithLocations implements Writable { public String[] getDatanodes() { return datanodeIDs; } - - /** deserialization method */ - public void readFields(DataInput in) throws IOException { - block.readFields(in); - int len = WritableUtils.readVInt(in); // variable length integer - datanodeIDs = new String[len]; - for(int i=0; i blockList = new ArrayList(MAX_BLOCKS); ArrayList blockInfoList = new ArrayList(); int headIndex; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java index 3b7eccdca6a..ee6a26026c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java @@ -28,6 +28,7 @@ import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.protocol.Block; @@ -80,8 +81,8 @@ public class TestCorruptReplicaInfo extends TestCase { block_ids.add((long)i); } - DatanodeDescriptor dn1 = new DatanodeDescriptor(); - DatanodeDescriptor dn2 = new DatanodeDescriptor(); + DatanodeDescriptor dn1 = DFSTestUtil.getLocalDatanodeDescriptor(); + DatanodeDescriptor dn2 = DFSTestUtil.getLocalDatanodeDescriptor(); crm.addToCorruptReplicasMap(getBlock(0), dn1, "TEST"); assertEquals("Number of corrupt blocks not returning correctly", diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java index ac3885738eb..e43310cb437 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeDescriptor.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import java.util.ArrayList; +import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.common.GenerationStamp; @@ -36,7 +37,7 @@ public class TestDatanodeDescriptor extends TestCase { final int REMAINING_BLOCKS = 2; final int MAX_LIMIT = MAX_BLOCKS - REMAINING_BLOCKS; - DatanodeDescriptor dd = new DatanodeDescriptor(); + DatanodeDescriptor dd = DFSTestUtil.getLocalDatanodeDescriptor(); ArrayList blockList = new ArrayList(MAX_BLOCKS); for (int i=0; i Date: Wed, 11 Apr 2012 01:55:05 +0000 Subject: [PATCH 26/29] =?UTF-8?q?HDFS-2696.=20Fix=20the=20fuse-fds=20build?= =?UTF-8?q?.=20Contributed=20by=20Bruno=20Mah=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312068 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hadoop-hdfs/src/contrib/build.xml | 63 ------- .../contrib/{ => fuse-dfs}/build-contrib.xml | 2 +- .../src/contrib/fuse-dfs/build.xml | 26 +-- .../hadoop-hdfs/src/contrib/fuse-dfs/pom.xml | 161 ++++++++++++++++++ .../src/contrib/fuse-dfs/src/Makefile.am | 4 +- hadoop-hdfs-project/pom.xml | 1 + 7 files changed, 174 insertions(+), 85 deletions(-) delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/build.xml rename hadoop-hdfs-project/hadoop-hdfs/src/contrib/{ => fuse-dfs}/build-contrib.xml (99%) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/pom.xml diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 1710957395f..24a9ddc4f0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -485,6 +485,8 @@ Release 2.0.0 - UNRELEASED HDFS-3248. bootstrapStandby repeated twice in hdfs namenode usage message (Colin Patrick McCabe via todd) + HDFS-2696. Fix the fuse-fds build. (Bruno Mahé via eli) + BREAKDOWN OF HDFS-1623 SUBTASKS HDFS-2179. Add fencing framework and mechanisms for NameNode HA. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/build.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/build.xml deleted file mode 100644 index e69640ace78..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/build.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/build-contrib.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build-contrib.xml similarity index 99% rename from hadoop-hdfs-project/hadoop-hdfs/src/contrib/build-contrib.xml rename to hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build-contrib.xml index 0c57fb90a21..bde5e4e7b23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/build-contrib.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build-contrib.xml @@ -70,7 +70,7 @@ - + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build.xml index c8795c50b0f..ab3c92b18c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/build.xml @@ -17,19 +17,19 @@ limitations under the License. --> - + - + - + - + - + @@ -46,24 +46,12 @@ - - - - - - - - - - - - - + - + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/pom.xml new file mode 100644 index 00000000000..0e61d8dae66 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/pom.xml @@ -0,0 +1,161 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 3.0.0-SNAPSHOT + ../../../../../hadoop-project + + org.apache.hadoop.contrib + hadoop-hdfs-fuse + 3.0.0-SNAPSHOT + pom + + Apache Hadoop HDFS Fuse + Apache Hadoop HDFS Fuse + + + + org.apache.hadoop + hadoop-hdfs + compile + + + org.apache.hadoop + hadoop-hdfs + test + test-jar + + + + + + + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.6 + + + org.apache.maven.plugins + maven-surefire-plugin + + 1 + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + + javadoc + + site + + true + true + false + ${maven.compile.source} + ${maven.compile.encoding} + + + HttpFs API + * + + + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + + + + false + + + dependencies + + site + + + + + org.apache.rat + apache-rat-plugin + + + + + + + + + + + fuse + + false + + + + + org.apache.maven.plugins + maven-antrun-plugin + + + prepare-compile-native + generate-sources + + run + + + + + + + + + + + compile-fuse + compile + + run + + + + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/Makefile.am b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/Makefile.am index d62384d596a..85c81c226aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/Makefile.am +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/Makefile.am @@ -17,5 +17,5 @@ bin_PROGRAMS = fuse_dfs fuse_dfs_SOURCES = fuse_dfs.c fuse_options.c fuse_trash.c fuse_stat_struct.c fuse_users.c fuse_init.c fuse_connect.c fuse_impls_access.c fuse_impls_chmod.c fuse_impls_chown.c fuse_impls_create.c fuse_impls_flush.c fuse_impls_getattr.c fuse_impls_mkdir.c fuse_impls_mknod.c fuse_impls_open.c fuse_impls_read.c fuse_impls_release.c fuse_impls_readdir.c fuse_impls_rename.c fuse_impls_rmdir.c fuse_impls_statfs.c fuse_impls_symlink.c fuse_impls_truncate.c fuse_impls_utimens.c 
fuse_impls_unlink.c fuse_impls_write.c AM_CFLAGS= -Wall -g -AM_CPPFLAGS= -DPERMS=$(PERMS) -D_FILE_OFFSET_BITS=64 -I$(JAVA_HOME)/include -I$(HADOOP_PREFIX)/src/c++/libhdfs -I$(JAVA_HOME)/include/linux -D_FUSE_DFS_VERSION=\"$(PACKAGE_VERSION)\" -DPROTECTED_PATHS=\"$(PROTECTED_PATHS)\" -I$(FUSE_HOME)/include -AM_LDFLAGS= -L$(HADOOP_PREFIX)/build/c++/$(BUILD_PLATFORM)/lib -lhdfs -L$(FUSE_HOME)/lib -lfuse -L$(JAVA_HOME)/jre/lib/$(OS_ARCH)/server -ljvm +AM_CPPFLAGS= -DPERMS=$(PERMS) -D_FILE_OFFSET_BITS=64 -I$(JAVA_HOME)/include -I$(HADOOP_PREFIX)/../../src/main/native -I$(JAVA_HOME)/include/linux -D_FUSE_DFS_VERSION=\"$(PACKAGE_VERSION)\" -DPROTECTED_PATHS=\"$(PROTECTED_PATHS)\" -I$(FUSE_HOME)/include +AM_LDFLAGS= -L$(HADOOP_PREFIX)/../../target/native/target/usr/local/lib -lhdfs -L$(FUSE_HOME)/lib -lfuse -L$(JAVA_HOME)/jre/lib/$(OS_ARCH)/server -ljvm -lm diff --git a/hadoop-hdfs-project/pom.xml b/hadoop-hdfs-project/pom.xml index 1d39a2c4a91..ea775912ff9 100644 --- a/hadoop-hdfs-project/pom.xml +++ b/hadoop-hdfs-project/pom.xml @@ -31,6 +31,7 @@ hadoop-hdfs hadoop-hdfs-httpfs hadoop-hdfs/src/contrib/bkjournal + hadoop-hdfs/src/contrib/fuse-dfs From 13bfe4794bc01bad54d4ced8deff9be01269f0dc Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Wed, 11 Apr 2012 02:50:02 +0000 Subject: [PATCH 27/29] MAPREDUCE-4040. History links should use hostname rather than IP address. (Contributed by Bhallamudi Venkata Siva Kamesh) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1312076 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 63b397a305f..03ebbdb0bbe 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -307,6 +307,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4017. Add jobname to jobsummary log (tgraves and Koji Noguchi via bobby) + MAPREDUCE-4040. History links should use hostname rather than IP address. + (Bhallamudi Venkata Siva Kamesh via sseth) + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index 494431614d7..5be28fd92e9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -507,7 +507,7 @@ public class JobHistoryUtils { StringBuffer sb = new StringBuffer(); if (address.getAddress().isAnyLocalAddress() || address.getAddress().isLoopbackAddress()) { - sb.append(InetAddress.getLocalHost().getHostAddress()); + sb.append(InetAddress.getLocalHost().getCanonicalHostName()); } else { sb.append(address.getHostName()); } From 21824d8232875a6aba9c9c1669507ea9d09586df Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 11 Apr 2012 05:16:06 +0000 Subject: [PATCH 28/29] HDFS-3247. Improve bootstrapStandby behavior when original NN is not active. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1324558 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/namenode/ha/BootstrapStandby.java | 98 +++++++++++++++---- .../namenode/ha/TestBootstrapStandby.java | 24 ++++- 3 files changed, 106 insertions(+), 19 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 24a9ddc4f0c..c8fa0cff51d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -362,6 +362,9 @@ Release 2.0.0 - UNRELEASED HDFS-3244. Remove dead writable code from hdfs/protocol. (eli) + HDFS-3247. Improve bootstrapStandby behavior when original NN is not active + (todd) + OPTIMIZATIONS HDFS-3024. Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java index 2c4a04bf61a..481dde3cd2d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java @@ -33,10 +33,14 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.HAServiceStatus; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.NameNodeProxies; -import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature; import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; @@ -47,8 +51,10 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.hdfs.tools.NNHAServiceTarget; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Tool; @@ -65,7 +71,7 @@ import com.google.common.collect.Sets; */ @InterfaceAudience.Private public class BootstrapStandby implements Tool, Configurable { - private static final Log LOG = LogFactory.getLog(BootstrapStandby.class); + private static final Log LOG = LogFactory.getLog(BootstrapStandby.class); private String nsId; private String nnId; private String otherNNId; @@ -79,7 +85,13 @@ public class BootstrapStandby implements Tool, Configurable { private boolean force = false; private boolean interactive = true; - + + // Exit/return codes. 
+ static final int ERR_CODE_FAILED_CONNECT = 2; + static final int ERR_CODE_INVALID_VERSION = 3; + static final int ERR_CODE_OTHER_NN_NOT_ACTIVE = 4; + static final int ERR_CODE_ALREADY_FORMATTED = 5; + static final int ERR_CODE_LOGS_UNAVAILABLE = 6; public int run(String[] args) throws Exception { SecurityUtil.initKrb5CipherSuites(); @@ -121,24 +133,43 @@ public class BootstrapStandby implements Tool, Configurable { System.err.println("Usage: " + this.getClass().getSimpleName() + "[-force] [-nonInteractive]"); } + + private NamenodeProtocol createNNProtocolProxy() + throws IOException { + return NameNodeProxies.createNonHAProxy(getConf(), + otherIpcAddr, NamenodeProtocol.class, + UserGroupInformation.getLoginUser(), true) + .getProxy(); + } + + private HAServiceProtocol createHAProtocolProxy() + throws IOException { + return new NNHAServiceTarget(new HdfsConfiguration(conf), + nsId, otherNNId).getProxy(conf, 15000); + } private int doRun() throws IOException { - ProxyAndInfo proxyAndInfo = NameNodeProxies.createNonHAProxy(getConf(), - otherIpcAddr, NamenodeProtocol.class, - UserGroupInformation.getLoginUser(), true); - NamenodeProtocol proxy = proxyAndInfo.getProxy(); + + NamenodeProtocol proxy = createNNProtocolProxy(); NamespaceInfo nsInfo; try { nsInfo = proxy.versionRequest(); - checkLayoutVersion(nsInfo); } catch (IOException ioe) { LOG.fatal("Unable to fetch namespace information from active NN at " + otherIpcAddr + ": " + ioe.getMessage()); if (LOG.isDebugEnabled()) { LOG.debug("Full exception trace", ioe); } - return 1; + return ERR_CODE_FAILED_CONNECT; } + + if (!checkLayoutVersion(nsInfo)) { + LOG.fatal("Layout version on remote node (" + + nsInfo.getLayoutVersion() + ") does not match " + + "this node's layout version (" + HdfsConstants.LAYOUT_VERSION + ")"); + return ERR_CODE_INVALID_VERSION; + } + System.out.println( "=====================================================\n" + @@ -153,12 +184,35 @@ public class BootstrapStandby implements Tool, Configurable { " Layout version: " + nsInfo.getLayoutVersion() + "\n" + "====================================================="); + // Ensure the other NN is active - we can't force it to roll edit logs + // below if it's not active. + if (!isOtherNNActive()) { + String err = "NameNode " + nsId + "." + nnId + " at " + otherIpcAddr + + " is not currently in ACTIVE state."; + if (!interactive) { + LOG.fatal(err + " Please transition it to " + + "active before attempting to bootstrap a standby node."); + return ERR_CODE_OTHER_NN_NOT_ACTIVE; + } + + System.err.println(err); + if (ToolRunner.confirmPrompt( + "Do you want to automatically transition it to active now?")) { + transitionOtherNNActive(); + } else { + LOG.fatal("User aborted. Exiting without bootstrapping standby."); + return ERR_CODE_OTHER_NN_NOT_ACTIVE; + } + } + + + // Check with the user before blowing away data. if (!NameNode.confirmFormat( Sets.union(Sets.newHashSet(dirsToFormat), Sets.newHashSet(editUrisToFormat)), force, interactive)) { - return 1; + return ERR_CODE_ALREADY_FORMATTED; } // Force the active to roll its log @@ -180,7 +234,7 @@ public class BootstrapStandby implements Tool, Configurable { // Ensure that we have enough edits already in the shared directory to // start up from the last checkpoint on the active. 
if (!checkLogsAvailableForRead(image, imageTxId, rollTxId)) { - return 1; + return ERR_CODE_LOGS_UNAVAILABLE; } image.getStorage().writeTransactionIdFileToStorage(rollTxId); @@ -193,6 +247,14 @@ public class BootstrapStandby implements Tool, Configurable { return 0; } + + private void transitionOtherNNActive() + throws AccessControlException, ServiceFailedException, IOException { + LOG.info("Transitioning the running namenode to active..."); + createHAProtocolProxy().transitionToActive(); + LOG.info("Successful"); + } + private boolean checkLogsAvailableForRead(FSImage image, long imageTxId, long rollTxId) { @@ -225,12 +287,14 @@ public class BootstrapStandby implements Tool, Configurable { } } - private void checkLayoutVersion(NamespaceInfo nsInfo) throws IOException { - if (nsInfo.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) { - throw new IOException("Layout version on remote node (" + - nsInfo.getLayoutVersion() + ") does not match " + - "this node's layout version (" + HdfsConstants.LAYOUT_VERSION + ")"); - } + private boolean checkLayoutVersion(NamespaceInfo nsInfo) throws IOException { + return (nsInfo.getLayoutVersion() == HdfsConstants.LAYOUT_VERSION); + } + + private boolean isOtherNNActive() + throws AccessControlException, IOException { + HAServiceStatus status = createHAProtocolProxy().getServiceStatus(); + return status.getState() == HAServiceState.ACTIVE; } private void parseConfAndFindOtherNN() throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java index fe3fe749ce9..b26e85ae295 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.net.URI; @@ -40,6 +41,7 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -170,7 +172,7 @@ public class TestBootstrapStandby { int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); - assertEquals(1, rc); + assertEquals(BootstrapStandby.ERR_CODE_LOGS_UNAVAILABLE, rc); } finally { logs.stopCapturing(); } @@ -184,7 +186,7 @@ public class TestBootstrapStandby { int rc = BootstrapStandby.run( new String[]{"-nonInteractive"}, cluster.getConfiguration(1)); - assertEquals(1, rc); + assertEquals(BootstrapStandby.ERR_CODE_ALREADY_FORMATTED, rc); // Should pass with -force rc = BootstrapStandby.run( @@ -192,6 +194,24 @@ public class TestBootstrapStandby { cluster.getConfiguration(1)); assertEquals(0, rc); } + + @Test(timeout=30000) + public void testOtherNodeNotActive() throws Exception { + cluster.transitionToStandby(0); + int rc = BootstrapStandby.run( + new String[]{"-nonInteractive"}, + cluster.getConfiguration(1)); + assertEquals(BootstrapStandby.ERR_CODE_OTHER_NN_NOT_ACTIVE, rc); + + // Answer "yes" to the prompt about transition to active + System.setIn(new ByteArrayInputStream("yes\n".getBytes())); + rc = BootstrapStandby.run( + new String[]{"-force"}, + cluster.getConfiguration(1)); + 
assertEquals(0, rc); + + assertFalse(nn0.getNamesystem().isInStandbyState()); + } private void assertNNFilesMatch() throws Exception { List curDirs = Lists.newArrayList(); From dab66bee5559a75f6d3756fc224fcda9cd4d598d Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 11 Apr 2012 05:28:58 +0000 Subject: [PATCH 29/29] HDFS-3249. Use ToolRunner.confirmPrompt in NameNode. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1324564 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../hadoop/hdfs/server/namenode/NameNode.java | 29 +------------------ 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c8fa0cff51d..b82704cfe51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -365,6 +365,8 @@ Release 2.0.0 - UNRELEASED HDFS-3247. Improve bootstrapStandby behavior when original NN is not active (todd) + HDFS-3249. Use ToolRunner.confirmPrompt in NameNode (todd) + OPTIMIZATIONS HDFS-3024. Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 1549874f031..dec7452e27f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -72,6 +72,7 @@ import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.StringUtils; +import static org.apache.hadoop.util.ToolRunner.confirmPrompt; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; @@ -971,34 +972,6 @@ public class NameNode { } } - /** - * Print out a prompt to the user, and return true if the user - * responds with "Y" or "yes". - */ - static boolean confirmPrompt(String prompt) throws IOException { - while (true) { - System.err.print(prompt + " (Y or N) "); - StringBuilder responseBuilder = new StringBuilder(); - while (true) { - int c = System.in.read(); - if (c == -1 || c == '\r' || c == '\n') { - break; - } - responseBuilder.append((char)c); - } - - String response = responseBuilder.toString(); - if (response.equalsIgnoreCase("y") || - response.equalsIgnoreCase("yes")) { - return true; - } else if (response.equalsIgnoreCase("n") || - response.equalsIgnoreCase("no")) { - return false; - } - // else ask them again - } - } - public static NameNode createNameNode(String argv[], Configuration conf) throws IOException { if (conf == null)