diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 095e9598d13..56a728c9e5c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -314,6 +314,8 @@ Release 0.23.2 - UNRELEASED
     HDFS-3006. In WebHDFS, when the return body is empty, set the Content-Type
     to application/octet-stream instead of application/json. (szetszwo)
 
+    HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd)
+
 Release 0.23.1 - 2012-02-17
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
index 12a11df92c0..729748f3026 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
@@ -44,6 +44,15 @@
 @InterfaceAudience.Private
 public class LayoutVersion {
 
+  /**
+   * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+   * sometimes be skipped for append() calls. If we see such a case when
+   * loading the edits, but the version is known to have that bug, we
+   * work around the issue. Otherwise we should consider it a corruption
+   * and bail.
+   */
+  public static final int BUGFIX_HDFS_2991_VERSION = -40;
+
   /**
    * Enums for features that change the layout version.
    *
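A note on the comparison direction used with this constant: HDFS layout versions are negative integers that decrease as the on-disk format evolves, so `logVersion <= BUGFIX_HDFS_2991_VERSION` in the loader change below means "written at or after the fix". A minimal, self-contained sketch of that convention (class and method names here are hypothetical, not part of the patch):

```java
/**
 * Illustration only, not part of the patch: HDFS layout versions grow
 * more negative over time, so "version -40 or newer" is written with <=.
 */
class LayoutVersionSketch {
  static final int BUGFIX_HDFS_2991_VERSION = -40; // mirrors the constant above

  /** True if an edit log written at logVersion already carries the fix. */
  static boolean hasHdfs2991Fix(int logVersion) {
    return logVersion <= BUGFIX_HDFS_2991_VERSION;
  }

  public static void main(String[] args) {
    System.out.println(hasHdfs2991Fix(-40)); // true: first fixed layout
    System.out.println(hasHdfs2991Fix(-41)); // true: any later layout
    System.out.println(hasHdfs2991Fix(-38)); // false: pre-fix, OP_ADD may be missing
  }
}
```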
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index ba045d106da..45ce9df154b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -249,8 +249,6 @@ INodeFileUnderConstruction addFile(String path,
                                    +" to the file system");
       return null;
     }
-    // add create file record to log, record new generation stamp
-    fsImage.getEditLog().logOpenFile(path, newNode);
 
     if(NameNode.stateChangeLog.isDebugEnabled()) {
       NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 4aa79dfa916..b890d16e652 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -206,13 +206,22 @@ int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
           fsDir.updateFile(oldFile, addCloseOp.path, blocks,
               addCloseOp.mtime, addCloseOp.atime);
           if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) { // OP_CLOSE
-            assert oldFile.isUnderConstruction() :
-                "File is not under construction: " + addCloseOp.path;
+            if (!oldFile.isUnderConstruction() &&
+                logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
+              // There was a bug (HDFS-2991) in hadoop < 0.23.2 where OP_CLOSE
+              // could show up twice in a row. In later versions this is
+              // fixed, so we should treat it as an error.
+              throw new IOException(
+                  "File is not under construction: " + addCloseOp.path);
+            }
             fsNamesys.getBlockManager().completeBlock(
                 oldFile, blocks.length-1, true);
-            INodeFile newFile =
-                ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
-            fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+
+            if (oldFile.isUnderConstruction()) {
+              INodeFile newFile =
+                  ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
+              fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+            }
           } else if(!oldFile.isUnderConstruction()) { // OP_ADD for append
             INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
                 oldFile.getLocalNameBytes(),
@@ -231,8 +240,10 @@ int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
           if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) {
             fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
           } else { // Ops.OP_CLOSE
-            fsNamesys.leaseManager.removeLease(
-                ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+            if (oldFile.isUnderConstruction()) {
+              fsNamesys.leaseManager.removeLease(
+                  ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+            }
           }
           break;
         }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 40f173405a4..604fbb00bdb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1283,9 +1283,13 @@ private LocatedBlock startFileInternal(String src,
             clientNode);
         dir.replaceNode(src, node, cons);
         leaseManager.addLease(cons.getClientName(), src);
-
+
         // convert last block to under-construction
-        return blockManager.convertLastBlockToUnderConstruction(cons);
+        LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
+
+        // add append file record to log, record lease, etc.
+        getEditLog().logOpenFile(src, cons);
+        return ret;
       } else {
        // Now we can add the name to the filesystem. This file has no
        // blocks associated with it.
@@ -1301,6 +1305,9 @@ private LocatedBlock startFileInternal(String src,
               "Unable to add file to namespace.");
         }
         leaseManager.addLease(newNode.getClientName(), src);
+
+      // add create file record to log, record new generation stamp
+      getEditLog().logOpenFile(src, newNode);
       if (NameNode.stateChangeLog.isDebugEnabled()) {
         NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
                                      +"add "+src+" to namespace for "+holder);
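The loader change above follows a version-gated tolerance pattern: an OP_CLOSE for a file that is not under construction is fatal only when the log's layout version is known to carry the fix; otherwise the loader works around the missing OP_ADD. A condensed, self-contained sketch of that decision, with hypothetical names standing in for the loader's internals:

```java
import java.io.IOException;

/** Minimal sketch of the version-gated tolerance used when replaying OP_CLOSE. */
class ReplayToleranceSketch {
  // Mirrors LayoutVersion.BUGFIX_HDFS_2991_VERSION in the patch above.
  static final int BUGFIX_HDFS_2991_VERSION = -40;

  static void onOpClose(boolean fileIsUnderConstruction, int logVersion)
      throws IOException {
    if (!fileIsUnderConstruction) {
      if (logVersion <= BUGFIX_HDFS_2991_VERSION) {
        // The writer already had the fix, so a missing OP_ADD is real corruption.
        throw new IOException("File is not under construction");
      }
      // Pre-fix writer: tolerate the duplicate OP_CLOSE and skip the
      // under-construction-only steps (convertToInodeFile, removeLease).
      return;
    }
    // Normal path: finalize the under-construction file here.
  }
}
```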
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
new file mode 100644
index 00000000000..033478f3a81
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.EnumMap;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
+import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage;
+import org.apache.hadoop.hdfs.util.Holder;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Test;
+
+/**
+ * Unit test to make sure that append() properly logs the right
+ * things to the edit log, such that files aren't lost or truncated
+ * on restart.
+ */
+public class TestFileAppendRestart {
+  private static final int BLOCK_SIZE = 4096;
+  private static final String HADOOP_23_BROKEN_APPEND_TGZ =
+      "image-with-buggy-append.tgz";
+
+  private void writeAndAppend(FileSystem fs, Path p,
+      int lengthForCreate, int lengthForAppend) throws IOException {
+    // Create the file with a small (4096-byte) block size so that
+    // multiple blocks are written
+    FSDataOutputStream stream = fs.create(
+        p, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+    try {
+      AppendTestUtil.write(stream, 0, lengthForCreate);
+      stream.close();
+
+      stream = fs.append(p);
+      AppendTestUtil.write(stream, lengthForCreate, lengthForAppend);
+      stream.close();
+    } finally {
+      IOUtils.closeStream(stream);
+    }
+
+    int totalLength = lengthForCreate + lengthForAppend;
+    assertEquals(totalLength, fs.getFileStatus(p).getLen());
+  }
+
+  /**
+   * Regression test for HDFS-2991. Creates and appends to files,
+   * both where the existing data ends exactly on a block boundary
+   * and where it does not.
+   */
+  @Test
+  public void testAppendRestart() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    // Turn off persistent IPC, so that the DFSClient can survive NN restart
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+        0);
+    MiniDFSCluster cluster = null;
+
+    FSDataOutputStream stream = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      FileSystem fs = cluster.getFileSystem();
+      File editLog =
+          new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster).get(0),
+              NNStorage.getInProgressEditsFileName(1));
+      EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
+
+      Path p1 = new Path("/block-boundaries");
+      writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
+
+      counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+      assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+      Path p2 = new Path("/not-block-boundaries");
+      writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE);
+      counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      // This file yields *three* OP_ADDs rather than two: one for the
+      // initial create, one from re-opening the file to establish the
+      // lease for append, and one from the updatePipeline call issued
+      // when the partial last block has its generation stamp incremented.
+      // Together with p1 above, the cumulative counts are 5 and 4.
+      assertEquals(5, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+      assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+      cluster.restartNameNode();
+
+      AppendTestUtil.check(fs, p1, 2*BLOCK_SIZE);
+      AppendTestUtil.check(fs, p2, 3*BLOCK_SIZE/2);
+    } finally {
+      IOUtils.closeStream(stream);
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+  /**
+   * Earlier versions of HDFS had a bug (HDFS-2991) which caused
+   * append(), when called exactly at a block boundary,
+   * to not log an OP_ADD. This test ensures that we can read edit
+   * logs from such buggy versions correctly, by loading a namesystem
+   * image that was created with 0.23.1-rc2 and exhibits the issue.
+   */
+  @Test
+  public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
+    final Configuration conf = new HdfsConfiguration();
+
+    String tarFile = System.getProperty("test.cache.data", "build/test/cache")
+        + "/" + HADOOP_23_BROKEN_APPEND_TGZ;
+    String testDir = System.getProperty("test.build.data", "build/test/data");
+    File dfsDir = new File(testDir, "image-with-buggy-append");
+    if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
+      throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
+    }
+    FileUtil.unTar(new File(tarFile), new File(testDir));
+
+    File nameDir = new File(dfsDir, "name");
+    GenericTestUtils.assertExists(nameDir);
+
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+      .numDataNodes(0)
+      .format(false)
+      .manageDataDfsDirs(false)
+      .manageNameDfsDirs(false)
+      .waitSafeMode(false)
+      .startupOption(StartupOption.UPGRADE)
+      .build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      Path testPath = new Path("/tmp/io_data/test_io_0");
+      assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}
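The writeAndAppend helper above exercises the plain create-then-append client pattern that the fix is about. For reference, a minimal standalone sketch of the same pattern against the generic FileSystem API (this assumes a reachable HDFS configured in fs.defaultFS and a cluster that permits append; the class name and path are hypothetical):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateThenAppend {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf); // fs.defaultFS decides the target
    Path p = new Path("/tmp/create-then-append");

    FSDataOutputStream out = fs.create(p, true);
    out.write(new byte[4096]); // one block's worth with a 4096-byte block size
    out.close();               // on HDFS, logs OP_CLOSE

    out = fs.append(p);        // with the fix above, always logs OP_ADD,
    out.write(new byte[4096]); // even when the existing data ends exactly
    out.close();               // on a block boundary

    System.out.println(fs.getFileStatus(p).getLen()); // 8192
  }
}
```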
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
index 29bbdb7eb25..b6c69c31932 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
@@ -26,6 +26,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -41,6 +42,7 @@
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
 import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.util.Holder;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -190,6 +192,36 @@ public static FSEditLog createStandaloneEditLog(File logDir)
         ImmutableList.of(logDir.toURI()));
   }
 
+
+  /**
+   * @param editLog the edit log file to read
+   * @return the count of each type of operation in the log file
+   * @throws Exception if there is an error reading it
+   */
+  public static EnumMap<FSEditLogOpCodes, Holder<Integer>> countEditLogOpTypes(
+      File editLog) throws Exception {
+    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
+        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
+
+    EditLogInputStream elis = new EditLogFileInputStream(editLog);
+    try {
+      FSEditLogOp op;
+      while ((op = elis.readOp()) != null) {
+        Holder<Integer> i = opCounts.get(op.opCode);
+        if (i == null) {
+          i = new Holder<Integer>(0);
+          opCounts.put(op.opCode, i);
+        }
+        i.held++;
+      }
+    } finally {
+      IOUtils.closeStream(elis);
+    }
+
+    return opCounts;
+  }
+
+
   /**
    * Assert that all of the given directories have the same newest filename
    * for fsimage and that they hold the same data.
@@ -393,7 +425,7 @@ static void assertNNHasCheckpoints(MiniDFSCluster cluster,
     }
   }
 
-  static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
+  public static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
     List<File> nameDirs = Lists.newArrayList();
     for (URI u : cluster.getNameDirs(0)) {
       nameDirs.add(new File(u.getPath(), "current"));
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz
new file mode 100644
index 00000000000..38422c29a47
Binary files /dev/null and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz differ
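countEditLogOpTypes above relies on a mutable Holder so that each occurrence is counted by incrementing a field, rather than removing and re-inserting a boxed Integer into the map. A self-contained sketch of the same idiom, with Op and Holder as stand-ins for the HDFS types:

```java
import java.util.EnumMap;

/**
 * Illustration of the EnumMap-plus-mutable-Holder counting idiom.
 * Op and Holder are hypothetical stand-ins for FSEditLogOpCodes and
 * org.apache.hadoop.hdfs.util.Holder.
 */
public class OpCountSketch {
  enum Op { ADD, CLOSE }

  static class Holder<T> {
    T held;
    Holder(T held) { this.held = held; }
  }

  public static void main(String[] args) {
    EnumMap<Op, Holder<Integer>> counts =
        new EnumMap<Op, Holder<Integer>>(Op.class);
    for (Op op : new Op[] { Op.ADD, Op.CLOSE, Op.ADD }) {
      Holder<Integer> h = counts.get(op);
      if (h == null) {
        h = new Holder<Integer>(0);
        counts.put(op, h);
      }
      h.held++; // auto-unboxes, increments, re-boxes
    }
    System.out.println(counts.get(Op.ADD).held);   // 2
    System.out.println(counts.get(Op.CLOSE).held); // 1
  }
}
```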