diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 095e9598d13..56a728c9e5c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -314,6 +314,8 @@ Release 0.23.2 - UNRELEASED
HDFS-3006. In WebHDFS, when the return body is empty, set the Content-Type
to application/octet-stream instead of application/json. (szetszwo)
+ HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd)
+
Release 0.23.1 - 2012-02-17
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
index 12a11df92c0..729748f3026 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
@@ -44,6 +44,15 @@
@InterfaceAudience.Private
public class LayoutVersion {
+ /**
+ * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+ * sometimes be skipped for append() calls. If we see such a case when
+ * loading the edits, but the version is known to have that bug, we
+ * workaround the issue. Otherwise we should consider it a corruption
+ * and bail.
+ */
+ public static final int BUGFIX_HDFS_2991_VERSION = -40;
+
/**
* Enums for features that change the layout version.
*
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index ba045d106da..45ce9df154b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -249,8 +249,6 @@ INodeFileUnderConstruction addFile(String path,
+" to the file system");
return null;
}
- // add create file record to log, record new generation stamp
- fsImage.getEditLog().logOpenFile(path, newNode);
if(NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 4aa79dfa916..b890d16e652 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -206,13 +206,22 @@ int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
fsDir.updateFile(oldFile, addCloseOp.path, blocks,
addCloseOp.mtime, addCloseOp.atime);
if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) { // OP_CLOSE
- assert oldFile.isUnderConstruction() :
- "File is not under construction: " + addCloseOp.path;
+ if (!oldFile.isUnderConstruction() &&
+ logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
+ // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
+ // could show up twice in a row. But after that version, this
+ // should be fixed, so we should treat it as an error.
+ throw new IOException(
+ "File is not under construction: " + addCloseOp.path);
+ }
fsNamesys.getBlockManager().completeBlock(
oldFile, blocks.length-1, true);
- INodeFile newFile =
- ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
- fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+
+ if (oldFile.isUnderConstruction()) {
+ INodeFile newFile =
+ ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
+ fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+ }
} else if(! oldFile.isUnderConstruction()) { // OP_ADD for append
INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
oldFile.getLocalNameBytes(),
@@ -231,8 +240,10 @@ int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) {
fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
} else { // Ops.OP_CLOSE
- fsNamesys.leaseManager.removeLease(
- ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+ if (oldFile.isUnderConstruction()) {
+ fsNamesys.leaseManager.removeLease(
+ ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+ }
}
break;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 40f173405a4..604fbb00bdb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1283,9 +1283,13 @@ private LocatedBlock startFileInternal(String src,
clientNode);
dir.replaceNode(src, node, cons);
leaseManager.addLease(cons.getClientName(), src);
-
+
// convert last block to under-construction
- return blockManager.convertLastBlockToUnderConstruction(cons);
+ LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
+
+ // add append file record to log, record lease, etc.
+ getEditLog().logOpenFile(src, cons);
+ return ret;
} else {
// Now we can add the name to the filesystem. This file has no
// blocks associated with it.
@@ -1301,6 +1305,9 @@ private LocatedBlock startFileInternal(String src,
"Unable to add file to namespace.");
}
leaseManager.addLease(newNode.getClientName(), src);
+
+ // record file record in log, record new generation stamp
+ getEditLog().logOpenFile(src, newNode);
if (NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
+"add "+src+" to namespace for "+holder);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
new file mode 100644
index 00000000000..033478f3a81
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.EnumMap;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
+import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage;
+import org.apache.hadoop.hdfs.util.Holder;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Test;
+
+/**
+ * Unit test to make sure that Append properly logs the right
+ * things to the edit log, such that files aren't lost or truncated
+ * on restart.
+ */
+public class TestFileAppendRestart {
+ private static final int BLOCK_SIZE = 4096;
+ private static final String HADOOP_23_BROKEN_APPEND_TGZ =
+ "image-with-buggy-append.tgz";
+
+ private void writeAndAppend(FileSystem fs, Path p,
+ int lengthForCreate, int lengthForAppend) throws IOException {
+ // Creating a file with 4096 blockSize to write multiple blocks
+ FSDataOutputStream stream = fs.create(
+ p, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+ try {
+ AppendTestUtil.write(stream, 0, lengthForCreate);
+ stream.close();
+
+ stream = fs.append(p);
+ AppendTestUtil.write(stream, lengthForCreate, lengthForAppend);
+ stream.close();
+ } finally {
+ IOUtils.closeStream(stream);
+ }
+
+ int totalLength = lengthForCreate + lengthForAppend;
+ assertEquals(totalLength, fs.getFileStatus(p).getLen());
+ }
+
+ /**
+ * Regression test for HDFS-2991. Creates and appends to files
+ * where blocks start/end on block boundaries.
+ */
+ @Test
+ public void testAppendRestart() throws Exception {
+ final Configuration conf = new HdfsConfiguration();
+ // Turn off persistent IPC, so that the DFSClient can survive NN restart
+ conf.setInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+ 0);
+ MiniDFSCluster cluster = null;
+
+ FSDataOutputStream stream = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+ FileSystem fs = cluster.getFileSystem();
+ File editLog =
+ new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster).get(0),
+ NNStorage.getInProgressEditsFileName(1));
+ EnumMap> counts;
+
+ Path p1 = new Path("/block-boundaries");
+ writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
+
+ counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+ assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+ assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+ Path p2 = new Path("/not-block-boundaries");
+ writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE);
+ counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      // We get *3* OP_ADDs from this test rather than two: besides the
+      // OP_ADD from the initial create, one extra OP_ADD comes from
+      // re-opening the file to establish the lease, and another from the
+      // updatePipeline call when the block's generation stamp is incremented
+ assertEquals(5, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+ assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+ cluster.restartNameNode();
+
+ AppendTestUtil.check(fs, p1, 2*BLOCK_SIZE);
+ AppendTestUtil.check(fs, p2, 3*BLOCK_SIZE/2);
+ } finally {
+ IOUtils.closeStream(stream);
+ if (cluster != null) { cluster.shutdown(); }
+ }
+ }
+
+ /**
+ * Earlier versions of HDFS had a bug (HDFS-2991) which caused
+ * append(), when called exactly at a block boundary,
+ * to not log an OP_ADD. This ensures that we can read from
+ * such buggy versions correctly, by loading an image created
+ * using a namesystem image created with 0.23.1-rc2 exhibiting
+ * the issue.
+ */
+ @Test
+ public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
+ final Configuration conf = new HdfsConfiguration();
+
+ String tarFile = System.getProperty("test.cache.data", "build/test/cache")
+ + "/" + HADOOP_23_BROKEN_APPEND_TGZ;
+ String testDir = System.getProperty("test.build.data", "build/test/data");
+ File dfsDir = new File(testDir, "image-with-buggy-append");
+ if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
+ throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
+ }
+ FileUtil.unTar(new File(tarFile), new File(testDir));
+
+ File nameDir = new File(dfsDir, "name");
+ GenericTestUtils.assertExists(nameDir);
+
+ conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
+
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .format(false)
+ .manageDataDfsDirs(false)
+ .manageNameDfsDirs(false)
+ .numDataNodes(0)
+ .waitSafeMode(false)
+ .startupOption(StartupOption.UPGRADE)
+ .build();
+ try {
+ FileSystem fs = cluster.getFileSystem();
+ Path testPath = new Path("/tmp/io_data/test_io_0");
+ assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
+ } finally {
+ cluster.shutdown();
+ }
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
index 29bbdb7eb25..b6c69c31932 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
@@ -26,6 +26,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -41,6 +42,7 @@
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.util.Holder;
import org.apache.hadoop.hdfs.util.MD5FileUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
@@ -190,6 +192,36 @@ public static FSEditLog createStandaloneEditLog(File logDir)
ImmutableList.of(logDir.toURI()));
}
+
+ /**
+ * @param editLog a path of an edit log file
+ * @return the count of each type of operation in the log file
+ * @throws Exception if there is an error reading it
+ */
+  public static EnumMap<FSEditLogOpCodes, Holder<Integer>> countEditLogOpTypes(
+      File editLog) throws Exception {
+    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
+        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
+
+ EditLogInputStream elis = new EditLogFileInputStream(editLog);
+ try {
+ FSEditLogOp op;
+ while ((op = elis.readOp()) != null) {
+        Holder<Integer> i = opCounts.get(op.opCode);
+        if (i == null) {
+          i = new Holder<Integer>(0);
+          opCounts.put(op.opCode, i);
+ }
+ i.held++;
+ }
+ } finally {
+ IOUtils.closeStream(elis);
+ }
+
+ return opCounts;
+ }
+
+
/**
* Assert that all of the given directories have the same newest filename
* for fsimage that they hold the same data.
@@ -393,7 +425,7 @@ static void assertNNHasCheckpoints(MiniDFSCluster cluster,
}
}
-  static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
+  public static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
     List<File> nameDirs = Lists.newArrayList();
for (URI u : cluster.getNameDirs(0)) {
nameDirs.add(new File(u.getPath(), "current"));
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz
new file mode 100644
index 00000000000..38422c29a47
Binary files /dev/null and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz differ