From 34c73db5e1bb94aae51b1270f3f86d75c0fd1481 Mon Sep 17 00:00:00 2001
From: Todd Lipcon
Date: Wed, 29 Feb 2012 18:52:00 +0000
Subject: [PATCH] HDFS-2991. Fix case where OP_ADD would not be logged in
 append(). Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1295214 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |   2 +
 .../hadoop/hdfs/protocol/LayoutVersion.java        |   9 +
 .../hdfs/server/namenode/FSDirectory.java          |   2 -
 .../hdfs/server/namenode/FSEditLogLoader.java      |  25 ++-
 .../hdfs/server/namenode/FSNamesystem.java         |  11 +-
 .../hadoop/hdfs/TestFileAppendRestart.java         | 166 ++++++++++++++++++
 .../hdfs/server/namenode/FSImageTestUtil.java      |  34 +++-
 .../resources/image-with-buggy-append.tgz          | Bin 0 -> 1998 bytes
 8 files changed, 237 insertions(+), 12 deletions(-)
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 095e9598d13..56a728c9e5c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -314,6 +314,8 @@ Release 0.23.2 - UNRELEASED
     HDFS-3006. In WebHDFS, when the return body is empty, set the
     Content-Type to application/octet-stream instead of application/json.
     (szetszwo)
 
+    HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd)
+
 Release 0.23.1 - 2012-02-17
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
index 12a11df92c0..729748f3026 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
@@ -44,6 +44,15 @@ import org.apache.hadoop.classification.InterfaceAudience;
 @InterfaceAudience.Private
 public class LayoutVersion {
 
+  /**
+   * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+   * sometimes be skipped for append() calls. If we see such a case when
+   * loading the edits, but the version is known to have that bug, we
+   * workaround the issue. Otherwise we should consider it a corruption
+   * and bail.
+   */
+  public static final int BUGFIX_HDFS_2991_VERSION = -40;
+
   /**
    * Enums for features that change the layout version.
    * <br>
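The constant above drives a version check in FSEditLogLoader (see the next file). As a rough aside, here is a minimal self-contained sketch of that gating logic, assuming only that layout versions are negative integers that decrease as features are added; the class and method names are hypothetical stand-ins, not part of the patch:

    // Sketch: decide whether a duplicate OP_CLOSE can be tolerated.
    class LayoutVersionGateSketch {
      // mirrors LayoutVersion.BUGFIX_HDFS_2991_VERSION in the hunk above
      static final int BUGFIX_HDFS_2991_VERSION = -40;

      // true when the edits writer may predate the HDFS-2991 fix, so a
      // file that is unexpectedly not under construction is a known
      // artifact of the old bug rather than corruption
      static boolean mayContainBuggyAppend(int logVersion) {
        return logVersion > BUGFIX_HDFS_2991_VERSION;
      }

      public static void main(String[] args) {
        System.out.println(mayContainBuggyAppend(-39)); // true: pre-fix writer
        System.out.println(mayContainBuggyAppend(-40)); // false: fixed writer
      }
    }
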
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index ba045d106da..45ce9df154b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -249,8 +249,6 @@ public class FSDirectory implements Closeable {
           +" to the file system");
       return null;
     }
-    // add create file record to log, record new generation stamp
-    fsImage.getEditLog().logOpenFile(path, newNode);
 
     if(NameNode.stateChangeLog.isDebugEnabled()) {
       NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 4aa79dfa916..b890d16e652 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -206,13 +206,22 @@ public class FSEditLogLoader {
           fsDir.updateFile(oldFile, addCloseOp.path, blocks,
               addCloseOp.mtime, addCloseOp.atime);
           if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) {  // OP_CLOSE
-            assert oldFile.isUnderConstruction() :
-                "File is not under construction: " + addCloseOp.path;
+            if (!oldFile.isUnderConstruction() &&
+                logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
+              // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
+              // could show up twice in a row. But after that version, this
+              // should be fixed, so we should treat it as an error.
+              throw new IOException(
+                  "File is not under construction: " + addCloseOp.path);
+            }
             fsNamesys.getBlockManager().completeBlock(
                 oldFile, blocks.length-1, true);
-            INodeFile newFile =
-              ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
-            fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+
+            if (oldFile.isUnderConstruction()) {
+              INodeFile newFile =
+                  ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
+              fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+            }
           } else if(! oldFile.isUnderConstruction()) {  // OP_ADD for append
             INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
                 oldFile.getLocalNameBytes(),
@@ -231,8 +240,10 @@ public class FSEditLogLoader {
           if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) {
             fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
           } else {  // Ops.OP_CLOSE
-            fsNamesys.leaseManager.removeLease(
-                ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+            if (oldFile.isUnderConstruction()) {
+              fsNamesys.leaseManager.removeLease(
+                  ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+            }
           }
           break;
         }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 40f173405a4..604fbb00bdb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1283,9 +1283,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
             clientNode);
         dir.replaceNode(src, node, cons);
         leaseManager.addLease(cons.getClientName(), src);
-
+
         // convert last block to under-construction
-        return blockManager.convertLastBlockToUnderConstruction(cons);
+        LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
+
+        // add append file record to log, record lease, etc.
+        getEditLog().logOpenFile(src, cons);
+        return ret;
       } else {
        // Now we can add the name to the filesystem. This file has no
        // blocks associated with it.
@@ -1301,6 +1305,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
               "Unable to add file to namespace.");
         }
         leaseManager.addLease(newNode.getClientName(), src);
+
+        // record file record in log, record new generation stamp
+        getEditLog().logOpenFile(src, newNode);
         if (NameNode.stateChangeLog.isDebugEnabled()) {
           NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
                                      +"add "+src+" to namespace for "+holder);
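The FSNamesystem change above is the heart of the fix: logOpenFile() moves out of FSDirectory.addFile() and into both branches of the start-file path, so the append branch, which reuses an existing inode and previously never reached the logging call, now records an OP_ADD as well. A minimal sketch of the resulting ordering, using hypothetical stand-in types rather than the real FSNamesystem internals:

    // Sketch of the append-path ordering after the patch: mutate the
    // namespace first, then log OP_ADD, then return to the client.
    final class AppendOrderingSketch {
      interface EditLog { void logOpenFile(String path, Object inode); }
      interface BlockManager {
        Object convertLastBlockToUnderConstruction(Object inode);
      }

      static Object appendFile(String src, Object cons,
          BlockManager blockManager, EditLog editLog) {
        // convert the last block so the client resumes on a block with a
        // fresh generation stamp
        Object ret = blockManager.convertLastBlockToUnderConstruction(cons);
        // log OP_ADD before returning; before HDFS-2991 this branch
        // returned without logging, so a NameNode restart forgot the
        // file had been re-opened
        editLog.logOpenFile(src, cons);
        return ret;
      }
    }
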
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
new file mode 100644
index 00000000000..033478f3a81
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.EnumMap;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
+import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage;
+import org.apache.hadoop.hdfs.util.Holder;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Test;
+
+/**
+ * Unit test to make sure that Append properly logs the right
+ * things to the edit log, such that files aren't lost or truncated
+ * on restart.
+ */
+public class TestFileAppendRestart {
+  private static final int BLOCK_SIZE = 4096;
+  private static final String HADOOP_23_BROKEN_APPEND_TGZ =
+      "image-with-buggy-append.tgz";
+
+  private void writeAndAppend(FileSystem fs, Path p,
+      int lengthForCreate, int lengthForAppend) throws IOException {
+    // Creating a file with 4096 blockSize to write multiple blocks
+    FSDataOutputStream stream = fs.create(
+        p, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+    try {
+      AppendTestUtil.write(stream, 0, lengthForCreate);
+      stream.close();
+
+      stream = fs.append(p);
+      AppendTestUtil.write(stream, lengthForCreate, lengthForAppend);
+      stream.close();
+    } finally {
+      IOUtils.closeStream(stream);
+    }
+
+    int totalLength = lengthForCreate + lengthForAppend;
+    assertEquals(totalLength, fs.getFileStatus(p).getLen());
+  }
+
+  /**
+   * Regression test for HDFS-2991. Creates and appends to files
+   * where blocks start/end on block boundaries.
+   */
+  @Test
+  public void testAppendRestart() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    // Turn off persistent IPC, so that the DFSClient can survive NN restart
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+        0);
+    MiniDFSCluster cluster = null;
+
+    FSDataOutputStream stream = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      FileSystem fs = cluster.getFileSystem();
+      File editLog =
+          new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster).get(0),
+              NNStorage.getInProgressEditsFileName(1));
+      EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
+
+      Path p1 = new Path("/block-boundaries");
+      writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
+
+      counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+      assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+      Path p2 = new Path("/not-block-boundaries");
+      writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE);
+      counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      // We get *3* OP_ADDS from this test rather than two. The first
+      // OP_ADD comes from re-opening the file to establish the lease,
+      // the second comes from the updatePipeline call when the block
+      // itself has its generation stamp incremented
+      assertEquals(5, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+      assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+      cluster.restartNameNode();
+
+      AppendTestUtil.check(fs, p1, 2*BLOCK_SIZE);
+      AppendTestUtil.check(fs, p2, 3*BLOCK_SIZE/2);
+    } finally {
+      IOUtils.closeStream(stream);
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+  /**
+   * Earlier versions of HDFS had a bug (HDFS-2991) which caused
+   * append(), when called exactly at a block boundary,
+   * to not log an OP_ADD. This test ensures that we can read from
+   * such buggy versions correctly, by loading a namesystem image
+   * created with 0.23.1-rc2, which exhibits the issue.
+   */
+  @Test
+  public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
+    final Configuration conf = new HdfsConfiguration();
+
+    String tarFile = System.getProperty("test.cache.data", "build/test/cache")
+        + "/" + HADOOP_23_BROKEN_APPEND_TGZ;
+    String testDir = System.getProperty("test.build.data", "build/test/data");
+    File dfsDir = new File(testDir, "image-with-buggy-append");
+    if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
+      throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
+    }
+    FileUtil.unTar(new File(tarFile), new File(testDir));
+
+    File nameDir = new File(dfsDir, "name");
+    GenericTestUtils.assertExists(nameDir);
+
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .format(false)
+        .manageDataDfsDirs(false)
+        .manageNameDfsDirs(false)
+        .numDataNodes(0)
+        .waitSafeMode(false)
+        .startupOption(StartupOption.UPGRADE)
+        .build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      Path testPath = new Path("/tmp/io_data/test_io_0");
+      assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}
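The test relies on a new helper in FSImageTestUtil (next file) that tallies opcodes with an EnumMap of mutable Holder counters, so each opcode's count can be bumped in place without re-boxing a map value on every op. The idiom, reduced to a self-contained sketch with a stand-in enum rather than the real FSEditLogOpCodes:

    import java.util.EnumMap;

    final class OpCountSketch {
      enum Op { OP_ADD, OP_CLOSE }
      static final class Holder<T> { T held; Holder(T h) { held = h; } }

      static EnumMap<Op, Holder<Integer>> count(Iterable<Op> ops) {
        EnumMap<Op, Holder<Integer>> counts =
            new EnumMap<Op, Holder<Integer>>(Op.class);
        for (Op op : ops) {
          Holder<Integer> h = counts.get(op);
          if (h == null) {
            h = new Holder<Integer>(0);
            counts.put(op, h);
          }
          h.held++; // autoboxes; fine for a test-only tally
        }
        return counts;
      }
    }
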
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
index 29bbdb7eb25..b6c69c31932 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
@@ -26,6 +26,7 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -41,6 +42,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
 import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.util.Holder;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -190,6 +192,36 @@ public abstract class FSImageTestUtil {
         ImmutableList.of(logDir.toURI()));
   }
 
+  /**
+   * @param editLog a path of an edit log file
+   * @return the count of each type of operation in the log file
+   * @throws Exception if there is an error reading it
+   */
+  public static EnumMap<FSEditLogOpCodes, Holder<Integer>> countEditLogOpTypes(
+      File editLog) throws Exception {
+    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
+        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
+
+    EditLogInputStream elis = new EditLogFileInputStream(editLog);
+    try {
+      FSEditLogOp op;
+      while ((op = elis.readOp()) != null) {
+        Holder<Integer> i = opCounts.get(op.opCode);
+        if (i == null) {
+          i = new Holder<Integer>(0);
+          opCounts.put(op.opCode, i);
+        }
+        i.held++;
+      }
+    } finally {
+      IOUtils.closeStream(elis);
+    }
+
+    return opCounts;
+  }
+
+
   /**
    * Assert that all of the given directories have the same newest filename
    * for fsimage that they hold the same data.
@@ -393,7 +425,7 @@ public abstract class FSImageTestUtil {
     }
   }
 
-  static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
+  public static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
     List<File> nameDirs = Lists.newArrayList();
     for (URI u : cluster.getNameDirs(0)) {
       nameDirs.add(new File(u.getPath(), "current"));
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/image-with-buggy-append.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..38422c29a47573a455c0f585c78edbb22787cafd
GIT binary patch
literal 1998
zcmV;<2Ql~`iwFR$zC}*}1MOOQY!p=(f72F`6%|lWti}~V#JKF7JG(UnYFkK6EQJ;-
z9xSuFvt{jG%+407pm=~9qX|J2JR;(S*MAa-!3&QdkpN-N)fqSMKJLE
3$%;me^8J&|Nf++$Wis**1w0$zj0A$
zSeu+s8>Y85&4ghlt4rh_oyG0UrmaB3LNRY
zW`#yQvaxImjaogUM6RJ4S3pvwzM$9=iB8vu7AqlD2BteIN@O}KrI|BZt_pOzw{=c^
zePe4Y&P-Z7T|}6;78$U`B}H{)j`h%Ym2UN{?pQZ!@uD|95~gaxS^d?5^K6K)s47Y)
zM^6&?d#u8_W?K7};f5;u*}~s&@P{*{&1#dARO0jmf|I?k9$M);Yxel`FVi3qmA_ax
zITM^f7A;gyKKM=fA2dH#O9Hc!s5E
zo?~tPuqK+UXwT}K8XPR=_sD!ea|bTw8Zsu5GTnw$a->v!T)E3&;GO@mMUZQC{8RaI;!P@FLGR7*?E2Yd_j2j`eu7
zBJyx9fwP6B?pRWtD=UdmEL!W}JmeBvB8r;-av-E8EN*|S!}y3{-DV&8=Rec${U5`n
z?tdKb63P4@3~2lG{`2wY*{b^P#c+mfwK#KWoAc;#V*J~x`(eJdWlZJoFFjbJR^o?q
za&_$jYd#t*d;d4+j^?6E4E>$CPI_6K>8>L(3>IgD@!CbCJ{;1@SzB|OudQpD-Pkav
zR|0#F}&cWU98LFWzev`6=%co;X{VufZ7KT(525>1VsH@w{GoGX1(zO^Se3FxAyEh
z06DnMcn(cFcz)I&6C`Ld&vc4Rw|QaJ`uPVy(CaPRT=pPXb2dI2xAJZ1eFHkbUvH;2
z=15#y#+B~EM0=O6S_`#uBOdf09kVw61?)Fj4RPW%V9ksV%z6GjD3tkVOm=6dDH%FZ
znX98|^`vo79o~Xb++h&~wU0eEnwl_WX?A2Obn0v~qb!R4LJE)eR5g6RO&?9m45AQ%YdU*2u8ZWqp%zcWCR_;V;3`
zyRgv0IPsgWrd;?M%-j&x^v|zn!YoGUW_|6z9R2VAqbGi++p-qhMMqi!P;8wAqc~h#
z6ib#huK;<%IZgo!rs;{vz53%SAL-*XO{Rr+ydJbR1NT4Pd|4ISU9p
zE{DL{WyK|OE|Kpi?=2sWSpo9K2M~_bZV?W(X#b(!TJffH`UtUT<}knLd~OgwwN6@pVPUo_xkd0)`;1L*Oc9Ul#O`5-Jz*Zy_?Ac3!IpMwnexi#W68hA2AK^#`>uk3mZGVp
zP-V+xTriBgY_N87kFX3Dv1J7pnBdgipZz%!X7FuT*uQVQ`_p!))Z5W{MiRpg7p&85
z87pp6Z|4T6xaEa|s4p&vXP=yWpVb@p67X;T{y#YqDEMz$`QQI&5M9Q<{|l-A|0C~z
zPCkM`EfYbJW5u8-i#|EP@GKYfxkVR43v|F^zamfa$y-+f0s;a80s;a80s;a80s;a8
g0s;a80s;a80s;a80s;a8f>Vlr03F=0Y5-6G0NXR>0ssI2

literal 0
HcmV?d00001
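Taken together, the patch guarantees that a create followed by an append always logs a matched OP_ADD/OP_CLOSE pair per open. A small self-contained illustration of the totals testAppendRestart asserts for the block-boundary file, using a stand-in enum rather than HDFS code:

    import java.util.Arrays;
    import java.util.EnumMap;

    final class ExpectedOpsSketch {
      enum Op { OP_ADD, OP_CLOSE }

      public static void main(String[] args) {
        // create logs OP_ADD+OP_CLOSE; a block-boundary append must log the
        // pair again -- the append's OP_ADD is what HDFS-2991 used to lose
        Iterable<Op> logged =
            Arrays.asList(Op.OP_ADD, Op.OP_CLOSE, Op.OP_ADD, Op.OP_CLOSE);
        EnumMap<Op, Integer> counts = new EnumMap<Op, Integer>(Op.class);
        for (Op op : logged) {
          Integer c = counts.get(op);
          counts.put(op, c == null ? 1 : c + 1);
        }
        System.out.println(counts); // {OP_ADD=2, OP_CLOSE=2}
      }
    }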