HDFS-2991. Fix case where OP_ADD would not be logged in append(). Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1295214 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 2012-02-29 18:52:00 +00:00
parent a4ad12dd2d
commit 34c73db5e1
8 changed files with 237 additions and 12 deletions (the eighth file, presumably the binary test image image-with-buggy-append.tgz referenced by the new test, has no text diff below)

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -314,6 +314,8 @@ Release 0.23.2 - UNRELEASED
HDFS-3006. In WebHDFS, when the return body is empty, set the Content-Type
to application/octet-stream instead of application/json. (szetszwo)
+HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd)
Release 0.23.1 - 2012-02-17
INCOMPATIBLE CHANGES

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java

@@ -44,6 +44,15 @@
@InterfaceAudience.Private
public class LayoutVersion {
+/**
+ * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+ * sometimes be skipped for append() calls. If we see such a case when
+ * loading the edits, but the version is known to have that bug, we
+ * work around the issue. Otherwise we should consider it a corruption
+ * and bail.
+ */
+public static final int BUGFIX_HDFS_2991_VERSION = -40;
/**
* Enums for features that change the layout version.
* <br><br>

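A note on the comparison direction: HDFS layout versions are negative integers that decrease as the on-disk format evolves, so "written at or after this fix" is expressed with <= rather than >=. A minimal sketch, assuming a hypothetical helper name (the real check is inlined in FSEditLogLoader below):

// Hypothetical helper, not part of this commit. A log written at
// layout version v includes the HDFS-2991 fix iff v <= -40, because
// newer versions are more negative.
static boolean writtenWithHdfs2991Fix(int logVersion) {
  return logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION;
}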
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -249,8 +249,6 @@ INodeFileUnderConstruction addFile(String path,
+" to the file system");
return null;
}
-// add create file record to log, record new generation stamp
-fsImage.getEditLog().logOpenFile(path, newNode);
if(NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java

@@ -206,13 +206,22 @@ int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
fsDir.updateFile(oldFile, addCloseOp.path, blocks,
addCloseOp.mtime, addCloseOp.atime);
if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) { // OP_CLOSE
-assert oldFile.isUnderConstruction() :
-  "File is not under construction: " + addCloseOp.path;
+if (!oldFile.isUnderConstruction() &&
+    logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
+  // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
+  // could show up twice in a row. But after that version, this
+  // should be fixed, so we should treat it as an error.
+  throw new IOException(
+      "File is not under construction: " + addCloseOp.path);
+}
fsNamesys.getBlockManager().completeBlock(
oldFile, blocks.length-1, true);
+if (oldFile.isUnderConstruction()) {
+  INodeFile newFile =
+    ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
+  fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+}
} else if(! oldFile.isUnderConstruction()) { // OP_ADD for append
INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
oldFile.getLocalNameBytes(),
@@ -231,9 +240,11 @@ int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit,
if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) {
fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
} else { // Ops.OP_CLOSE
+if (oldFile.isUnderConstruction()) {
+  fsNamesys.leaseManager.removeLease(
+      ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+}
}
break;
}
case OP_SET_REPLICATION: {

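Concretely, for a create of exactly one full block, a close, an append, and a second close, the edit sequences the loader must handle look like this (a sketch inferred from the new test below, not text from the commit):

// Writer with the fix (logVersion <= BUGFIX_HDFS_2991_VERSION):
//   OP_ADD     (create)
//   OP_CLOSE   (first close)
//   OP_ADD     (append re-opens the file)
//   OP_CLOSE   (final close)
//
// Buggy writer, append starting exactly at a block boundary:
//   OP_ADD     (create)
//   OP_CLOSE   (first close)
//   OP_CLOSE   (close after append; the OP_ADD was never logged)
//
// The repeated OP_CLOSE arrives while the file is no longer under
// construction. The loader now tolerates that for pre-fix layout
// versions and throws an IOException for versions that include the fix.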
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -1285,7 +1285,11 @@ private LocatedBlock startFileInternal(String src,
leaseManager.addLease(cons.getClientName(), src);
// convert last block to under-construction
-return blockManager.convertLastBlockToUnderConstruction(cons);
+LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
+// add append file record to log, record lease, etc.
+getEditLog().logOpenFile(src, cons);
+return ret;
} else {
// Now we can add the name to the filesystem. This file has no
// blocks associated with it.
@@ -1301,6 +1305,9 @@ private LocatedBlock startFileInternal(String src,
"Unable to add file to namespace.");
}
leaseManager.addLease(newNode.getClientName(), src);
+// record file record in log, record new generation stamp
+getEditLog().logOpenFile(src, newNode);
if (NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
+"add "+src+" to namespace for "+holder);

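Taken together, the FSDirectory and FSNamesystem hunks move the OP_ADD logging out of FSDirectory.addFile and into both branches of startFileInternal, so append() logs OP_ADD even when the file's last block is exactly full. A simplified sketch of the append branch after the change (comment form, not a verbatim excerpt):

// Append branch of startFileInternal, post-fix, simplified:
//   1. convert the existing INodeFile to an INodeFileUnderConstruction
//   2. leaseManager.addLease(cons.getClientName(), src)
//   3. LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons)
//   4. getEditLog().logOpenFile(src, cons)   <-- OP_ADD, now unconditional
//   5. return ret
// Before the fix, step 4 was missing and the method returned at step 3;
// a mid-block append still picked up an OP_ADD later via pipeline
// updates, but a block-boundary append logged nothing until OP_CLOSE.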
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java

@@ -0,0 +1,166 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.IOException;
import java.util.EnumMap;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.util.Holder;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Test;
/**
* Unit test to make sure that append() properly logs the right
* things to the edit log, such that files aren't lost or truncated
* on restart.
*/
public class TestFileAppendRestart {
private static final int BLOCK_SIZE = 4096;
private static final String HADOOP_23_BROKEN_APPEND_TGZ =
"image-with-buggy-append.tgz";
private void writeAndAppend(FileSystem fs, Path p,
int lengthForCreate, int lengthForAppend) throws IOException {
// Create the file with a 4096-byte block size so the writes span multiple blocks
FSDataOutputStream stream = fs.create(
p, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
try {
AppendTestUtil.write(stream, 0, lengthForCreate);
stream.close();
stream = fs.append(p);
AppendTestUtil.write(stream, lengthForCreate, lengthForAppend);
stream.close();
} finally {
IOUtils.closeStream(stream);
}
int totalLength = lengthForCreate + lengthForAppend;
assertEquals(totalLength, fs.getFileStatus(p).getLen());
}
/**
* Regression test for HDFS-2991. Creates and appends to files,
* both where the append begins exactly at a block boundary and
* where it begins mid-block.
*/
@Test
public void testAppendRestart() throws Exception {
final Configuration conf = new HdfsConfiguration();
// Turn off persistent IPC, so that the DFSClient can survive NN restart
conf.setInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
0);
MiniDFSCluster cluster = null;
FSDataOutputStream stream = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
FileSystem fs = cluster.getFileSystem();
File editLog =
new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster).get(0),
NNStorage.getInProgressEditsFileName(1));
EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
Path p1 = new Path("/block-boundaries");
writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
counts = FSImageTestUtil.countEditLogOpTypes(editLog);
assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
Path p2 = new Path("/not-block-boundaries");
writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE);
counts = FSImageTestUtil.countEditLogOpTypes(editLog);
// This file contributes *three* OP_ADDs rather than two: one from the
// initial create, one from re-opening the file to establish the lease
// on append, and one from the updatePipeline call when the partial
// last block has its generation stamp incremented
assertEquals(5, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
cluster.restartNameNode();
AppendTestUtil.check(fs, p1, 2*BLOCK_SIZE);
AppendTestUtil.check(fs, p2, 3*BLOCK_SIZE/2);
} finally {
IOUtils.closeStream(stream);
if (cluster != null) { cluster.shutdown(); }
}
}
/**
* Earlier versions of HDFS had a bug (HDFS-2991) which caused
* append(), when called exactly at a block boundary,
* to not log an OP_ADD. This test ensures that we can read edit logs
* from such buggy versions correctly, by loading a name directory
* produced by a 0.23.1-rc2 build exhibiting the issue.
*/
@Test
public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
final Configuration conf = new HdfsConfiguration();
String tarFile = System.getProperty("test.cache.data", "build/test/cache")
+ "/" + HADOOP_23_BROKEN_APPEND_TGZ;
String testDir = System.getProperty("test.build.data", "build/test/data");
File dfsDir = new File(testDir, "image-with-buggy-append");
if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
}
FileUtil.unTar(new File(tarFile), new File(testDir));
File nameDir = new File(dfsDir, "name");
GenericTestUtils.assertExists(nameDir);
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
.waitSafeMode(false)
.startupOption(StartupOption.UPGRADE)
.build();
try {
FileSystem fs = cluster.getFileSystem();
Path testPath = new Path("/tmp/io_data/test_io_0");
assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
} finally {
cluster.shutdown();
}
}
}

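For reference, a minimal usage sketch of the countEditLogOpTypes helper that the test above relies on (defined in FSImageTestUtil below). The edit log path here is hypothetical, and note that op codes which never occur in the log have no map entry, so a null check is needed before dereferencing .held:

// countEditLogOpTypes declares "throws Exception", so callers handle it.
File editLog = new File("/data/name/current/edits_inprogress_0000000000000000001");
EnumMap<FSEditLogOpCodes, Holder<Integer>> counts =
    FSImageTestUtil.countEditLogOpTypes(editLog);
Holder<Integer> adds = counts.get(FSEditLogOpCodes.OP_ADD);
int numAdds = (adds == null) ? 0 : adds.held;  // no entry when an op never appears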
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java

@@ -26,6 +26,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -41,6 +42,7 @@
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.util.Holder;
import org.apache.hadoop.hdfs.util.MD5FileUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
@@ -190,6 +192,36 @@ public static FSEditLog createStandaloneEditLog(File logDir)
ImmutableList.of(logDir.toURI()));
}
+/**
+ * @param editLog a path of an edit log file
+ * @return the count of each type of operation in the log file
+ * @throws Exception if there is an error reading it
+ */
+public static EnumMap<FSEditLogOpCodes,Holder<Integer>> countEditLogOpTypes(
+    File editLog) throws Exception {
+  EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
+      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
+  EditLogInputStream elis = new EditLogFileInputStream(editLog);
+  try {
+    FSEditLogOp op;
+    while ((op = elis.readOp()) != null) {
+      Holder<Integer> i = opCounts.get(op.opCode);
+      if (i == null) {
+        i = new Holder<Integer>(0);
+        opCounts.put(op.opCode, i);
+      }
+      i.held++;
+    }
+  } finally {
+    IOUtils.closeStream(elis);
+  }
+  return opCounts;
+}
/**
* Assert that all of the given directories have the same newest filename
* for fsimage and that they hold the same data.
@@ -393,7 +425,7 @@ static void assertNNHasCheckpoints(MiniDFSCluster cluster,
}
}
-static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
+public static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
List<File> nameDirs = Lists.newArrayList();
for (URI u : cluster.getNameDirs(0)) {
nameDirs.add(new File(u.getPath(), "current"));