HDFS-2991. Fix case where OP_ADD would not be logged in append(). Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1295214 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a4ad12dd2d
commit
34c73db5e1
|
@ -314,6 +314,8 @@ Release 0.23.2 - UNRELEASED
|
|||
HDFS-3006. In WebHDFS, when the return body is empty, set the Content-Type
|
||||
to application/octet-stream instead of application/json. (szetszwo)
|
||||
|
||||
HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd)
|
||||
|
||||
Release 0.23.1 - 2012-02-17
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -44,6 +44,15 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
|||
@InterfaceAudience.Private
|
||||
public class LayoutVersion {
|
||||
|
||||
/**
|
||||
* Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
|
||||
* sometimes be skipped for append() calls. If we see such a case when
|
||||
* loading the edits, but the version is known to have that bug, we
|
||||
* workaround the issue. Otherwise we should consider it a corruption
|
||||
* and bail.
|
||||
*/
|
||||
public static final int BUGFIX_HDFS_2991_VERSION = -40;
|
||||
|
||||
/**
|
||||
* Enums for features that change the layout version.
|
||||
* <br><br>
|
||||
|
|
|
@ -249,8 +249,6 @@ public class FSDirectory implements Closeable {
|
|||
+" to the file system");
|
||||
return null;
|
||||
}
|
||||
// add create file record to log, record new generation stamp
|
||||
fsImage.getEditLog().logOpenFile(path, newNode);
|
||||
|
||||
if(NameNode.stateChangeLog.isDebugEnabled()) {
|
||||
NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "
|
||||
|
|
|
@ -206,13 +206,22 @@ public class FSEditLogLoader {
|
|||
fsDir.updateFile(oldFile, addCloseOp.path, blocks,
|
||||
addCloseOp.mtime, addCloseOp.atime);
|
||||
if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) { // OP_CLOSE
|
||||
assert oldFile.isUnderConstruction() :
|
||||
"File is not under construction: " + addCloseOp.path;
|
||||
if (!oldFile.isUnderConstruction() &&
|
||||
logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
|
||||
// There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
|
||||
// could show up twice in a row. But after that version, this
|
||||
// should be fixed, so we should treat it as an error.
|
||||
throw new IOException(
|
||||
"File is not under construction: " + addCloseOp.path);
|
||||
}
|
||||
fsNamesys.getBlockManager().completeBlock(
|
||||
oldFile, blocks.length-1, true);
|
||||
INodeFile newFile =
|
||||
((INodeFileUnderConstruction)oldFile).convertToInodeFile();
|
||||
fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
|
||||
|
||||
if (oldFile.isUnderConstruction()) {
|
||||
INodeFile newFile =
|
||||
((INodeFileUnderConstruction)oldFile).convertToInodeFile();
|
||||
fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
|
||||
}
|
||||
} else if(! oldFile.isUnderConstruction()) { // OP_ADD for append
|
||||
INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
|
||||
oldFile.getLocalNameBytes(),
|
||||
|
@ -231,8 +240,10 @@ public class FSEditLogLoader {
|
|||
if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) {
|
||||
fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
|
||||
} else { // Ops.OP_CLOSE
|
||||
fsNamesys.leaseManager.removeLease(
|
||||
((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
|
||||
if (oldFile.isUnderConstruction()) {
|
||||
fsNamesys.leaseManager.removeLease(
|
||||
((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1283,9 +1283,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
clientNode);
|
||||
dir.replaceNode(src, node, cons);
|
||||
leaseManager.addLease(cons.getClientName(), src);
|
||||
|
||||
|
||||
// convert last block to under-construction
|
||||
return blockManager.convertLastBlockToUnderConstruction(cons);
|
||||
LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
|
||||
|
||||
// add append file record to log, record lease, etc.
|
||||
getEditLog().logOpenFile(src, cons);
|
||||
return ret;
|
||||
} else {
|
||||
// Now we can add the name to the filesystem. This file has no
|
||||
// blocks associated with it.
|
||||
|
@ -1301,6 +1305,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
"Unable to add file to namespace.");
|
||||
}
|
||||
leaseManager.addLease(newNode.getClientName(), src);
|
||||
|
||||
// record file record in log, record new generation stamp
|
||||
getEditLog().logOpenFile(src, newNode);
|
||||
if (NameNode.stateChangeLog.isDebugEnabled()) {
|
||||
NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
|
||||
+"add "+src+" to namespace for "+holder);
|
||||
|
|
|
@ -0,0 +1,166 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.EnumMap;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
||||
import org.apache.hadoop.hdfs.util.Holder;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Unit test to make sure that Append properly logs the right
|
||||
* things to the edit log, such that files aren't lost or truncated
|
||||
* on restart.
|
||||
*/
|
||||
public class TestFileAppendRestart {
|
||||
private static final int BLOCK_SIZE = 4096;
|
||||
private static final String HADOOP_23_BROKEN_APPEND_TGZ =
|
||||
"image-with-buggy-append.tgz";
|
||||
|
||||
private void writeAndAppend(FileSystem fs, Path p,
|
||||
int lengthForCreate, int lengthForAppend) throws IOException {
|
||||
// Creating a file with 4096 blockSize to write multiple blocks
|
||||
FSDataOutputStream stream = fs.create(
|
||||
p, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
|
||||
try {
|
||||
AppendTestUtil.write(stream, 0, lengthForCreate);
|
||||
stream.close();
|
||||
|
||||
stream = fs.append(p);
|
||||
AppendTestUtil.write(stream, lengthForCreate, lengthForAppend);
|
||||
stream.close();
|
||||
} finally {
|
||||
IOUtils.closeStream(stream);
|
||||
}
|
||||
|
||||
int totalLength = lengthForCreate + lengthForAppend;
|
||||
assertEquals(totalLength, fs.getFileStatus(p).getLen());
|
||||
}
|
||||
|
||||
/**
|
||||
* Regression test for HDFS-2991. Creates and appends to files
|
||||
* where blocks start/end on block boundaries.
|
||||
*/
|
||||
@Test
|
||||
public void testAppendRestart() throws Exception {
|
||||
final Configuration conf = new HdfsConfiguration();
|
||||
// Turn off persistent IPC, so that the DFSClient can survive NN restart
|
||||
conf.setInt(
|
||||
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
|
||||
0);
|
||||
MiniDFSCluster cluster = null;
|
||||
|
||||
FSDataOutputStream stream = null;
|
||||
try {
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
||||
FileSystem fs = cluster.getFileSystem();
|
||||
File editLog =
|
||||
new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster).get(0),
|
||||
NNStorage.getInProgressEditsFileName(1));
|
||||
EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
|
||||
|
||||
Path p1 = new Path("/block-boundaries");
|
||||
writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
|
||||
|
||||
counts = FSImageTestUtil.countEditLogOpTypes(editLog);
|
||||
assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
|
||||
assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
|
||||
|
||||
Path p2 = new Path("/not-block-boundaries");
|
||||
writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE);
|
||||
counts = FSImageTestUtil.countEditLogOpTypes(editLog);
|
||||
// We get *3* OP_ADDS from this test rather than two. The first
|
||||
// OP_ADD comes from re-opening the file to establish the lease,
|
||||
// the second comes from the updatePipeline call when the block
|
||||
// itself has its generation stamp incremented
|
||||
assertEquals(5, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
|
||||
assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
|
||||
|
||||
cluster.restartNameNode();
|
||||
|
||||
AppendTestUtil.check(fs, p1, 2*BLOCK_SIZE);
|
||||
AppendTestUtil.check(fs, p2, 3*BLOCK_SIZE/2);
|
||||
} finally {
|
||||
IOUtils.closeStream(stream);
|
||||
if (cluster != null) { cluster.shutdown(); }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Earlier versions of HDFS had a bug (HDFS-2991) which caused
|
||||
* append(), when called exactly at a block boundary,
|
||||
* to not log an OP_ADD. This ensures that we can read from
|
||||
* such buggy versions correctly, by loading an image created
|
||||
* using a namesystem image created with 0.23.1-rc2 exhibiting
|
||||
* the issue.
|
||||
*/
|
||||
@Test
|
||||
public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
|
||||
final Configuration conf = new HdfsConfiguration();
|
||||
|
||||
String tarFile = System.getProperty("test.cache.data", "build/test/cache")
|
||||
+ "/" + HADOOP_23_BROKEN_APPEND_TGZ;
|
||||
String testDir = System.getProperty("test.build.data", "build/test/data");
|
||||
File dfsDir = new File(testDir, "image-with-buggy-append");
|
||||
if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
|
||||
throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
|
||||
}
|
||||
FileUtil.unTar(new File(tarFile), new File(testDir));
|
||||
|
||||
File nameDir = new File(dfsDir, "name");
|
||||
GenericTestUtils.assertExists(nameDir);
|
||||
|
||||
conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
|
||||
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
|
||||
.format(false)
|
||||
.manageDataDfsDirs(false)
|
||||
.manageNameDfsDirs(false)
|
||||
.numDataNodes(0)
|
||||
.waitSafeMode(false)
|
||||
.startupOption(StartupOption.UPGRADE)
|
||||
.build();
|
||||
try {
|
||||
FileSystem fs = cluster.getFileSystem();
|
||||
Path testPath = new Path("/tmp/io_data/test_io_0");
|
||||
assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -26,6 +26,7 @@ import java.net.URI;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumMap;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -41,6 +42,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
|||
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||
import org.apache.hadoop.hdfs.util.Holder;
|
||||
import org.apache.hadoop.hdfs.util.MD5FileUtils;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -190,6 +192,36 @@ public abstract class FSImageTestUtil {
|
|||
ImmutableList.of(logDir.toURI()));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param editLog a path of an edit log file
|
||||
* @return the count of each type of operation in the log file
|
||||
* @throws Exception if there is an error reading it
|
||||
*/
|
||||
public static EnumMap<FSEditLogOpCodes,Holder<Integer>> countEditLogOpTypes(
|
||||
File editLog) throws Exception {
|
||||
EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
|
||||
new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
|
||||
|
||||
EditLogInputStream elis = new EditLogFileInputStream(editLog);
|
||||
try {
|
||||
FSEditLogOp op;
|
||||
while ((op = elis.readOp()) != null) {
|
||||
Holder<Integer> i = opCounts.get(op.opCode);
|
||||
if (i == null) {
|
||||
i = new Holder<Integer>(0);
|
||||
opCounts.put(op.opCode, i);
|
||||
}
|
||||
i.held++;
|
||||
}
|
||||
} finally {
|
||||
IOUtils.closeStream(elis);
|
||||
}
|
||||
|
||||
return opCounts;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Assert that all of the given directories have the same newest filename
|
||||
* for fsimage that they hold the same data.
|
||||
|
@ -393,7 +425,7 @@ public abstract class FSImageTestUtil {
|
|||
}
|
||||
}
|
||||
|
||||
static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
|
||||
public static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
|
||||
List<File> nameDirs = Lists.newArrayList();
|
||||
for (URI u : cluster.getNameDirs(0)) {
|
||||
nameDirs.add(new File(u.getPath(), "current"));
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue