HDFS-2991. Fix case where OP_ADD would not be logged in append(). Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1295214 13f79535-47bb-0310-9956-ffa450edef68
commit 34c73db5e1 (parent a4ad12dd2d)
@@ -314,6 +314,8 @@ Release 0.23.2 - UNRELEASED
     HDFS-3006. In WebHDFS, when the return body is empty, set the Content-Type
     to application/octet-stream instead of application/json. (szetszwo)
 
+    HDFS-2991. Fix case where OP_ADD would not be logged in append(). (todd)
+
 Release 0.23.1 - 2012-02-17
 
   INCOMPATIBLE CHANGES
@@ -44,6 +44,15 @@ import org.apache.hadoop.classification.InterfaceAudience;
 @InterfaceAudience.Private
 public class LayoutVersion {
 
+  /**
+   * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+   * sometimes be skipped for append() calls. If we see such a case when
+   * loading the edits, but the version is known to have that bug, we
+   * workaround the issue. Otherwise we should consider it a corruption
+   * and bail.
+   */
+  public static final int BUGFIX_HDFS_2991_VERSION = -40;
+
   /**
    * Enums for features that change the layout version.
    * <br><br>
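HDFS layout versions are negative integers that grow more negative with each layout change, so a comparison like logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION is true only for edit logs written at or after the release containing this fix. Below is a minimal standalone sketch of that gating logic; it is not part of the patch, and the isFixedVersion() helper name is hypothetical.

// Illustrative sketch only: shows the direction of the layout-version
// comparison used by this patch. Not part of the commit.
class LayoutVersionGateSketch {
  // Same value the patch introduces in LayoutVersion.java.
  static final int BUGFIX_HDFS_2991_VERSION = -40;

  // True when the edit log was written by a release that already contains
  // the HDFS-2991 fix (layout versions become more negative over time).
  static boolean isFixedVersion(int logVersion) {
    return logVersion <= BUGFIX_HDFS_2991_VERSION;
  }

  public static void main(String[] args) {
    System.out.println(isFixedVersion(-40));  // true  -> a missing OP_ADD is treated as corruption
    System.out.println(isFixedVersion(-38));  // false -> pre-fix log, work around the missing OP_ADD
  }
}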
@@ -249,8 +249,6 @@ public class FSDirectory implements Closeable {
                                 +" to the file system");
       return null;
     }
-    // add create file record to log, record new generation stamp
-    fsImage.getEditLog().logOpenFile(path, newNode);
 
     if(NameNode.stateChangeLog.isDebugEnabled()) {
       NameNode.stateChangeLog.debug("DIR* FSDirectory.addFile: "
@@ -206,13 +206,22 @@ public class FSEditLogLoader {
         fsDir.updateFile(oldFile, addCloseOp.path, blocks,
             addCloseOp.mtime, addCloseOp.atime);
         if(addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE) { // OP_CLOSE
-          assert oldFile.isUnderConstruction() :
-            "File is not under construction: " + addCloseOp.path;
+          if (!oldFile.isUnderConstruction() &&
+              logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
+            // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
+            // could show up twice in a row. But after that version, this
+            // should be fixed, so we should treat it as an error.
+            throw new IOException(
+                "File is not under construction: " + addCloseOp.path);
+          }
           fsNamesys.getBlockManager().completeBlock(
               oldFile, blocks.length-1, true);
-          INodeFile newFile =
-            ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
-          fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+          if (oldFile.isUnderConstruction()) {
+            INodeFile newFile =
+                ((INodeFileUnderConstruction)oldFile).convertToInodeFile();
+            fsDir.replaceNode(addCloseOp.path, oldFile, newFile);
+          }
         } else if(! oldFile.isUnderConstruction()) { // OP_ADD for append
           INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
               oldFile.getLocalNameBytes(),
@@ -231,8 +240,10 @@ public class FSEditLogLoader {
       if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) {
         fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
       } else { // Ops.OP_CLOSE
-        fsNamesys.leaseManager.removeLease(
-            ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+        if (oldFile.isUnderConstruction()) {
+          fsNamesys.leaseManager.removeLease(
+              ((INodeFileUnderConstruction)oldFile).getClientName(), addCloseOp.path);
+        }
       }
       break;
     }
@@ -1283,9 +1283,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
                                                 clientNode);
       dir.replaceNode(src, node, cons);
       leaseManager.addLease(cons.getClientName(), src);
 
       // convert last block to under-construction
-      return blockManager.convertLastBlockToUnderConstruction(cons);
+      LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
+
+      // add append file record to log, record lease, etc.
+      getEditLog().logOpenFile(src, cons);
+      return ret;
     } else {
       // Now we can add the name to the filesystem. This file has no
       // blocks associated with it.
@@ -1301,6 +1305,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
                                         "Unable to add file to namespace.");
       }
       leaseManager.addLease(newNode.getClientName(), src);
+
+      // record file record in log, record new generation stamp
+      getEditLog().logOpenFile(src, newNode);
       if (NameNode.stateChangeLog.isDebugEnabled()) {
         NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: "
             +"add "+src+" to namespace for "+holder);
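Taken together, the FSDirectory and FSNamesystem hunks above move the OP_ADD logging out of FSDirectory.addFile() and into the namesystem entry points, so that both the create path (startFile) and the append path write the open-file record; before this change, an append that started exactly on a block boundary could complete without any OP_ADD being logged. A simplified sketch of that restructuring follows; the class and method names in it are illustrative, not the actual HDFS types.

// Simplified sketch of the pattern applied by this patch: the edit-log
// write happens in each namesystem entry point rather than inside the
// directory helper, so the append path can no longer skip it.
class EditLoggingSketch {
  interface EditLog { void logOpenFile(String path); }

  static class Namesystem {
    private final EditLog editLog;
    Namesystem(EditLog editLog) { this.editLog = editLog; }

    void startFile(String path) {
      addToDirectory(path);        // directory update only, no logging here
      editLog.logOpenFile(path);   // create path records OP_ADD
    }

    void appendFile(String path) {
      reopenForAppend(path);       // convert last block, add lease
      editLog.logOpenFile(path);   // append path now records OP_ADD as well
    }

    private void addToDirectory(String path) { }
    private void reopenForAppend(String path) { }
  }

  public static void main(String[] args) {
    Namesystem ns = new Namesystem(path -> System.out.println("OP_ADD " + path));
    ns.startFile("/a");
    ns.appendFile("/a");
  }
}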
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.EnumMap;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
+import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage;
+import org.apache.hadoop.hdfs.util.Holder;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Test;
+
+/**
+ * Unit test to make sure that Append properly logs the right
+ * things to the edit log, such that files aren't lost or truncated
+ * on restart.
+ */
+public class TestFileAppendRestart {
+  private static final int BLOCK_SIZE = 4096;
+  private static final String HADOOP_23_BROKEN_APPEND_TGZ =
+      "image-with-buggy-append.tgz";
+
+  private void writeAndAppend(FileSystem fs, Path p,
+      int lengthForCreate, int lengthForAppend) throws IOException {
+    // Creating a file with 4096 blockSize to write multiple blocks
+    FSDataOutputStream stream = fs.create(
+        p, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+    try {
+      AppendTestUtil.write(stream, 0, lengthForCreate);
+      stream.close();
+
+      stream = fs.append(p);
+      AppendTestUtil.write(stream, lengthForCreate, lengthForAppend);
+      stream.close();
+    } finally {
+      IOUtils.closeStream(stream);
+    }
+
+    int totalLength = lengthForCreate + lengthForAppend;
+    assertEquals(totalLength, fs.getFileStatus(p).getLen());
+  }
+
+  /**
+   * Regression test for HDFS-2991. Creates and appends to files
+   * where blocks start/end on block boundaries.
+   */
+  @Test
+  public void testAppendRestart() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    // Turn off persistent IPC, so that the DFSClient can survive NN restart
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+        0);
+    MiniDFSCluster cluster = null;
+
+    FSDataOutputStream stream = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      FileSystem fs = cluster.getFileSystem();
+      File editLog =
+          new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster).get(0),
+              NNStorage.getInProgressEditsFileName(1));
+      EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
+
+      Path p1 = new Path("/block-boundaries");
+      writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
+
+      counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+      assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+      Path p2 = new Path("/not-block-boundaries");
+      writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE);
+      counts = FSImageTestUtil.countEditLogOpTypes(editLog);
+      // We get *3* OP_ADDS from this test rather than two. The first
+      // OP_ADD comes from re-opening the file to establish the lease,
+      // the second comes from the updatePipeline call when the block
+      // itself has its generation stamp incremented
+      assertEquals(5, (int)counts.get(FSEditLogOpCodes.OP_ADD).held);
+      assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held);
+
+      cluster.restartNameNode();
+
+      AppendTestUtil.check(fs, p1, 2*BLOCK_SIZE);
+      AppendTestUtil.check(fs, p2, 3*BLOCK_SIZE/2);
+    } finally {
+      IOUtils.closeStream(stream);
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+  /**
+   * Earlier versions of HDFS had a bug (HDFS-2991) which caused
+   * append(), when called exactly at a block boundary,
+   * to not log an OP_ADD. This ensures that we can read from
+   * such buggy versions correctly, by loading an image created
+   * using a namesystem image created with 0.23.1-rc2 exhibiting
+   * the issue.
+   */
+  @Test
+  public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
+    final Configuration conf = new HdfsConfiguration();
+
+    String tarFile = System.getProperty("test.cache.data", "build/test/cache")
+        + "/" + HADOOP_23_BROKEN_APPEND_TGZ;
+    String testDir = System.getProperty("test.build.data", "build/test/data");
+    File dfsDir = new File(testDir, "image-with-buggy-append");
+    if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
+      throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
+    }
+    FileUtil.unTar(new File(tarFile), new File(testDir));
+
+    File nameDir = new File(dfsDir, "name");
+    GenericTestUtils.assertExists(nameDir);
+
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+        .format(false)
+        .manageDataDfsDirs(false)
+        .manageNameDfsDirs(false)
+        .numDataNodes(0)
+        .waitSafeMode(false)
+        .startupOption(StartupOption.UPGRADE)
+        .build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      Path testPath = new Path("/tmp/io_data/test_io_0");
+      assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}
@@ -26,6 +26,7 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -41,6 +42,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
 import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.hdfs.util.Holder;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -190,6 +192,36 @@ public abstract class FSImageTestUtil {
         ImmutableList.of(logDir.toURI()));
   }
 
+
+  /**
+   * @param editLog a path of an edit log file
+   * @return the count of each type of operation in the log file
+   * @throws Exception if there is an error reading it
+   */
+  public static EnumMap<FSEditLogOpCodes,Holder<Integer>> countEditLogOpTypes(
+      File editLog) throws Exception {
+    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
+        new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
+
+    EditLogInputStream elis = new EditLogFileInputStream(editLog);
+    try {
+      FSEditLogOp op;
+      while ((op = elis.readOp()) != null) {
+        Holder<Integer> i = opCounts.get(op.opCode);
+        if (i == null) {
+          i = new Holder<Integer>(0);
+          opCounts.put(op.opCode, i);
+        }
+        i.held++;
+      }
+    } finally {
+      IOUtils.closeStream(elis);
+    }
+
+    return opCounts;
+  }
+
+
   /**
    * Assert that all of the given directories have the same newest filename
    * for fsimage that they hold the same data.
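A standalone usage sketch of the new countEditLogOpTypes() helper, along the lines of how TestFileAppendRestart above uses it. It assumes the test classpath (FSImageTestUtil lives in the test sources), and the edit-log path is passed on the command line; any concrete path would be an assumption.

import java.io.File;
import java.util.EnumMap;

import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.hdfs.util.Holder;

// Sketch: print per-opcode counts for an edit log file, e.g. a
// current/edits_inprogress_* file from a test cluster's name directory.
public class CountEditLogOpsSketch {
  public static void main(String[] args) throws Exception {
    File editLog = new File(args[0]);
    EnumMap<FSEditLogOpCodes, Holder<Integer>> counts =
        FSImageTestUtil.countEditLogOpTypes(editLog);
    Holder<Integer> adds = counts.get(FSEditLogOpCodes.OP_ADD);
    Holder<Integer> closes = counts.get(FSEditLogOpCodes.OP_CLOSE);
    System.out.println("OP_ADD:   " + (adds == null ? 0 : adds.held));
    System.out.println("OP_CLOSE: " + (closes == null ? 0 : closes.held));
  }
}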
@@ -393,7 +425,7 @@ public abstract class FSImageTestUtil {
     }
   }
 
-  static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
+  public static List<File> getNameNodeCurrentDirs(MiniDFSCluster cluster) {
     List<File> nameDirs = Lists.newArrayList();
     for (URI u : cluster.getNameDirs(0)) {
       nameDirs.add(new File(u.getPath(), "current"));
Binary file not shown.