From 6104520369045dfaa4b543cbad21236ed322249b Mon Sep 17 00:00:00 2001
From: Jing Zhao <jing9@apache.org>
Date: Thu, 4 Sep 2014 18:54:38 -0700
Subject: [PATCH] HDFS-6886. Use single editlog record for creating file +
 overwrite. Contributed by Yi Liu.

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |   3 +
 .../org/apache/hadoop/hdfs/inotify/Event.java      |  12 ++
 .../hadoop/hdfs/protocolPB/PBHelper.java           |   6 +-
 .../hdfs/server/namenode/FSEditLog.java            |   6 +-
 .../hdfs/server/namenode/FSEditLogLoader.java      |   8 +-
 .../hdfs/server/namenode/FSEditLogOp.java          |  18 ++
 .../server/namenode/FSImageSerialization.java      |  17 ++
 .../hadoop/hdfs/server/namenode/FSNamesystem.java  |  45 +++--
 .../InotifyFSEditLogOpTranslator.java              |   1 +
 .../namenode/NameNodeLayoutVersion.java            |   4 +-
 .../hadoop-hdfs/src/main/proto/inotify.proto       |   1 +
 .../hdfs/TestDFSInotifyEventInputStream.java       |   8 +-
 .../apache/hadoop/hdfs/TestFileCreation.java       | 119 ++++++++++++
 .../hdfs/server/namenode/CreateEditsLog.java       |   2 +-
 .../hdfs/server/namenode/TestEditLog.java          |   2 +-
 .../src/test/resources/editsStored                 | Bin 4992 -> 5252 bytes
 .../src/test/resources/editsStored.xml             | 182 ++++++++++++------
 17 files changed, 339 insertions(+), 95 deletions(-)
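Note on the change (illustration only, not part of the patch): before this
change, creating a file with overwrite journaled two edit log records -- an
OP_DELETE of the existing file followed by an OP_ADD -- so a replay or an
inotify consumer saw two separate operations. With this change the NameNode
journals a single OP_ADD record that carries an overwrite flag. The client
API is untouched; a minimal sketch of the triggering call, assuming a
reachable HDFS instance with /foo/dir/file already present:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateOverwriteExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/foo/dir/file");
        // Overwrite an existing file: with this patch the NameNode logs one
        // OP_ADD (overwrite=true) instead of OP_DELETE followed by OP_ADD.
        FSDataOutputStream out = fs.create(p, true /* overwrite */);
        try {
          out.writeBytes("new contents");
        } finally {
          out.close();
        }
      }
    }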
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3e00ebaeeb8..8964d2d4907 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -438,6 +438,9 @@ Release 2.6.0 - UNRELEASED
     HDFS-6959. Make the HDFS home directory location customizable. (yzhang via
     cmccabe)
 
+    HDFS-6886. Use single editlog record for creating file + overwrite. (Yi Liu
+    via jing9)
+
   OPTIMIZATIONS
 
     HDFS-6690. Deduplicate xattr names in memory. (wang)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/inotify/Event.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/inotify/Event.java
index c7129ca324c..e8a34e7c21d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/inotify/Event.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/inotify/Event.java
@@ -100,6 +100,7 @@ public abstract class Event {
     private String groupName;
     private FsPermission perms;
     private String symlinkTarget;
+    private boolean overwrite;
 
     public static class Builder {
       private INodeType iNodeType;
@@ -110,6 +111,7 @@ public abstract class Event {
       private String groupName;
       private FsPermission perms;
       private String symlinkTarget;
+      private boolean overwrite;
 
       public Builder iNodeType(INodeType type) {
         this.iNodeType = type;
@@ -150,6 +152,11 @@ public abstract class Event {
         this.symlinkTarget = symlinkTarget;
         return this;
       }
+
+      public Builder overwrite(boolean overwrite) {
+        this.overwrite = overwrite;
+        return this;
+      }
 
       public CreateEvent build() {
         return new CreateEvent(this);
@@ -166,6 +173,7 @@ public abstract class Event {
       this.groupName = b.groupName;
       this.perms = b.perms;
       this.symlinkTarget = b.symlinkTarget;
+      this.overwrite = b.overwrite;
     }
 
     public INodeType getiNodeType() {
@@ -208,6 +216,10 @@ public abstract class Event {
     public String getSymlinkTarget() {
       return symlinkTarget;
     }
+
+    public boolean getOverwrite() {
+      return overwrite;
+    }
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
index 38ba7db1387..193b8268669 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
@@ -2430,7 +2430,8 @@ public class PBHelper {
             .perms(convert(create.getPerms()))
             .replication(create.getReplication())
             .symlinkTarget(create.getSymlinkTarget().isEmpty() ? null :
-                create.getSymlinkTarget()).build());
+                create.getSymlinkTarget())
+            .overwrite(create.getOverwrite()).build());
         break;
       case EVENT_METADATA:
         InotifyProtos.MetadataUpdateEventProto meta =
@@ -2508,7 +2509,8 @@ public class PBHelper {
                     .setPerms(convert(ce2.getPerms()))
                     .setReplication(ce2.getReplication())
                     .setSymlinkTarget(ce2.getSymlinkTarget() == null ?
-                        "" : ce2.getSymlinkTarget()).build().toByteString()
+                        "" : ce2.getSymlinkTarget())
+                    .setOverwrite(ce2.getOverwrite()).build().toByteString()
                 ).build());
         break;
       case METADATA:
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
index 9d3538d4610..a157f4f9ab0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
@@ -702,7 +702,8 @@ public class FSEditLog implements LogsPurgeable {
    * Add open lease record to edit log.
    * Records the block locations of the last block.
    */
-  public void logOpenFile(String path, INodeFile newNode, boolean toLogRpcIds) {
+  public void logOpenFile(String path, INodeFile newNode, boolean overwrite,
+      boolean toLogRpcIds) {
     Preconditions.checkArgument(newNode.isUnderConstruction());
     PermissionStatus permissions = newNode.getPermissionStatus();
     AddOp op = AddOp.getInstance(cache.get())
@@ -716,7 +717,8 @@ public class FSEditLog implements LogsPurgeable {
       .setPermissionStatus(permissions)
       .setClientName(newNode.getFileUnderConstructionFeature().getClientName())
       .setClientMachine(
-          newNode.getFileUnderConstructionFeature().getClientMachine());
+          newNode.getFileUnderConstructionFeature().getClientMachine())
+      .setOverwrite(overwrite);
 
     AclFeature f = newNode.getAclFeature();
     if (f != null) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index d522e51bc23..6afd81ccb2a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -340,8 +340,12 @@ public class FSEditLogLoader {
 
       // See if the file already exists (persistBlocks call)
       final INodesInPath iip = fsDir.getLastINodeInPath(path);
-      final INodeFile oldFile = INodeFile.valueOf(
-          iip.getINode(0), path, true);
+      INodeFile oldFile = INodeFile.valueOf(iip.getINode(0), path, true);
+      if (oldFile != null && addCloseOp.overwrite) {
+        // This is OP_ADD with overwrite
+        fsDir.unprotectedDelete(path, addCloseOp.mtime);
+        oldFile = null;
+      }
       INodeFile newFile = oldFile;
       if (oldFile == null) { // this is OP_ADD on a new file (case 1)
         // versions > 0 support per file replication
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java
index 5543e0cb86e..c5dd3df367b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java
@@ -407,6 +407,7 @@ public abstract class FSEditLogOp {
     List<XAttr> xAttrs;
     String clientName;
     String clientMachine;
+    boolean overwrite;
 
     private AddCloseOp(FSEditLogOpCodes opCode) {
       super(opCode);
@@ -486,6 +487,11 @@ public abstract class FSEditLogOp {
       this.clientMachine = clientMachine;
       return (T)this;
     }
+
+    T setOverwrite(boolean overwrite) {
+      this.overwrite = overwrite;
+      return (T)this;
+    }
 
     @Override
     public void writeFields(DataOutputStream out) throws IOException {
@@ -505,6 +511,7 @@ public abstract class FSEditLogOp {
       b.build().writeDelimitedTo(out);
       FSImageSerialization.writeString(clientName,out);
       FSImageSerialization.writeString(clientMachine,out);
+      FSImageSerialization.writeBoolean(overwrite, out);
       // write clientId and callId
       writeRpcIds(rpcClientId, rpcCallId, out);
     }
@@ -570,6 +577,12 @@ public abstract class FSEditLogOp {
         this.xAttrs = readXAttrsFromEditLog(in, logVersion);
         this.clientName = FSImageSerialization.readString(in);
         this.clientMachine = FSImageSerialization.readString(in);
+        if (NameNodeLayoutVersion.supports(
+            NameNodeLayoutVersion.Feature.CREATE_OVERWRITE, logVersion)) {
+          this.overwrite = FSImageSerialization.readBoolean(in);
+        } else {
+          this.overwrite = false;
+        }
         // read clientId and callId
         readRpcIds(in, logVersion);
       } else {
@@ -625,6 +638,8 @@ public abstract class FSEditLogOp {
       builder.append(clientName);
       builder.append(", clientMachine=");
       builder.append(clientMachine);
+      builder.append(", overwrite=");
+      builder.append(overwrite);
       if (this.opCode == OP_ADD) {
         appendRpcIdsToString(builder, rpcClientId, rpcCallId);
       }
@@ -653,6 +668,8 @@ public abstract class FSEditLogOp {
           Long.toString(blockSize));
       XMLUtils.addSaxString(contentHandler, "CLIENT_NAME", clientName);
       XMLUtils.addSaxString(contentHandler, "CLIENT_MACHINE", clientMachine);
+      XMLUtils.addSaxString(contentHandler, "OVERWRITE",
+          Boolean.toString(overwrite));
       for (Block b : blocks) {
         FSEditLogOp.blockToXml(contentHandler, b);
       }
@@ -676,6 +693,7 @@ public abstract class FSEditLogOp {
       this.blockSize = Long.parseLong(st.getValue("BLOCKSIZE"));
       this.clientName = st.getValue("CLIENT_NAME");
       this.clientMachine = st.getValue("CLIENT_MACHINE");
+      this.overwrite = Boolean.parseBoolean(st.getValueOrNull("OVERWRITE"));
       if (st.hasChildren("BLOCK")) {
         List<Stanza> blocks = st.getChildren("BLOCK");
         this.blocks = new Block[blocks.size()];
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
index eb8354d7aac..3956d9a5894 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.Ref
 import org.apache.hadoop.hdfs.util.XMLUtils;
 import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
 import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.ShortWritable;
@@ -88,6 +89,7 @@ public class FSImageSerialization {
     final IntWritable U_INT = new IntWritable();
     final LongWritable U_LONG = new LongWritable();
     final FsPermission FILE_PERM = new FsPermission((short) 0);
+    final BooleanWritable U_BOOLEAN = new BooleanWritable();
   }
 
   private static void writePermissionStatus(INodeAttributes inode,
@@ -366,6 +368,21 @@ public class FSImageSerialization {
     uLong.write(out);
   }
 
+  /** read the boolean value */
+  static boolean readBoolean(DataInput in) throws IOException {
+    BooleanWritable uBoolean = TL_DATA.get().U_BOOLEAN;
+    uBoolean.readFields(in);
+    return uBoolean.get();
+  }
+
+  /** write the boolean value */
+  static void writeBoolean(boolean value, DataOutputStream out)
+      throws IOException {
+    BooleanWritable uBoolean = TL_DATA.get().U_BOOLEAN;
+    uBoolean.set(value);
+    uBoolean.write(out);
+  }
+
   /** read the int value */
   static int readInt(DataInput in) throws IOException {
     IntWritable uInt = TL_DATA.get().U_INT;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 5d60dd74bc2..c1744f6421b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -2448,6 +2448,7 @@
    * A special RetryStartFileException is used to indicate that we should
    * retry creation of a FileEncryptionInfo.
    */
+    BlocksMapUpdateInfo toRemoveBlocks = null;
     try {
       boolean shouldContinue = true;
       int iters = 0;
@@ -2496,9 +2497,9 @@
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot create file" + src);
         src = resolvePath(src, pathComponents);
-        startFileInternal(pc, src, permissions, holder, clientMachine, create,
-            overwrite, createParent, replication, blockSize, suite, edek,
-            logRetryCache);
+        toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
+            clientMachine, create, overwrite, createParent, replication,
+            blockSize, suite, edek, logRetryCache);
         stat = dir.getFileInfo(src, false,
             FSDirectory.isReservedRawName(srcArg));
       } catch (StandbyException se) {
@@ -2519,6 +2520,10 @@
       // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
+       if (toRemoveBlocks != null) {
+         removeBlocks(toRemoveBlocks);
+         toRemoveBlocks.clear();
+       }
      }
    }
 
@@ -2535,11 +2540,11 @@
    * For description of parameters and exceptions thrown see
    * {@link ClientProtocol#create}
    */
-  private void startFileInternal(FSPermissionChecker pc, String src,
-      PermissionStatus permissions, String holder, String clientMachine,
-      boolean create, boolean overwrite, boolean createParent,
-      short replication, long blockSize, CipherSuite suite,
-      EncryptedKeyVersion edek, boolean logRetryEntry)
+  private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc,
+      String src, PermissionStatus permissions, String holder,
+      String clientMachine, boolean create, boolean overwrite,
+      boolean createParent, short replication, long blockSize,
+      CipherSuite suite, EncryptedKeyVersion edek, boolean logRetryEntry)
       throws FileAlreadyExistsException, AccessControlException,
       UnresolvedLinkException, FileNotFoundException,
       ParentNotDirectoryException, RetryStartFileException, IOException {
@@ -2575,9 +2580,12 @@
     if (isPermissionEnabled) {
       if (overwrite && myFile != null) {
         checkPathAccess(pc, src, FsAction.WRITE);
-      } else {
-        checkAncestorAccess(pc, src, FsAction.WRITE);
       }
+      /*
+       * To overwrite existing file, need to check 'w' permission
+       * of parent (equals to ancestor in this case)
+       */
+      checkAncestorAccess(pc, src, FsAction.WRITE);
     }
 
     if (!createParent) {
@@ -2585,6 +2593,7 @@
     }
 
     try {
+      BlocksMapUpdateInfo toRemoveBlocks = null;
       if (myFile == null) {
         if (!create) {
           throw new FileNotFoundException("Can't overwrite non-existent " +
@@ -2592,11 +2601,12 @@
         }
       } else {
         if (overwrite) {
-          try {
-            deleteInt(src, true, false); // File exists - delete if overwrite
-          } catch (AccessControlException e) {
-            logAuditEvent(false, "delete", src);
-            throw e;
+          toRemoveBlocks = new BlocksMapUpdateInfo();
+          List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
+          long ret = dir.delete(src, toRemoveBlocks, toRemoveINodes, now());
+          if (ret >= 0) {
+            incrDeletedFileCount(ret);
+            removePathAndBlocks(src, null, toRemoveINodes, true);
           }
         } else {
           // If lease soft limit time is expired, recover the lease
@@ -2630,11 +2640,12 @@
       }
 
       // record file record in log, record new generation stamp
-      getEditLog().logOpenFile(src, newNode, logRetryEntry);
+      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
       if (NameNode.stateChangeLog.isDebugEnabled()) {
         NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
             src + " inode " + newNode.getId() + " " + holder);
       }
+      return toRemoveBlocks;
     } catch (IOException ie) {
       NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
           ie.getMessage());
@@ -2737,7 +2748,7 @@
     }
 
     if (writeToEditLog) {
-      getEditLog().logOpenFile(src, cons, logRetryCache);
+      getEditLog().logOpenFile(src, cons, false, logRetryCache);
     }
     return ret;
   }
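Note on the FSNamesystem hunks above (illustration only, not part of the
patch): startFileInternal now performs the delete itself instead of calling
deleteInt(), so no separate OP_DELETE is journaled. The doomed inode is
unlinked and its blocks are only collected while the namesystem lock is held;
the collected blocks are handed back to the caller and released after
logSync(), so DataNode invalidations never get ahead of the durable edit.
Condensed from the hunks above (not compilable as-is; details elided):

    // under the namesystem write lock
    toRemoveBlocks = startFileInternal(...);  // unlinks the old inode,
                                              // collects its blocks, and
                                              // logs a single OP_ADD
    // after releasing the lock
    getEditLog().logSync();                   // make the record durable
    if (toRemoveBlocks != null) {
      removeBlocks(toRemoveBlocks);           // now invalidate old replicas
      toRemoveBlocks.clear();
    }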
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java
index 676f8874cf0..00a5f2518cb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/InotifyFSEditLogOpTranslator.java
@@ -50,6 +50,7 @@ public class InotifyFSEditLogOpTranslator {
           .ownerName(addOp.permissions.getUserName())
           .groupName(addOp.permissions.getGroupName())
           .perms(addOp.permissions.getPermission())
+          .overwrite(addOp.overwrite)
           .iNodeType(Event.CreateEvent.INodeType.FILE).build() };
     } else {
       return new Event[] { new Event.AppendEvent(addOp.path) };
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java
index 6ae2806d8f1..1df6df4fc22 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java
@@ -65,7 +65,9 @@ public class NameNodeLayoutVersion {
   public static enum Feature implements LayoutFeature {
     ROLLING_UPGRADE(-55, -53, "Support rolling upgrade", false),
     EDITLOG_LENGTH(-56, "Add length field to every edit log op"),
-    XATTRS(-57, "Extended attributes");
+    XATTRS(-57, "Extended attributes"),
+    CREATE_OVERWRITE(-58, "Use single editlog record for " +
+        "creating file with overwrite");
 
     private final FeatureInfo info;
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/inotify.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/inotify.proto
index b58bfcc3b5f..a1d4d920d1d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/inotify.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/inotify.proto
@@ -72,6 +72,7 @@ message CreateEventProto {
   required FsPermissionProto perms = 6;
   optional int32 replication = 7;
   optional string symlinkTarget = 8;
+  optional bool overwrite = 9;
 }
 
 message CloseEventProto {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java
index c268281a0f7..45b588b93a2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java
@@ -147,12 +147,7 @@ public class TestDFSInotifyEventInputStream {
       Assert.assertTrue(re2.getSrcPath().equals("/file4"));
       Assert.assertTrue(re.getTimestamp() > 0);
 
-      // DeleteOp
-      next = waitForNextEvent(eis);
-      Assert.assertTrue(next.getEventType() == Event.EventType.UNLINK);
-      Assert.assertTrue(((Event.UnlinkEvent) next).getPath().equals("/file2"));
-
-      // AddOp
+      // AddOp with overwrite
       next = waitForNextEvent(eis);
       Assert.assertTrue(next.getEventType() == Event.EventType.CREATE);
       Event.CreateEvent ce = (Event.CreateEvent) next;
@@ -161,6 +156,7 @@ public class TestDFSInotifyEventInputStream {
       Assert.assertTrue(ce.getCtime() > 0);
       Assert.assertTrue(ce.getReplication() > 0);
       Assert.assertTrue(ce.getSymlinkTarget() == null);
+      Assert.assertTrue(ce.getOverwrite());
 
       // CloseOp
       next = waitForNextEvent(eis);
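Note for inotify consumers (illustration only, not part of the patch): an
overwrite that previously surfaced as an UNLINK event followed by a CREATE
event now surfaces as a single CREATE event whose getOverwrite() returns
true, as the test change above shows. A minimal consumer sketch, assuming a
NameNode URI such as hdfs://localhost:8020 as the first argument and the
2.6-era API in which take() returns one Event:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
    import org.apache.hadoop.hdfs.client.HdfsAdmin;
    import org.apache.hadoop.hdfs.inotify.Event;

    public class CreateEventWatcher {
      public static void main(String[] args) throws Exception {
        HdfsAdmin admin = new HdfsAdmin(URI.create(args[0]), new Configuration());
        DFSInotifyEventInputStream stream = admin.getInotifyEventStream();
        while (true) {
          Event event = stream.take();  // blocks until the next event
          if (event.getEventType() == Event.EventType.CREATE) {
            Event.CreateEvent ce = (Event.CreateEvent) event;
            // Overwrites are now visible directly on the CREATE event.
            System.out.println(ce.getPath() + " overwrite=" + ce.getOverwrite());
          }
        }
      }
    }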
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java
index c8b7df2c75c..3a399f3f5eb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java
@@ -38,6 +38,7 @@ import static org.junit.Assert.fail;
 import static org.junit.Assume.assumeTrue;
 
 import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileReader;
@@ -70,6 +71,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
@@ -78,6 +80,8 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.INodeId;
 import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.io.EnumSetWritable;
 import org.apache.hadoop.io.IOUtils;
@@ -86,6 +90,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.log4j.Level;
+import org.junit.Assert;
 import org.junit.Test;
 
 /**
@@ -1210,4 +1215,118 @@ public class TestFileCreation {
     }
   }
 
+  /**
+   * 1. Check the blocks of old file are cleaned after creating with overwrite
+   * 2. Restart NN, check the file
+   * 3. Save new checkpoint and restart NN, check the file
+   */
+  @Test(timeout = 120000)
+  public void testFileCreationWithOverwrite() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setInt("dfs.blocksize", blockSize);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).
+        numDataNodes(3).build();
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    try {
+      dfs.mkdirs(new Path("/foo/dir"));
+      String file = "/foo/dir/file";
+      Path filePath = new Path(file);
+
+      // Case 1: Create file with overwrite, check the blocks of old file
+      // are cleaned after creating with overwrite
+      NameNode nn = cluster.getNameNode();
+      FSNamesystem fsn = NameNodeAdapter.getNamesystem(nn);
+      BlockManager bm = fsn.getBlockManager();
+
+      FSDataOutputStream out = dfs.create(filePath);
+      byte[] oldData = AppendTestUtil.randomBytes(seed, fileSize);
+      try {
+        out.write(oldData);
+      } finally {
+        out.close();
+      }
+
+      LocatedBlocks oldBlocks = NameNodeAdapter.getBlockLocations(
+          nn, file, 0, fileSize);
+      assertBlocks(bm, oldBlocks, true);
+
+      out = dfs.create(filePath, true);
+      byte[] newData = AppendTestUtil.randomBytes(seed, fileSize);
+      try {
+        out.write(newData);
+      } finally {
+        out.close();
+      }
+      dfs.deleteOnExit(filePath);
+
+      LocatedBlocks newBlocks = NameNodeAdapter.getBlockLocations(
+          nn, file, 0, fileSize);
+      assertBlocks(bm, newBlocks, true);
+      assertBlocks(bm, oldBlocks, false);
+
+      FSDataInputStream in = dfs.open(filePath);
+      byte[] result = null;
+      try {
+        result = readAll(in);
+      } finally {
+        in.close();
+      }
+      Assert.assertArrayEquals(newData, result);
+
+      // Case 2: Restart NN, check the file
+      cluster.restartNameNode();
+      nn = cluster.getNameNode();
+      in = dfs.open(filePath);
+      try {
+        result = readAll(in);
+      } finally {
+        in.close();
+      }
+      Assert.assertArrayEquals(newData, result);
+
+      // Case 3: Save new checkpoint and restart NN, check the file
+      NameNodeAdapter.enterSafeMode(nn, false);
+      NameNodeAdapter.saveNamespace(nn);
+      cluster.restartNameNode();
+      nn = cluster.getNameNode();
+
+      in = dfs.open(filePath);
+      try {
+        result = readAll(in);
+      } finally {
+        in.close();
+      }
+      Assert.assertArrayEquals(newData, result);
+    } finally {
+      if (dfs != null) {
+        dfs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  private void assertBlocks(BlockManager bm, LocatedBlocks lbs,
+      boolean exist) {
+    for (LocatedBlock locatedBlock : lbs.getLocatedBlocks()) {
+      if (exist) {
+        assertTrue(bm.getStoredBlock(locatedBlock.getBlock().
+            getLocalBlock()) != null);
+      } else {
+        assertTrue(bm.getStoredBlock(locatedBlock.getBlock().
+            getLocalBlock()) == null);
+      }
+    }
+  }
+
+  private byte[] readAll(FSDataInputStream in) throws IOException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    byte[] buffer = new byte[1024];
+    int n = 0;
+    while((n = in.read(buffer)) > -1) {
+      out.write(buffer, 0, n);
+    }
+    return out.toByteArray();
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java
index a5e2edf87c9..3f96c0c5ce0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java
@@ -99,7 +99,7 @@ public class CreateEditsLog {
       INodeFile fileUc = new INodeFile(inodeId.nextValue(), null,
           p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize);
       fileUc.toUnderConstruction("", "");
-      editLog.logOpenFile(filePath, fileUc, false);
+      editLog.logOpenFile(filePath, fileUc, false, false);
       editLog.logCloseFile(filePath, inode);
 
       if (currentBlockId - bidAtSync >= 2000) { // sync every 2K blocks
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
index 47a807afd92..7b622426aad 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
@@ -197,7 +197,7 @@ public class TestEditLog {
           p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize);
       inode.toUnderConstruction("", "");
 
-      editLog.logOpenFile("/filename" + (startIndex + i), inode, false);
+      editLog.logOpenFile("/filename" + (startIndex + i), inode, false, false);
       editLog.logCloseFile("/filename" + (startIndex + i), inode);
       editLog.logSync();
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored
index a134969ca79136819a5f1e2d5e16771d54aed04a..5d93a505c01a7ec8430cea52fd48380c663d4318 100644
GIT binary patch
delta 1662
[base85-encoded delta omitted: the encoded payload was corrupted during text
extraction and is not recoverable from this copy of the patch; the fixture
grows from 4992 to 5252 bytes as shown in the diffstat]
delta 1619
[base85-encoded delta omitted for the same reason]
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml
index 340667cdf30..977be98fe43 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <EDITS>
-  <EDITS_VERSION>-57</EDITS_VERSION>
+  <EDITS_VERSION>-58</EDITS_VERSION>
   <RECORD>
     <OPCODE>OP_START_LOG_SEGMENT</OPCODE>
     <DATA>
@@ -42,6 +42,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -64,6 +65,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -174,6 +176,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
      <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -196,6 +199,51 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
+      <PERMISSION_STATUS>
+        <USERNAME>andrew</USERNAME>
+        <GROUPNAME>supergroup</GROUPNAME>
+        <MODE>420</MODE>
+      </PERMISSION_STATUS>
+    </DATA>
+  </RECORD>
+  <RECORD>
+    <OPCODE>OP_ADD</OPCODE>
+    <DATA>
+      <TXID>17</TXID>
+      <LENGTH>0</LENGTH>
+      <INODEID>16388</INODEID>
+      <PATH>/file_create</PATH>
+      <REPLICATION>1</REPLICATION>
+      <MTIME>1402899229912</MTIME>
+      <ATIME>1402899229912</ATIME>
+      <BLOCKSIZE>512</BLOCKSIZE>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
+      <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>true</OVERWRITE>
+      <PERMISSION_STATUS>
+        <USERNAME>andrew</USERNAME>
+        <GROUPNAME>supergroup</GROUPNAME>
+        <MODE>420</MODE>
+      </PERMISSION_STATUS>
+      <RPC_CLIENTID>e03f4a52-3d85-4e05-8942-286185e639bd</RPC_CLIENTID>
+      <RPC_CALLID>21</RPC_CALLID>
+    </DATA>
+  </RECORD>
+  <RECORD>
+    <OPCODE>OP_CLOSE</OPCODE>
+    <DATA>
+      <TXID>18</TXID>
+      <LENGTH>0</LENGTH>
+      <INODEID>0</INODEID>
+      <PATH>/file_create</PATH>
+      <REPLICATION>1</REPLICATION>
+      <MTIME>1402899229931</MTIME>
+      <ATIME>1402899229912</ATIME>
+      <BLOCKSIZE>512</BLOCKSIZE>
+      <CLIENT_NAME></CLIENT_NAME>
+      <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -206,7 +254,7 @@
   <RECORD>
     <OPCODE>OP_SET_REPLICATION</OPCODE>
     <DATA>
-      <TXID>17</TXID>
+      <TXID>19</TXID>
       <PATH>/file_create</PATH>
       <REPLICATION>1</REPLICATION>
     </DATA>
@@ -214,7 +262,7 @@
   <RECORD>
     <OPCODE>OP_SET_PERMISSIONS</OPCODE>
     <DATA>
-      <TXID>18</TXID>
+      <TXID>20</TXID>
       <SRC>/file_create</SRC>
       <MODE>511</MODE>
     </DATA>
@@ -222,7 +270,7 @@
   <RECORD>
     <OPCODE>OP_SET_OWNER</OPCODE>
     <DATA>
-      <TXID>19</TXID>
+      <TXID>21</TXID>
       <SRC>/file_create</SRC>
       <USERNAME>newOwner</USERNAME>
     </DATA>
@@ -230,7 +278,7 @@
     <OPCODE>OP_TIMES</OPCODE>
     <DATA>
-      <TXID>20</TXID>
+      <TXID>22</TXID>
       <LENGTH>0</LENGTH>
       <PATH>/file_create</PATH>
       <MTIME>1285195527000</MTIME>
       <ATIME>1285195527000</ATIME>
@@ -240,7 +288,7 @@
   <RECORD>
     <OPCODE>OP_SET_QUOTA</OPCODE>
     <DATA>
-      <TXID>21</TXID>
+      <TXID>23</TXID>
       <SRC>/directory_mkdir</SRC>
       <NSQUOTA>1000</NSQUOTA>
       <DSQUOTA>-1</DSQUOTA>
@@ -249,7 +297,7 @@
   <RECORD>
     <OPCODE>OP_RENAME</OPCODE>
     <DATA>
-      <TXID>22</TXID>
+      <TXID>24</TXID>
       <LENGTH>0</LENGTH>
       <SRC>/file_create</SRC>
       <DST>/file_moved</DST>
@@ -262,7 +310,7 @@
   <RECORD>
     <OPCODE>OP_ADD</OPCODE>
     <DATA>
-      <TXID>23</TXID>
+      <TXID>25</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>16389</INODEID>
       <PATH>/file_concat_target</PATH>
@@ -272,6 +320,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -284,21 +333,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>24</TXID>
+      <TXID>26</TXID>
       <BLOCK_ID>1073741825</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>25</TXID>
+      <TXID>27</TXID>
       <GENSTAMPV2>1001</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>26</TXID>
+      <TXID>28</TXID>
       <PATH>/file_concat_target</PATH>
       <BLOCK>
         <BLOCK_ID>1073741825</BLOCK_ID>
@@ -312,21 +361,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>27</TXID>
+      <TXID>29</TXID>
       <BLOCK_ID>1073741826</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>28</TXID>
+      <TXID>30</TXID>
       <GENSTAMPV2>1002</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>29</TXID>
+      <TXID>31</TXID>
       <PATH>/file_concat_target</PATH>
       <BLOCK>
         <BLOCK_ID>1073741825</BLOCK_ID>
@@ -345,21 +394,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>30</TXID>
+      <TXID>32</TXID>
       <BLOCK_ID>1073741827</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>31</TXID>
+      <TXID>33</TXID>
       <GENSTAMPV2>1003</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>32</TXID>
+      <TXID>34</TXID>
       <PATH>/file_concat_target</PATH>
       <BLOCK>
         <BLOCK_ID>1073741826</BLOCK_ID>
@@ -378,7 +427,7 @@
   <RECORD>
     <OPCODE>OP_CLOSE</OPCODE>
     <DATA>
-      <TXID>33</TXID>
+      <TXID>35</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>0</INODEID>
       <PATH>/file_concat_target</PATH>
@@ -388,6 +437,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <BLOCK>
         <BLOCK_ID>1073741825</BLOCK_ID>
         <NUM_BYTES>512</NUM_BYTES>
@@ -413,7 +463,7 @@
   <RECORD>
     <OPCODE>OP_ADD</OPCODE>
     <DATA>
-      <TXID>34</TXID>
+      <TXID>36</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>16390</INODEID>
       <PATH>/file_concat_0</PATH>
@@ -423,6 +473,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -435,21 +486,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>35</TXID>
+      <TXID>37</TXID>
       <BLOCK_ID>1073741828</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>36</TXID>
+      <TXID>38</TXID>
       <GENSTAMPV2>1004</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>37</TXID>
+      <TXID>39</TXID>
       <PATH>/file_concat_0</PATH>
       <BLOCK>
         <BLOCK_ID>1073741828</BLOCK_ID>
@@ -463,21 +514,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>38</TXID>
+      <TXID>40</TXID>
       <BLOCK_ID>1073741829</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>39</TXID>
+      <TXID>41</TXID>
       <GENSTAMPV2>1005</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>40</TXID>
+      <TXID>42</TXID>
       <PATH>/file_concat_0</PATH>
       <BLOCK>
         <BLOCK_ID>1073741828</BLOCK_ID>
@@ -496,21 +547,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>41</TXID>
+      <TXID>43</TXID>
       <BLOCK_ID>1073741830</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>42</TXID>
+      <TXID>44</TXID>
       <GENSTAMPV2>1006</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>43</TXID>
+      <TXID>45</TXID>
       <PATH>/file_concat_0</PATH>
       <BLOCK>
         <BLOCK_ID>1073741829</BLOCK_ID>
@@ -529,7 +580,7 @@
   <RECORD>
     <OPCODE>OP_CLOSE</OPCODE>
     <DATA>
-      <TXID>44</TXID>
+      <TXID>46</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>0</INODEID>
       <PATH>/file_concat_0</PATH>
@@ -539,6 +590,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <BLOCK>
         <BLOCK_ID>1073741828</BLOCK_ID>
         <NUM_BYTES>512</NUM_BYTES>
@@ -564,7 +616,7 @@
   <RECORD>
     <OPCODE>OP_ADD</OPCODE>
     <DATA>
-      <TXID>45</TXID>
+      <TXID>47</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>16391</INODEID>
       <PATH>/file_concat_1</PATH>
@@ -574,6 +626,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -586,21 +639,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>46</TXID>
+      <TXID>48</TXID>
       <BLOCK_ID>1073741831</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>47</TXID>
+      <TXID>49</TXID>
       <GENSTAMPV2>1007</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>48</TXID>
+      <TXID>50</TXID>
       <PATH>/file_concat_1</PATH>
       <BLOCK>
         <BLOCK_ID>1073741831</BLOCK_ID>
@@ -614,21 +667,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>49</TXID>
+      <TXID>51</TXID>
       <BLOCK_ID>1073741832</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>50</TXID>
+      <TXID>52</TXID>
       <GENSTAMPV2>1008</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>51</TXID>
+      <TXID>53</TXID>
       <PATH>/file_concat_1</PATH>
       <BLOCK>
         <BLOCK_ID>1073741831</BLOCK_ID>
@@ -647,21 +700,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>52</TXID>
+      <TXID>54</TXID>
       <BLOCK_ID>1073741833</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>53</TXID>
+      <TXID>55</TXID>
       <GENSTAMPV2>1009</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>54</TXID>
+      <TXID>56</TXID>
       <PATH>/file_concat_1</PATH>
       <BLOCK>
         <BLOCK_ID>1073741832</BLOCK_ID>
@@ -680,7 +733,7 @@
   <RECORD>
     <OPCODE>OP_CLOSE</OPCODE>
     <DATA>
-      <TXID>55</TXID>
+      <TXID>57</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>0</INODEID>
       <PATH>/file_concat_1</PATH>
@@ -690,6 +743,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <BLOCK>
         <BLOCK_ID>1073741831</BLOCK_ID>
         <NUM_BYTES>512</NUM_BYTES>
@@ -715,7 +769,7 @@
   <RECORD>
     <OPCODE>OP_CONCAT_DELETE</OPCODE>
     <DATA>
-      <TXID>56</TXID>
+      <TXID>58</TXID>
       <LENGTH>0</LENGTH>
       <TRG>/file_concat_target</TRG>
       <TIMESTAMP>1402899230394</TIMESTAMP>
@@ -730,7 +784,7 @@
   <RECORD>
     <OPCODE>OP_SYMLINK</OPCODE>
     <DATA>
-      <TXID>57</TXID>
+      <TXID>59</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>16392</INODEID>
       <PATH>/file_symlink</PATH>
@@ -749,7 +803,7 @@
   <RECORD>
     <OPCODE>OP_ADD</OPCODE>
     <DATA>
-      <TXID>58</TXID>
+      <TXID>60</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>16393</INODEID>
       <PATH>/hard-lease-recovery-test</PATH>
@@ -759,6 +813,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME>DFSClient_NONMAPREDUCE_1233039831_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -771,21 +826,21 @@
   <RECORD>
     <OPCODE>OP_ALLOCATE_BLOCK_ID</OPCODE>
     <DATA>
-      <TXID>59</TXID>
+      <TXID>61</TXID>
       <BLOCK_ID>1073741834</BLOCK_ID>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>60</TXID>
+      <TXID>62</TXID>
       <GENSTAMPV2>1010</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ADD_BLOCK</OPCODE>
     <DATA>
-      <TXID>61</TXID>
+      <TXID>63</TXID>
       <PATH>/hard-lease-recovery-test</PATH>
       <BLOCK>
         <BLOCK_ID>1073741834</BLOCK_ID>
@@ -799,7 +854,7 @@
   <RECORD>
     <OPCODE>OP_UPDATE_BLOCKS</OPCODE>
     <DATA>
-      <TXID>62</TXID>
+      <TXID>64</TXID>
       <PATH>/hard-lease-recovery-test</PATH>
       <BLOCK>
         <BLOCK_ID>1073741834</BLOCK_ID>
@@ -813,14 +868,14 @@
   <RECORD>
     <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
-      <TXID>63</TXID>
+      <TXID>65</TXID>
       <GENSTAMPV2>1011</GENSTAMPV2>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_REASSIGN_LEASE</OPCODE>
     <DATA>
-      <TXID>64</TXID>
+      <TXID>66</TXID>
       <LEASEHOLDER>DFSClient_NONMAPREDUCE_1233039831_1</LEASEHOLDER>
       <PATH>/hard-lease-recovery-test</PATH>
       <NEWHOLDER>HDFS_NameNode</NEWHOLDER>
@@ -829,7 +884,7 @@
   <RECORD>
     <OPCODE>OP_CLOSE</OPCODE>
     <DATA>
-      <TXID>65</TXID>
+      <TXID>67</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>0</INODEID>
       <PATH>/hard-lease-recovery-test</PATH>
@@ -839,6 +894,7 @@
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
+      <OVERWRITE>false</OVERWRITE>
       <BLOCK>
         <BLOCK_ID>1073741834</BLOCK_ID>
         <NUM_BYTES>11</NUM_BYTES>
@@ -854,7 +910,7 @@
   <RECORD>
     <OPCODE>OP_ADD_CACHE_POOL</OPCODE>
     <DATA>
-      <TXID>66</TXID>
+      <TXID>68</TXID>
       <POOLNAME>pool1</POOLNAME>
       <OWNERNAME>andrew</OWNERNAME>
       <GROUPNAME>andrew</GROUPNAME>
@@ -868,7 +924,7 @@
   <RECORD>
     <OPCODE>OP_MODIFY_CACHE_POOL</OPCODE>
     <DATA>
-      <TXID>67</TXID>
+      <TXID>69</TXID>
       <POOLNAME>pool1</POOLNAME>
       <LIMIT>99</LIMIT>
       <RPC_CLIENTID>e03f4a52-3d85-4e05-8942-286185e639bd</RPC_CLIENTID>
@@ -878,7 +934,7 @@
   <RECORD>
     <OPCODE>OP_ADD_CACHE_DIRECTIVE</OPCODE>
     <DATA>
-      <TXID>68</TXID>
+      <TXID>70</TXID>
       <ID>1</ID>
       <PATH>/path</PATH>
       <REPLICATION>1</REPLICATION>
@@ -891,7 +947,7 @@
   <RECORD>
     <OPCODE>OP_MODIFY_CACHE_DIRECTIVE</OPCODE>
     <DATA>
-      <TXID>69</TXID>
+      <TXID>71</TXID>
       <ID>1</ID>
       <REPLICATION>2</REPLICATION>
       <RPC_CLIENTID>e03f4a52-3d85-4e05-8942-286185e639bd</RPC_CLIENTID>
@@ -901,7 +957,7 @@
   <RECORD>
     <OPCODE>OP_REMOVE_CACHE_DIRECTIVE</OPCODE>
     <DATA>
-      <TXID>70</TXID>
+      <TXID>72</TXID>
       <ID>1</ID>
       <RPC_CLIENTID>e03f4a52-3d85-4e05-8942-286185e639bd</RPC_CLIENTID>
       <RPC_CALLID>77</RPC_CALLID>
@@ -910,7 +966,7 @@
   <RECORD>
     <OPCODE>OP_REMOVE_CACHE_POOL</OPCODE>
     <DATA>
-      <TXID>71</TXID>
+      <TXID>73</TXID>
       <POOLNAME>pool1</POOLNAME>
       <RPC_CLIENTID>e03f4a52-3d85-4e05-8942-286185e639bd</RPC_CLIENTID>
       <RPC_CALLID>78</RPC_CALLID>
@@ -919,7 +975,7 @@
   <RECORD>
     <OPCODE>OP_SET_ACL</OPCODE>
     <DATA>
-      <TXID>72</TXID>
+      <TXID>74</TXID>
       <SRC>/file_concat_target</SRC>
       <ENTRY>
         <SCOPE>ACCESS</SCOPE>
@@ -952,7 +1008,7 @@
   <RECORD>
     <OPCODE>OP_SET_XATTR</OPCODE>
     <DATA>
-      <TXID>73</TXID>
+      <TXID>75</TXID>
       <SRC>/file_concat_target</SRC>
       <XATTR>
         <NAMESPACE>USER</NAMESPACE>
@@ -966,7 +1022,7 @@
   <RECORD>
     <OPCODE>OP_SET_XATTR</OPCODE>
     <DATA>
-      <TXID>74</TXID>
+      <TXID>76</TXID>
       <SRC>/file_concat_target</SRC>
       <XATTR>
         <NAMESPACE>USER</NAMESPACE>
@@ -980,7 +1036,7 @@
   <RECORD>
     <OPCODE>OP_REMOVE_XATTR</OPCODE>
     <DATA>
-      <TXID>75</TXID>
+      <TXID>77</TXID>
       <SRC>/file_concat_target</SRC>
       <XATTR>
         <NAMESPACE>USER</NAMESPACE>
@@ -993,21 +1049,21 @@
   <RECORD>
     <OPCODE>OP_ROLLING_UPGRADE_START</OPCODE>
     <DATA>
-      <TXID>76</TXID>
+      <TXID>78</TXID>
       <STARTTIME>1402899233646</STARTTIME>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_ROLLING_UPGRADE_FINALIZE</OPCODE>
     <DATA>
-      <TXID>77</TXID>
+      <TXID>79</TXID>
       <FINALIZETIME>1402899233647</FINALIZETIME>
     </DATA>
   </RECORD>
   <RECORD>
     <OPCODE>OP_END_LOG_SEGMENT</OPCODE>
     <DATA>
-      <TXID>78</TXID>
+      <TXID>80</TXID>
     </DATA>
   </RECORD>
 </EDITS>