From 998e44a972ad06b48675afac89656ffba8c4d595 Mon Sep 17 00:00:00 2001
From: Xiao Chen
Date: Fri, 7 Oct 2016 17:30:30 -0700
Subject: [PATCH] HDFS-10797. Disk usage summary of snapshots causes renamed
 blocks to get counted twice. Contributed by Sean Mackrory.

(cherry picked from commit 3ea027e970ec7c95a10e581435cdb6c3a6b64abb)
---
 .../ContentSummaryComputationContext.java     |  94 ++++++++-
 .../hadoop/hdfs/server/namenode/INode.java    |   1 +
 .../hdfs/server/namenode/INodeDirectory.java  |  11 +-
 .../hdfs/server/namenode/INodeFile.java       |   1 +
 .../hdfs/server/namenode/INodeReference.java  |   2 +
 .../hdfs/server/namenode/INodeSymlink.java    |   1 +
 .../DirectorySnapshottableFeature.java        |   9 +-
 .../DirectoryWithSnapshotFeature.java         |  14 +-
 .../server/namenode/snapshot/Snapshot.java    |   1 +
 .../snapshot/TestRenameWithSnapshots.java     | 199 ++++++++++++++++++
 10 files changed, 307 insertions(+), 26 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
index 6df9e75d347..4208b53dfa5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java
@@ -21,6 +21,10 @@
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+
+import java.util.HashSet;
+import java.util.Set;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -35,6 +39,8 @@ public class ContentSummaryComputationContext {
   private long yieldCount = 0;
   private long sleepMilliSec = 0;
   private int sleepNanoSec = 0;
+  private Set<INode> includedNodes = new HashSet<>();
+  private Set<INode> deletedSnapshottedNodes = new HashSet<>();
 
   /**
    * Constructor
@@ -51,8 +57,8 @@ public ContentSummaryComputationContext(FSDirectory dir,
     this.fsn = fsn;
     this.limitPerRun = limitPerRun;
     this.nextCountLimit = limitPerRun;
-    this.counts = new ContentCounts.Builder().build();
-    this.snapshotCounts = new ContentCounts.Builder().build();
+    setCounts(new ContentCounts.Builder().build());
+    setSnapshotCounts(new ContentCounts.Builder().build());
     this.sleepMilliSec = sleepMicroSec/1000;
     this.sleepNanoSec = (int)((sleepMicroSec%1000)*1000);
   }
@@ -82,6 +88,7 @@ public boolean yield() {
     }
 
     // Have we reached the limit?
+    ContentCounts counts = getCounts();
     long currentCount = counts.getFileCount() +
         counts.getSymlinkCount() +
         counts.getDirectoryCount() +
@@ -123,14 +130,22 @@ public boolean yield() {
   }
 
   /** Get the content counts */
-  public ContentCounts getCounts() {
+  public synchronized ContentCounts getCounts() {
     return counts;
   }
 
+  private synchronized void setCounts(ContentCounts counts) {
+    this.counts = counts;
+  }
+
   public ContentCounts getSnapshotCounts() {
     return snapshotCounts;
   }
 
+  private void setSnapshotCounts(ContentCounts snapshotCounts) {
+    this.snapshotCounts = snapshotCounts;
+  }
+
   public BlockStoragePolicySuite getBlockStoragePolicySuite() {
     Preconditions.checkState((bsps != null || fsn != null),
         "BlockStoragePolicySuite must be either initialized or available via" +
@@ -138,4 +153,77 @@ public BlockStoragePolicySuite getBlockStoragePolicySuite() {
     return (bsps != null) ? bsps:
         fsn.getBlockManager().getStoragePolicySuite();
   }
+
+  /**
+   * If the node is an INodeReference, resolves it to the actual inode.
+   * Snapshot diffs represent renamed / moved files as different
+   * INodeReferences, but the underlying INode they refer to is consistent.
+   *
+   * @param node
+   * @return The referred INode if there is one, else returns the input
+   * unmodified.
+   */
+  private INode resolveINodeReference(INode node) {
+    if (node.isReference() && node instanceof INodeReference) {
+      return ((INodeReference)node).getReferredINode();
+    }
+    return node;
+  }
+
+  /**
+   * Reports that a node is about to be included in this summary. Can be used
+   * either to simply report that a node has been included, or to check
+   * whether a node has already been included.
+   *
+   * @param node
+   * @return true if node has already been included
+   */
+  public boolean nodeIncluded(INode node) {
+    INode resolvedNode = resolveINodeReference(node);
+    synchronized (includedNodes) {
+      if (!includedNodes.contains(resolvedNode)) {
+        includedNodes.add(resolvedNode);
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Schedules a node that is listed as DELETED in a snapshot's diff to be
+   * included in the summary at the end of computation. See
+   * {@link #tallyDeletedSnapshottedINodes()} for more context.
+   *
+   * @param node
+   */
+  public void reportDeletedSnapshottedNode(INode node) {
+    deletedSnapshottedNodes.add(node);
+  }
+
+  /**
+   * Finalizes the computation by including all nodes that were reported as
+   * deleted by a snapshot but have not already been included due to other
+   * references.
+   * <p>
+   * Nodes that get renamed are listed in the snapshot's diff as both DELETED
+   * under the old name and CREATED under the new name. The computation
+   * relies on nodes to report themselves as being included (via
+   * {@link #nodeIncluded(INode)}) as the only reliable way to determine which
+   * nodes were renamed within the tree being summarized and which were
+   * removed (either by deletion or being renamed outside of the tree).
+   */
+  public synchronized void tallyDeletedSnapshottedINodes() {
+    /* Temporarily create a new counts object so these results can then be
+       added to both counts and snapshotCounts */
+    ContentCounts originalCounts = getCounts();
+    setCounts(new ContentCounts.Builder().build());
+    for (INode node : deletedSnapshottedNodes) {
+      if (!nodeIncluded(node)) {
+        node.computeContentSummary(Snapshot.CURRENT_STATE_ID, this);
+      }
+    }
+    originalCounts.addContents(getCounts());
+    snapshotCounts.addContents(getCounts());
+    setCounts(originalCounts);
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
index 2804c7b0a70..853589838be 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
@@ -429,6 +429,7 @@ public final ContentSummary computeContentSummary(BlockStoragePolicySuite bsps)
   public final ContentSummary computeAndConvertContentSummary(int snapshotId,
       ContentSummaryComputationContext summary) {
     computeContentSummary(snapshotId, summary);
+    summary.tallyDeletedSnapshottedINodes();
     final ContentCounts counts = summary.getCounts();
     final ContentCounts snapshotCounts = summary.getSnapshotCounts();
     final QuotaCounts q = getQuotaCounts();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
index 24c881556db..b6e27135fea 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
@@ -628,17 +628,10 @@ public QuotaCounts computeQuotaUsage4CurrentDirectory(
   @Override
   public ContentSummaryComputationContext computeContentSummary(int snapshotId,
       ContentSummaryComputationContext summary) {
+    summary.nodeIncluded(this);
     final DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
     if (sf != null && snapshotId == Snapshot.CURRENT_STATE_ID) {
-      final ContentCounts counts = new ContentCounts.Builder().build();
-      // if the getContentSummary call is against a non-snapshot path, the
-      // computation should include all the deleted files/directories
-      sf.computeContentSummary4Snapshot(summary.getBlockStoragePolicySuite(),
-          counts);
-      summary.getCounts().addContents(counts);
-      // Also add ContentSummary to snapshotCounts (So we can extract it
-      // later from the ContentSummary of all).
- summary.getSnapshotCounts().addContents(counts); + sf.computeContentSummary4Snapshot(summary); } final DirectoryWithQuotaFeature q = getDirectoryWithQuotaFeature(); if (q != null && snapshotId == Snapshot.CURRENT_STATE_ID) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index 60db4579714..b0663d3b877 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -638,6 +638,7 @@ public final QuotaCounts computeQuotaUsage(BlockStoragePolicySuite bsps, @Override public final ContentSummaryComputationContext computeContentSummary( int snapshotId, final ContentSummaryComputationContext summary) { + summary.nodeIncluded(this); final ContentCounts counts = summary.getCounts(); counts.addContent(Content.FILE, 1); final long fileLen = computeFileSize(snapshotId); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java index 1b852374c5d..56aaf8ce058 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java @@ -315,6 +315,7 @@ public void destroyAndCollectBlocks(ReclaimContext reclaimContext) { @Override public ContentSummaryComputationContext computeContentSummary(int snapshotId, ContentSummaryComputationContext summary) { + summary.nodeIncluded(this); return referred.computeContentSummary(snapshotId, summary); } @@ -504,6 +505,7 @@ public int getLastSnapshotId() { @Override public final ContentSummaryComputationContext computeContentSummary( int snapshotId, ContentSummaryComputationContext summary) { + summary.nodeIncluded(this); final int s = snapshotId < lastSnapshotId ? 
snapshotId : lastSnapshotId; // only count storagespace for WithName final QuotaCounts q = computeQuotaUsage( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java index c76bea090f1..1223f4ef86c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java @@ -96,6 +96,7 @@ public QuotaCounts computeQuotaUsage(BlockStoragePolicySuite bsps, @Override public ContentSummaryComputationContext computeContentSummary(int snapshotId, final ContentSummaryComputationContext summary) { + summary.nodeIncluded(this); summary.getCounts().addContent(Content.SYMLINK, 1); return summary; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java index 39db979ae02..fa7bacee84d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java @@ -29,9 +29,9 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.SnapshotException; -import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; import org.apache.hadoop.hdfs.server.namenode.Content; import org.apache.hadoop.hdfs.server.namenode.ContentCounts; +import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.SnapshotAndINode; @@ -220,11 +220,12 @@ public Snapshot removeSnapshot( } @Override - public void computeContentSummary4Snapshot(final BlockStoragePolicySuite bsps, - final ContentCounts counts) { + public void computeContentSummary4Snapshot(ContentSummaryComputationContext + context) { + ContentCounts counts = context.getCounts(); counts.addContent(Content.SNAPSHOT, snapshotsByNames.size()); counts.addContent(Content.SNAPSHOTTABLE_DIRECTORY, 1); - super.computeContentSummary4Snapshot(bsps, counts); + super.computeContentSummary4Snapshot(context); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java index 0111b3bfb65..9addbfa7ace 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; import org.apache.hadoop.hdfs.server.namenode.AclStorage; -import org.apache.hadoop.hdfs.server.namenode.ContentCounts; import 
org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext; import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization; import org.apache.hadoop.hdfs.server.namenode.INode; @@ -629,18 +628,13 @@ public QuotaCounts computeQuotaUsage4CurrentDirectory( return counts; } - public void computeContentSummary4Snapshot(final BlockStoragePolicySuite bsps, - final ContentCounts counts) { - // Create a new blank summary context for blocking processing of subtree. - ContentSummaryComputationContext summary = - new ContentSummaryComputationContext(bsps); + public void computeContentSummary4Snapshot( + ContentSummaryComputationContext context) { for(DirectoryDiff d : diffs) { - for(INode deleted : d.getChildrenDiff().getList(ListType.DELETED)) { - deleted.computeContentSummary(Snapshot.CURRENT_STATE_ID, summary); + for(INode deletedNode : d.getChildrenDiff().getList(ListType.DELETED)) { + context.reportDeletedSnapshottedNode(deletedNode); } } - // Add the counts from deleted trees. - counts.addContents(summary.getCounts()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java index 1d1948a4302..c315fa39104 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java @@ -177,6 +177,7 @@ public INode getChild(byte[] name, int snapshotId) { @Override public ContentSummaryComputationContext computeContentSummary( int snapshotId, ContentSummaryComputationContext summary) { + summary.nodeIncluded(this); return computeDirectoryContentSummary(summary, snapshotId); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java index 827feb6a2a8..143a7b6ac81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java @@ -36,8 +36,10 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; @@ -2409,4 +2411,201 @@ public void testRenameWithOverWrite() throws Exception { assertTrue(existsInDiffReport(entries, DiffType.RENAME, "foo/file2", "newDir/file2")); assertTrue(existsInDiffReport(entries, DiffType.RENAME, "foo/file3", "newDir/file1")); } + + private void checkSpaceConsumed(String message, Path directory, + long expectedSpace) throws Exception { + ContentSummary summary = hdfs.getContentSummary(directory); + assertEquals(message, expectedSpace, summary.getSpaceConsumed()); + } + + /** + * Runs through various combinations of renames, deletes, appends and other + * operations in a snapshotted directory and ensures disk usage summaries + * (e.g. 
du -s) are computed correctly. + * + * @throws Exception + */ + @Test (timeout=300000) + public void testDu() throws Exception { + File tempFile = File.createTempFile("testDu-", ".tmp"); + tempFile.deleteOnExit(); + + final FileSystem localfs = FileSystem.getLocal(conf); + final Path localOriginal = new Path(tempFile.getPath()); + final Path dfsRoot = new Path("/testDu"); + final Path dfsOriginal = new Path(dfsRoot, "original"); + final Path dfsRenamed1 = new Path(dfsRoot, "renamed1"); + final Path dfsRenamed2 = new Path(dfsRoot, "renamed2"); + final Path dfsAppended = new Path(dfsRoot, "appended"); + + /* We will test with a single block worth of data. If we don't at least use + a multiple of BLOCKSIZE, append operations will modify snapshotted blocks + and other factors will come into play here that we'll have to account for */ + final long spaceIncrement = BLOCKSIZE * REPL; + final byte[] appendData = new byte[(int) BLOCKSIZE]; + DFSTestUtil.createFile(localfs, localOriginal, BLOCKSIZE, REPL, SEED); + + FSDataOutputStream out = null; + long expectedSpace = 0; + + hdfs.mkdirs(dfsRoot); + checkSpaceConsumed("Du is wrong immediately", + dfsRoot, 0L); + + hdfs.copyFromLocalFile(localOriginal, dfsOriginal); + expectedSpace += spaceIncrement; + checkSpaceConsumed("Du is wrong after creating / copying file", + dfsRoot, expectedSpace); + + SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s0"); + checkSpaceConsumed("Du is wrong after snapshotting", + dfsRoot, expectedSpace); + + hdfs.rename(dfsOriginal, dfsRenamed1); + checkSpaceConsumed("Du is wrong after 1 rename", + dfsRoot, expectedSpace); + + hdfs.rename(dfsRenamed1, dfsRenamed2); + checkSpaceConsumed("Du is wrong after 2 renames", + dfsRoot, expectedSpace); + + hdfs.delete(dfsRenamed2, false); + checkSpaceConsumed("Du is wrong after deletion", + dfsRoot, expectedSpace); + + hdfs.copyFromLocalFile(localOriginal, dfsOriginal); + expectedSpace += spaceIncrement; + checkSpaceConsumed("Du is wrong after replacing a renamed file", + dfsRoot, expectedSpace); + + hdfs.copyFromLocalFile(localOriginal, dfsAppended); + expectedSpace += spaceIncrement; + SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s1"); + + out = hdfs.append(dfsAppended); + out.write(appendData); + out.close(); + expectedSpace += spaceIncrement; + checkSpaceConsumed("Du is wrong after 1 snapshot + append", + dfsRoot, expectedSpace); + + SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s2"); + out = hdfs.append(dfsAppended); + out.write(appendData); + out.close(); + expectedSpace += spaceIncrement; + checkSpaceConsumed("Du is wrong after 2 snapshot + appends", + dfsRoot, expectedSpace); + + SnapshotTestHelper.createSnapshot(hdfs, dfsRoot, "s3"); + out = hdfs.append(dfsAppended); + out.write(appendData); + out.close(); + expectedSpace += spaceIncrement; + hdfs.rename(dfsAppended, dfsRenamed1); + checkSpaceConsumed("Du is wrong after snapshot, append, & rename", + dfsRoot, expectedSpace); + hdfs.delete(dfsRenamed1, false); + // everything but the last append is snapshotted + expectedSpace -= spaceIncrement; + checkSpaceConsumed("Du is wrong after snapshot, append, delete & rename", + dfsRoot, expectedSpace); + + hdfs.delete(dfsOriginal, false); + hdfs.deleteSnapshot(dfsRoot, "s0"); + hdfs.deleteSnapshot(dfsRoot, "s1"); + hdfs.deleteSnapshot(dfsRoot, "s2"); + hdfs.deleteSnapshot(dfsRoot, "s3"); + expectedSpace = 0; + checkSpaceConsumed("Du is wrong after deleting all files and snapshots", + dfsRoot, expectedSpace); + } + + /** + * Runs through various combinations of renames, 
deletes, appends, and other
+   * operations between two snapshotted directories and ensures disk usage
+   * summaries (e.g. du -s) are computed correctly.
+   *
+   * This test currently assumes some incorrect behavior when files have been
+   * moved between subdirectories of the one being queried. In the cases
+   * below, only 1 block worth of data should ever actually be used. However,
+   * if there are 2-3 subdirectories that do contain, or have contained when
+   * snapshotted, the same file, that file will be counted 2-3 times,
+   * respectively, since each directory's summary is computed independently
+   * and recursively.
+   *
+   * @throws Exception
+   */
+  @Test (timeout=300000)
+  public void testDuMultipleDirs() throws Exception {
+    File tempFile = File.createTempFile("testDuMultipleDirs-", ".tmp");
+    tempFile.deleteOnExit();
+
+    final FileSystem localfs = FileSystem.getLocal(conf);
+    final Path localOriginal = new Path(tempFile.getPath());
+    final Path dfsRoot = new Path("/testDuMultipleDirs");
+    final Path snapshottable1 = new Path(dfsRoot, "snapshottable1");
+    final Path snapshottable2 = new Path(dfsRoot, "snapshottable2");
+    final Path nonsnapshottable = new Path(dfsRoot, "nonsnapshottable");
+    final Path subdirectory = new Path(snapshottable1, "subdirectory");
+    final Path dfsOriginal = new Path(snapshottable1, "file");
+    final Path renamedNonsnapshottable = new Path(nonsnapshottable, "file");
+    final Path renamedSnapshottable = new Path(snapshottable2, "file");
+    final Path renamedSubdirectory = new Path(subdirectory, "file");
+
+    /* We will test with a single block worth of data. If we don't use at
+       least a multiple of BLOCKSIZE, append operations will modify
+       snapshotted blocks and other factors will come into play that we'd
+       have to account for. */
+    final long spaceConsumed = BLOCKSIZE * REPL;
+    DFSTestUtil.createFile(localfs, localOriginal, BLOCKSIZE, REPL, SEED);
+
+    hdfs.mkdirs(snapshottable1);
+    hdfs.mkdirs(snapshottable2);
+    hdfs.mkdirs(nonsnapshottable);
+    hdfs.mkdirs(subdirectory);
+    checkSpaceConsumed("Du is wrong immediately",
+        dfsRoot, 0L);
+
+    hdfs.copyFromLocalFile(localOriginal, dfsOriginal);
+    checkSpaceConsumed("Du is wrong after creating / copying file",
+        snapshottable1, spaceConsumed);
+
+    SnapshotTestHelper.createSnapshot(hdfs, snapshottable1, "s1");
+    checkSpaceConsumed("Du is wrong in original dir after 1st snapshot",
+        snapshottable1, spaceConsumed);
+
+    hdfs.rename(dfsOriginal, renamedNonsnapshottable);
+    checkSpaceConsumed("Du is wrong in original dir after 1st rename",
+        snapshottable1, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in non-snapshottable dir after 1st rename",
+        nonsnapshottable, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in root dir after 1st rename",
+        dfsRoot, spaceConsumed);
+
+    hdfs.rename(renamedNonsnapshottable, renamedSnapshottable);
+    checkSpaceConsumed("Du is wrong in original dir after 2nd rename",
+        snapshottable1, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in non-snapshottable dir after 2nd rename",
+        nonsnapshottable, 0);
+    checkSpaceConsumed("Du is wrong in snapshottable dir after 2nd rename",
+        snapshottable2, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in root dir after 2nd rename",
+        dfsRoot, spaceConsumed);
+
+    SnapshotTestHelper.createSnapshot(hdfs, snapshottable2, "s2");
+    hdfs.rename(renamedSnapshottable, renamedSubdirectory);
+    checkSpaceConsumed("Du is wrong in original dir after 3rd rename",
+        snapshottable1, spaceConsumed);
+    checkSpaceConsumed("Du is wrong in snapshottable dir after 3rd rename",
snapshottable2, spaceConsumed); + checkSpaceConsumed("Du is wrong in original subdirectory after 3rd rename", + subdirectory, spaceConsumed); + checkSpaceConsumed("Du is wrong in root dir after 3rd rename", + dfsRoot, spaceConsumed); + + hdfs.delete(renamedSubdirectory, false); + hdfs.deleteSnapshot(snapshottable1, "s1"); + hdfs.deleteSnapshot(snapshottable2, "s2"); + checkSpaceConsumed("Du is wrong after deleting all files and snapshots", + dfsRoot, 0); + } }
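
---
Reviewer note: the core of the fix is that ContentSummaryComputationContext keys
its includedNodes set on the referred INode rather than on the INodeReference
wrapper, so the DELETED and CREATED diff entries produced by a rename collapse
into a single entry. Below is a minimal, self-contained sketch of that
identity-based dedup. It is not HDFS code: Node and Ref are illustrative
stand-ins for INode and INodeReference, and the synchronized contains/add pair
from the real nodeIncluded() is condensed into a single HashSet.add() call.

// DedupSketch.java -- toy model of resolveINodeReference() + nodeIncluded().
import java.util.HashSet;
import java.util.Set;

public class DedupSketch {
  // Stand-in for INode: identity (==) is what matters, as with the real set.
  static class Node {
    final String label;
    Node(String label) { this.label = label; }
  }

  // Stand-in for INodeReference: a named alias wrapping the real node.
  static class Ref extends Node {
    final Node referred;
    Ref(String label, Node referred) { super(label); this.referred = referred; }
  }

  private final Set<Node> includedNodes = new HashSet<>();

  // Mirrors resolveINodeReference(): strip the alias, keep the real node.
  private Node resolve(Node node) {
    return (node instanceof Ref) ? ((Ref) node).referred : node;
  }

  // Mirrors nodeIncluded(): returns true if this node was already counted.
  public boolean nodeIncluded(Node node) {
    return !includedNodes.add(resolve(node));
  }

  public static void main(String[] args) {
    DedupSketch ctx = new DedupSketch();
    Node file = new Node("underlying file");
    Ref created = new Ref("renamed1", file); // CREATED entry after a rename
    Ref deleted = new Ref("original", file); // DELETED entry, old name
    System.out.println(ctx.nodeIncluded(created)); // false: count it once
    System.out.println(ctx.nodeIncluded(deleted)); // true: same node, skip
  }
}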
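
The other half of the fix is the deferred tally: DELETED diff entries are only
queued via reportDeletedSnapshottedNode() during the tree walk and are
summarized at the very end, skipping anything already counted under a new name.
The counts-swap inside tallyDeletedSnapshottedINodes() exists so the
deleted-only subtotal can be added to both the overall counts and the
snapshot-only counts. A runnable toy version follows; Counts, Node, and the
byte figures are hypothetical stand-ins chosen to make the printout obvious,
not the real ContentCounts API.

// DeferredTallySketch.java -- toy model of the end-of-walk tally.
import java.util.HashSet;
import java.util.Set;

public class DeferredTallySketch {
  static class Counts {
    long space;
    void addContents(Counts other) { space += other.space; }
  }

  static class Node {
    final long space;
    Node(long space) { this.space = space; }
    // Stand-in for computeContentSummary(): charge this node to the context.
    void summarize(DeferredTallySketch ctx) { ctx.counts.space += space; }
  }

  private Counts counts = new Counts();                 // live-tree totals
  private final Counts snapshotCounts = new Counts();   // snapshot-only portion
  private final Set<Node> included = new HashSet<>();
  private final Set<Node> deletedSnapshotted = new HashSet<>();

  boolean nodeIncluded(Node n) { return !included.add(n); }
  void reportDeletedSnapshottedNode(Node n) { deletedSnapshotted.add(n); }

  // Mirrors tallyDeletedSnapshottedINodes(): swap in a blank Counts so the
  // deleted-only subtotal can be added to both running totals afterwards.
  void tallyDeletedSnapshottedNodes() {
    Counts original = counts;
    counts = new Counts();
    for (Node n : deletedSnapshotted) {
      if (!nodeIncluded(n)) {      // skip nodes already counted under new names
        n.summarize(this);
      }
    }
    original.addContents(counts);
    snapshotCounts.addContents(counts);
    counts = original;
  }

  public static void main(String[] args) {
    DeferredTallySketch ctx = new DeferredTallySketch();
    Node renamed = new Node(1024);      // still live, renamed since snapshot
    Node trulyDeleted = new Node(2048); // reachable only through a snapshot
    renamed.summarize(ctx);             // normal walk counts it, new name
    ctx.nodeIncluded(renamed);          // ...and records that it was counted
    ctx.reportDeletedSnapshottedNode(renamed);      // DELETED entry (rename)
    ctx.reportDeletedSnapshottedNode(trulyDeleted); // DELETED entry (delete)
    ctx.tallyDeletedSnapshottedNodes();
    System.out.println(ctx.counts.space);         // 3072, not 4096: no double count
    System.out.println(ctx.snapshotCounts.space); // 2048: held only by snapshots
  }
}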