diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 3883f2f849b..35b64173a7d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -1994,6 +1994,107 @@ public Void next(final FileSystem fs, final Path p)
     }.resolve(this, absF);
   }
 
+  /**
+   * Returns a remote iterator so that followup calls are made on demand
+   * while consuming the SnapshotDiffReportListing entries.
+   * This reduces memory consumption overhead in case the snapshotDiffReport
+   * is huge.
+   *
+   * @param snapshotDir
+   *          full path of the directory where snapshots are taken
+   * @param fromSnapshot
+   *          snapshot name of the from point. Null indicates the current
+   *          tree
+   * @param toSnapshot
+   *          snapshot name of the to point. Null indicates the current
+   *          tree.
+   * @return Remote iterator
+   * @throws IOException if resolving {@code snapshotDir} fails
+   */
+  public RemoteIterator<SnapshotDiffReportListing>
+      snapshotDiffReportListingRemoteIterator(
+      final Path snapshotDir, final String fromSnapshot,
+      final String toSnapshot) throws IOException {
+    Path absF = fixRelativePart(snapshotDir);
+    return new FileSystemLinkResolver
+        <RemoteIterator<SnapshotDiffReportListing>>() {
+      @Override
+      public RemoteIterator<SnapshotDiffReportListing> doCall(final Path p)
+          throws IOException {
+        return new SnapshotDiffReportListingIterator(
+            getPathName(p), fromSnapshot, toSnapshot);
+      }
+
+      @Override
+      public RemoteIterator<SnapshotDiffReportListing> next(final FileSystem fs,
+          final Path p) throws IOException {
+        // A symlink may resolve into another file system; fail with a clear
+        // message instead of a ClassCastException in that case.
+        if (!(fs instanceof DistributedFileSystem)) {
+          throw new UnsupportedOperationException("Cannot perform snapshot"
+              + " operations on a symlink to a non-DistributedFileSystem: "
+              + snapshotDir + " -> " + p);
+        }
+        return ((DistributedFileSystem) fs)
+            .snapshotDiffReportListingRemoteIterator(p, fromSnapshot,
+                toSnapshot);
+      }
+    }.resolve(this, absF);
+  }
+
+  /**
+   * This class defines an iterator that returns
+   * the SnapshotDiffReportListing for a snapshottable directory
+   * between two given snapshots.
+   *
+   * <p>Each call to {@link #next()} fetches one SnapshotDiffReportListing,
+   * passing the last path and index of the previous listing as the position
+   * to continue from.
+   */
+  private final class SnapshotDiffReportListingIterator implements
+      RemoteIterator<SnapshotDiffReportListing> {
+    private final String snapshotDir;
+    private final String fromSnapshot;
+    private final String toSnapshot;
+
+    // Position handed to the next request; (EMPTY_BYTES, -1) is used for the
+    // first request and also marks the final listing.
+    private byte[] startPath;
+    private int index;
+    private boolean hasNext = true;
+
+    private SnapshotDiffReportListingIterator(String snapshotDir,
+        String fromSnapshot, String toSnapshot) {
+      this.snapshotDir = snapshotDir;
+      this.fromSnapshot = fromSnapshot;
+      this.toSnapshot = toSnapshot;
+      this.startPath = DFSUtilClient.EMPTY_BYTES;
+      this.index = -1;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return hasNext;
+    }
+
+    @Override
+    public SnapshotDiffReportListing next() throws IOException {
+      if (!hasNext) {
+        throw new java.util.NoSuchElementException(
+            "No more entry in SnapshotDiffReport for " + snapshotDir);
+      }
+      final SnapshotDiffReportListing part =
+          dfs.getSnapshotDiffReportListing(snapshotDir, fromSnapshot,
+              toSnapshot, startPath, index);
+      startPath = part.getLastPath();
+      index = part.getLastIndex();
+      // The listing is exhausted once the cursor is back at its initial value.
+      hasNext =
+          !(Arrays.equals(startPath, DFSUtilClient.EMPTY_BYTES) && index == -1);
+      return part;
+    }
+  }
+
   private SnapshotDiffReport getSnapshotDiffReportInternal(
       final String snapshotDir, final String fromSnapshot,
       final String toSnapshot) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
index a4fb8abd59f..3bfcfbf9710 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
@@ -28,11 +28,15 @@
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.Random;
+import java.util.List;
+import java.util.ArrayList;
 
+import org.apache.commons.collections.list.TreeList;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -40,14 +44,17 @@
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
+import org.apache.hadoop.hdfs.client.impl.SnapshotDiffReportGenerator;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
 import org.apache.hadoop.hdfs.protocol.SnapshotException;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.Time;
 import org.junit.After;
 import org.junit.Assert;
@@ -1409,4 +1416,134 @@ public void testDiffReportWithRpcLimit2() throws Exception {
         new DiffReportEntry(DiffType.DELETE,
             DFSUtil.string2Bytes("dir3/file3")));
   }
+
+  /**
+   * Verifies the given report, and the inverse report computed from
+   * {@code to} to {@code from}, against the expected entries.
+   * MODIFY, CREATE and DELETE entries are matched individually; entries of
+   * any other type (e.g. RENAME) only contribute to the size check.
+   */
+  private void verifyDiffReportForGivenReport(Path dirPath, String from,
+      String to, SnapshotDiffReport report, DiffReportEntry... entries)
+      throws IOException {
+    // reverse the order of from and to
+    SnapshotDiffReport inverseReport =
+        hdfs.getSnapshotDiffReport(dirPath, to, from);
+    LOG.info(report.toString());
+    LOG.info(inverseReport.toString() + "\n");
+
+    assertEquals(entries.length, report.getDiffList().size());
+    assertEquals(entries.length, inverseReport.getDiffList().size());
+
+    for (DiffReportEntry entry : entries) {
+      if (entry.getType() == DiffType.MODIFY) {
+        assertTrue(report.getDiffList().contains(entry));
+        assertTrue(inverseReport.getDiffList().contains(entry));
+      } else if (entry.getType() == DiffType.DELETE) {
+        assertTrue(report.getDiffList().contains(entry));
+        assertTrue(inverseReport.getDiffList().contains(
+            new DiffReportEntry(DiffType.CREATE, entry.getSourcePath())));
+      } else if (entry.getType() == DiffType.CREATE) {
+        assertTrue(report.getDiffList().contains(entry));
+        assertTrue(inverseReport.getDiffList().contains(
+            new DiffReportEntry(DiffType.DELETE, entry.getSourcePath())));
+      }
+    }
+  }
+
+  @Test
+  public void testSnapshotDiffReportRemoteIterator() throws Exception {
+    final Path root = new Path("/");
+    hdfs.mkdirs(root);
+    for (int i = 1; i <= 3; i++) {
+      final Path path = new Path(root, "dir" + i);
+      hdfs.mkdirs(path);
+    }
+    for (int i = 1; i <= 3; i++) {
+      final Path path = new Path(root, "dir" + i);
+      for (int j = 1; j < 4; j++) {
+        final Path file = new Path(path, "file" + j);
+        DFSTestUtil.createFile(hdfs, file, BLOCKSIZE, REPLICATION, SEED);
+      }
+    }
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
+    Path targetDir = new Path(root, "dir4");
+    //create directory dir4
+    hdfs.mkdirs(targetDir);
+    //moves files from dir1 to dir4
+    Path path = new Path(root, "dir1");
+    for (int j = 1; j < 4; j++) {
+      final Path srcPath = new Path(path, "file" + j);
+      final Path targetPath = new Path(targetDir, "file" + j);
+      hdfs.rename(srcPath, targetPath);
+    }
+    targetDir = new Path(root, "dir3");
+    //overwrite existing files in dir3 from files in dir2
+    path = new Path(root, "dir2");
+    for (int j = 1; j < 4; j++) {
+      final Path srcPath = new Path(path, "file" + j);
+      final Path targetPath = new Path(targetDir, "file" + j);
+      hdfs.rename(srcPath, targetPath, Rename.OVERWRITE);
+    }
+    final Path pathToRename = new Path(root, "dir2");
+    //move dir2 inside dir3
+    hdfs.rename(pathToRename, targetDir);
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
+    RemoteIterator<SnapshotDiffReportListing> iterator =
+        hdfs.snapshotDiffReportListingRemoteIterator(root, "s0", "s1");
+    SnapshotDiffReportGenerator snapshotDiffReport;
+    List<SnapshotDiffReportListing.DiffReportListingEntry> modifiedList =
+        new TreeList();
+    List<SnapshotDiffReportListing.DiffReportListingEntry> createdList =
+        new ChunkedArrayList<>();
+    List<SnapshotDiffReportListing.DiffReportListingEntry> deletedList =
+        new ChunkedArrayList<>();
+    SnapshotDiffReportListing report = null;
+    List<SnapshotDiffReportListing> reportList = new ArrayList<>();
+    while (iterator.hasNext()) {
+      report = iterator.next();
+      reportList.add(report);
+      modifiedList.addAll(report.getModifyList());
+      createdList.addAll(report.getCreateList());
+      deletedList.addAll(report.getDeleteList());
+    }
+    try {
+      iterator.next();
+      Assert.fail("next() should fail once the iterator is exhausted");
+    } catch (java.util.NoSuchElementException e) {
+      Assert.assertTrue(
+          e.getMessage().contains("No more entry in SnapshotDiffReport for /"));
+    }
+    Assert.assertNotEquals(0, reportList.size());
+    // generate the snapshotDiffReport and Verify
+    snapshotDiffReport = new SnapshotDiffReportGenerator("/", "s0", "s1",
+        report.getIsFromEarlier(), modifiedList, createdList, deletedList);
+    verifyDiffReportForGivenReport(root, "s0", "s1",
+        snapshotDiffReport.generateReport(),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("dir4")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2"),
+            DFSUtil.string2Bytes("dir3/dir2")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir1")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file1"),
+            DFSUtil.string2Bytes("dir4/file1")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file2"),
+            DFSUtil.string2Bytes("dir4/file2")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file3"),
+            DFSUtil.string2Bytes("dir4/file3")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir2")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file1"),
+            DFSUtil.string2Bytes("dir3/file1")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file2"),
+            DFSUtil.string2Bytes("dir3/file2")),
+        new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file3"),
+            DFSUtil.string2Bytes("dir3/file3")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir3")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("dir3/file1")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("dir3/file2")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("dir3/file3")));
+  }
 }