diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml
index 22ef7224a79..8e2bc944e87 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml
@@ -19,6 +19,8 @@
+
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 25e0f6c0806..3df36d6eae6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -139,10 +139,10 @@ import org.apache.hadoop.hdfs.protocol.QuotaByStorageTypeExceededException;
import org.apache.hadoop.hdfs.protocol.ReencryptionStatusIterator;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
-import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.UnresolvedPathException;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
@@ -2140,14 +2140,16 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
/**
* Get the difference between two snapshots, or between a snapshot and the
* current tree of a directory.
- * @see ClientProtocol#getSnapshotDiffReport(String, String, String)
+ * @see ClientProtocol#getSnapshotDiffReportListing
*/
- public SnapshotDiffReport getSnapshotDiffReport(String snapshotDir,
- String fromSnapshot, String toSnapshot) throws IOException {
+ public SnapshotDiffReportListing getSnapshotDiffReportListing(
+ String snapshotDir, String fromSnapshot, String toSnapshot,
+ byte[] startPath, int index) throws IOException {
checkOpen();
try (TraceScope ignored = tracer.newScope("getSnapshotDiffReport")) {
- return namenode.getSnapshotDiffReport(snapshotDir,
- fromSnapshot, toSnapshot);
+ return namenode
+ .getSnapshotDiffReportListing(snapshotDir, fromSnapshot, toSnapshot,
+ startPath, index);
} catch (RemoteException re) {
throw re.unwrapRemoteException();
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
index 2a8bf0d39a4..f6b28e0350f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
@@ -89,6 +89,7 @@ import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.Arrays;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_CLIENT_TCPNODELAY_DEFAULT;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_CLIENT_TCPNODELAY_KEY;
@@ -124,6 +125,56 @@ public class DFSUtilClient {
return bytes2String(bytes, 0, bytes.length);
}
+  /**
+   * Splits a byte array into an array of byte arrays, using the path
+   * separator byte as the delimiter.
+   */
+ public static byte[][] bytes2byteArray(byte[] bytes) {
+ return bytes2byteArray(bytes, bytes.length, (byte)Path.SEPARATOR_CHAR);
+ }
+  /**
+   * Splits the first {@code len} bytes of {@code bytes} into an array of
+   * byte arrays, using {@code separator} as the delimiting byte.
+   * @param bytes the byte array to split
+   * @param len the number of bytes to split
+   * @param separator the delimiting byte
+   */
+ public static byte[][] bytes2byteArray(byte[] bytes, int len,
+ byte separator) {
+ Preconditions.checkPositionIndex(len, bytes.length);
+ if (len == 0) {
+ return new byte[][]{null};
+ }
+ // Count the splits. Omit multiple separators and the last one by
+ // peeking at prior byte.
+ int splits = 0;
+ for (int i = 1; i < len; i++) {
+ if (bytes[i-1] == separator && bytes[i] != separator) {
+ splits++;
+ }
+ }
+ if (splits == 0 && bytes[0] == separator) {
+ return new byte[][]{null};
+ }
+ splits++;
+ byte[][] result = new byte[splits][];
+ int nextIndex = 0;
+ // Build the splits.
+ for (int i = 0; i < splits; i++) {
+ int startIndex = nextIndex;
+ // find next separator in the bytes.
+ while (nextIndex < len && bytes[nextIndex] != separator) {
+ nextIndex++;
+ }
+ result[i] = (nextIndex > 0)
+ ? Arrays.copyOfRange(bytes, startIndex, nextIndex)
+ : DFSUtilClient.EMPTY_BYTES; // reuse empty bytes for root.
+ do { // skip over separators.
+ nextIndex++;
+ } while (nextIndex < len && bytes[nextIndex] == separator);
+ }
+ return result;
+ }
/** Return used as percentage of capacity */
public static float getPercentUsed(long used, long capacity) {
return capacity <= 0 ? 100 : (used * 100.0f)/capacity;
@@ -277,11 +328,9 @@ public class DFSUtilClient {
* Given a list of path components returns a byte array
*/
public static byte[] byteArray2bytes(byte[][] pathComponents) {
- if (pathComponents.length == 0) {
+ if (pathComponents.length == 0 || (pathComponents.length == 1
+ && (pathComponents[0] == null || pathComponents[0].length == 0))) {
return EMPTY_BYTES;
- } else if (pathComponents.length == 1
- && (pathComponents[0] == null || pathComponents[0].length == 0)) {
- return new byte[]{(byte) Path.SEPARATOR_CHAR};
}
int length = 0;
for (int i = 0; i < pathComponents.length; i++) {
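For illustration, a minimal sketch of the split/join semantics of the helpers above; the wrapper class and paths are hypothetical, only the DFSUtilClient calls are from this patch. Repeated separators collapse, and the root maps to a single empty component:

    import org.apache.hadoop.hdfs.DFSUtilClient;

    public class PathBytesExample {
      public static void main(String[] args) {
        // "/a//b" splits into {"", "a", "b"}: an empty component for the
        // root, and the doubled separator is collapsed.
        byte[] raw = DFSUtilClient.string2Bytes("/a//b");
        byte[][] components = DFSUtilClient.bytes2byteArray(raw);
        for (byte[] component : components) {
          System.out.println("[" + DFSUtilClient.bytes2String(component) + "]");
        }
        // byteArray2bytes joins the components back with '/' separators,
        // so the round trip normalizes "/a//b" to "/a/b".
        byte[] joined = DFSUtilClient.byteArray2bytes(components);
        System.out.println(DFSUtilClient.bytes2String(joined));
      }
    }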
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 9db12e1ba33..c010c8ab5c7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hdfs;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
+import org.apache.commons.collections.list.TreeList;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -90,12 +91,16 @@ import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing.DiffReportListingEntry;
+import org.apache.hadoop.hdfs.client.impl.SnapshotDiffReportGenerator;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.util.ChunkedArrayList;
import org.apache.hadoop.util.Progressable;
import javax.annotation.Nonnull;
@@ -1971,19 +1976,46 @@ public class DistributedFileSystem extends FileSystem {
}.resolve(this, absF);
}
+ private SnapshotDiffReport getSnapshotDiffReportInternal(
+ final String snapshotDir, final String fromSnapshot,
+ final String toSnapshot) throws IOException {
+ byte[] startPath = DFSUtilClient.EMPTY_BYTES;
+ int index = -1;
+ SnapshotDiffReportGenerator snapshotDiffReport;
+    List<DiffReportListingEntry> modifiedList = new TreeList();
+    List<DiffReportListingEntry> createdList = new ChunkedArrayList<>();
+    List<DiffReportListingEntry> deletedList = new ChunkedArrayList<>();
+ SnapshotDiffReportListing report;
+ do {
+ report = dfs.getSnapshotDiffReportListing(snapshotDir, fromSnapshot,
+ toSnapshot, startPath, index);
+ startPath = report.getLastPath();
+ index = report.getLastIndex();
+ modifiedList.addAll(report.getModifyList());
+ createdList.addAll(report.getCreateList());
+ deletedList.addAll(report.getDeleteList());
+ } while (!(Arrays.equals(startPath, DFSUtilClient.EMPTY_BYTES)
+ && index == -1));
+ snapshotDiffReport =
+ new SnapshotDiffReportGenerator(snapshotDir, fromSnapshot, toSnapshot,
+ report.getIsFromEarlier(), modifiedList, createdList, deletedList);
+ return snapshotDiffReport.generateReport();
+ }
+
/**
* Get the difference between two snapshots, or between a snapshot and the
* current tree of a directory.
*
- * @see DFSClient#getSnapshotDiffReport(String, String, String)
+ * @see DFSClient#getSnapshotDiffReportListing
*/
public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir,
final String fromSnapshot, final String toSnapshot) throws IOException {
Path absF = fixRelativePart(snapshotDir);
    return new FileSystemLinkResolver<SnapshotDiffReport>() {
@Override
- public SnapshotDiffReport doCall(final Path p) throws IOException {
- return dfs.getSnapshotDiffReport(getPathName(p), fromSnapshot,
+ public SnapshotDiffReport doCall(final Path p)
+ throws IOException {
+ return getSnapshotDiffReportInternal(getPathName(p), fromSnapshot,
toSnapshot);
}
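From the application side the paging is transparent: getSnapshotDiffReport keeps its existing signature, and the resolver above assembles the full report from the partial listings. A hedged usage sketch; the namenode URI, directory, and snapshot names are made up:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;

    public class SnapshotDiffUsage {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path root = new Path("hdfs://namenode:8020/");
        try (DistributedFileSystem dfs =
            (DistributedFileSystem) root.getFileSystem(conf)) {
          // Internally issues as many getSnapshotDiffReportListing RPCs as
          // needed and merges the pages via SnapshotDiffReportGenerator.
          SnapshotDiffReport report =
              dfs.getSnapshotDiffReport(new Path("/data"), "s1", "s2");
          System.out.println(report);
        }
      }
    }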
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java
new file mode 100644
index 00000000000..4dbe98858f5
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/SnapshotDiffReportGenerator.java
@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.client.impl;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.primitives.SignedBytes;
+
+import org.apache.hadoop.util.ChunkedArrayList;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing.DiffReportListingEntry;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
+/**
+ * This class represents to end users the difference between two snapshots of
+ * the same directory, or the difference between a snapshot of the directory and
+ * its current state. Instead of capturing all the details of the diff, this
+ * class only lists where the changes happened and their types.
+ */
+public class SnapshotDiffReportGenerator {
+ /**
+ * Compare two inodes based on their full names.
+ */
+  public static final Comparator<DiffReportListingEntry> INODE_COMPARATOR =
+      new Comparator<DiffReportListingEntry>() {
+ @Override
+ public int compare(DiffReportListingEntry left,
+ DiffReportListingEntry right) {
+          final Comparator<byte[]> cmp =
+ SignedBytes.lexicographicalComparator();
+ //source path can never be null
+ final byte[][] l = left.getSourcePath();
+ final byte[][] r = right.getSourcePath();
+ if (l.length == 1 && l[0] == null) {
+ return -1;
+ } else if (r.length == 1 && r[0] == null) {
+ return 1;
+ } else {
+ for (int i = 0; i < l.length && i < r.length; i++) {
+ final int diff = cmp.compare(l[i], r[i]);
+ if (diff != 0) {
+ return diff;
+ }
+ }
+ return l.length == r.length ? 0 : l.length > r.length ? 1 : -1;
+ }
+ }
+ };
+
+ static class RenameEntry {
+ private byte[][] sourcePath;
+ private byte[][] targetPath;
+
+ void setSource(byte[][] srcPath) {
+ this.sourcePath = srcPath;
+ }
+
+ void setTarget(byte[][] target) {
+ this.targetPath = target;
+ }
+
+ boolean isRename() {
+ return sourcePath != null && targetPath != null;
+ }
+
+ byte[][] getSourcePath() {
+ return sourcePath;
+ }
+
+ byte[][] getTargetPath() {
+ return targetPath;
+ }
+ }
+
+  /*
+   * A class representing the diff in a directory between two given snapshots,
+   * kept in two lists: createdList and deletedList.
+   */
+ static class ChildrenDiff {
+    private final List<DiffReportListingEntry> createdList;
+    private final List<DiffReportListingEntry> deletedList;
+
+    ChildrenDiff(List<DiffReportListingEntry> createdList,
+        List<DiffReportListingEntry> deletedList) {
+ this.createdList = createdList != null ? createdList :
+ Collections.emptyList();
+ this.deletedList = deletedList != null ? deletedList :
+ Collections.emptyList();
+ }
+
+    public List<DiffReportListingEntry> getCreatedList() {
+      return createdList;
+    }
+
+    public List<DiffReportListingEntry> getDeletedList() {
+      return deletedList;
+ }
+ }
+
+ /**
+ * snapshot root full path.
+ */
+ private final String snapshotRoot;
+
+ /**
+ * start point of the diff.
+ */
+ private final String fromSnapshot;
+
+ /**
+ * end point of the diff.
+ */
+ private final String toSnapshot;
+
+  /**
+   * Flag to indicate whether the diff is calculated from the older to the
+   * newer snapshot.
+   */
+ private final boolean isFromEarlier;
+
+ /**
+ * A map capturing the detailed difference about file creation/deletion.
+ * Each key indicates a directory inode whose children have been changed
+ * between the two snapshots, while its associated value is a
+   * {@link ChildrenDiff} storing the changes (creation/deletion) that
+   * happened to the children (files).
+ */
+  private final Map<Long, ChildrenDiff> dirDiffMap =
+      new HashMap<>();
+
+  private final Map<Long, RenameEntry> renameMap =
+      new HashMap<>();
+
+  private List<DiffReportListingEntry> mlist = null;
+  private List<DiffReportListingEntry> clist = null;
+  private List<DiffReportListingEntry> dlist = null;
+
+  public SnapshotDiffReportGenerator(String snapshotRoot, String fromSnapshot,
+      String toSnapshot, boolean isFromEarlier,
+      List<DiffReportListingEntry> mlist, List<DiffReportListingEntry> clist,
+      List<DiffReportListingEntry> dlist) {
+ this.snapshotRoot = snapshotRoot;
+ this.fromSnapshot = fromSnapshot;
+ this.toSnapshot = toSnapshot;
+ this.isFromEarlier = isFromEarlier;
+ this.mlist =
+ mlist != null ? mlist : Collections.emptyList();
+ this.clist =
+ clist != null ? clist : Collections.emptyList();
+ this.dlist =
+ dlist != null ? dlist : Collections.emptyList();
+ }
+
+ private RenameEntry getEntry(long inodeId) {
+ RenameEntry entry = renameMap.get(inodeId);
+ if (entry == null) {
+ entry = new RenameEntry();
+ renameMap.put(inodeId, entry);
+ }
+ return entry;
+ }
+
+ public void generateReportList() {
+ mlist.sort(INODE_COMPARATOR);
+ for (DiffReportListingEntry created : clist) {
+ ChildrenDiff entry = dirDiffMap.get(created.getDirId());
+ if (entry == null) {
+        List<DiffReportListingEntry> createdList = new ChunkedArrayList<>();
+ createdList.add(created);
+ ChildrenDiff list = new ChildrenDiff(createdList, null);
+ dirDiffMap.put(created.getDirId(), list);
+ } else {
+ dirDiffMap.get(created.getDirId()).getCreatedList().add(created);
+ }
+ if (created.isReference()) {
+ RenameEntry renameEntry = getEntry(created.getFileId());
+      if (renameEntry.getTargetPath() == null) {
+ renameEntry.setTarget(created.getSourcePath());
+ }
+ }
+ }
+ for (DiffReportListingEntry deleted : dlist) {
+ ChildrenDiff entry = dirDiffMap.get(deleted.getDirId());
+ if (entry == null || (entry.getDeletedList().isEmpty())) {
+ ChildrenDiff list;
+        List<DiffReportListingEntry> deletedList = new ChunkedArrayList<>();
+ deletedList.add(deleted);
+ if (entry == null) {
+ list = new ChildrenDiff(null, deletedList);
+ } else {
+ list = new ChildrenDiff(entry.getCreatedList(), deletedList);
+ }
+ dirDiffMap.put(deleted.getDirId(), list);
+ } else {
+ entry.getDeletedList().add(deleted);
+ }
+ if (deleted.isReference()) {
+ RenameEntry renameEntry = getEntry(deleted.getFileId());
+ renameEntry.setTarget(deleted.getTargetPath());
+ renameEntry.setSource(deleted.getSourcePath());
+ }
+ }
+ }
+
+ public SnapshotDiffReport generateReport() {
+    List<DiffReportEntry> diffReportList = new ChunkedArrayList<>();
+ generateReportList();
+ for (DiffReportListingEntry modified : mlist) {
+ diffReportList.add(
+ new DiffReportEntry(DiffType.MODIFY, modified.getSourcePath(), null));
+ if (modified.isReference()
+ && dirDiffMap.get(modified.getDirId()) != null) {
+        List<DiffReportEntry> subList = generateReport(modified);
+ diffReportList.addAll(subList);
+ }
+ }
+ return new SnapshotDiffReport(snapshotRoot, fromSnapshot, toSnapshot,
+ diffReportList);
+ }
+
+  private List<DiffReportEntry> generateReport(
+      DiffReportListingEntry modified) {
+    List<DiffReportEntry> diffReportList = new ChunkedArrayList<>();
+ ChildrenDiff list = dirDiffMap.get(modified.getDirId());
+ for (DiffReportListingEntry created : list.getCreatedList()) {
+ RenameEntry entry = renameMap.get(created.getFileId());
+ if (entry == null || !entry.isRename()) {
+ diffReportList.add(new DiffReportEntry(
+ isFromEarlier ? DiffType.CREATE : DiffType.DELETE,
+ created.getSourcePath()));
+ }
+ }
+ for (DiffReportListingEntry deleted : list.getDeletedList()) {
+ RenameEntry entry = renameMap.get(deleted.getFileId());
+ if (entry != null && entry.isRename()) {
+ diffReportList.add(new DiffReportEntry(DiffType.RENAME,
+ isFromEarlier ? entry.getSourcePath() : entry.getTargetPath(),
+ isFromEarlier ? entry.getTargetPath() : entry.getSourcePath()));
+ } else {
+ diffReportList.add(new DiffReportEntry(
+ isFromEarlier ? DiffType.DELETE : DiffType.CREATE,
+ deleted.getSourcePath()));
+ }
+ }
+ return diffReportList;
+ }
+}
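To make the rename coalescing above concrete, a hedged sketch: a deleted and a created reference entry sharing a fileId collapse into one RENAME entry, provided the modified entry for their parent directory is flagged as a reference so that generateReport() descends into its ChildrenDiff. All inode ids, paths, and snapshot names below are fabricated:

    import java.util.Arrays;

    import org.apache.hadoop.hdfs.DFSUtilClient;
    import org.apache.hadoop.hdfs.client.impl.SnapshotDiffReportGenerator;
    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing.DiffReportListingEntry;

    public class RenameCoalescingSketch {
      public static void main(String[] args) {
        long dirId = 1001L;  // fabricated inode id of the parent directory
        long fileId = 2002L; // fabricated inode id of the renamed child
        // Modified entry for the parent, marked as a reference so the
        // generator descends into the directory's ChildrenDiff.
        DiffReportListingEntry dir = new DiffReportListingEntry(
            dirId, dirId, DFSUtilClient.string2Bytes("/dir"), true, null);
        // The deleted reference carries both the old name and the target.
        DiffReportListingEntry deleted = new DiffReportListingEntry(
            dirId, fileId, DFSUtilClient.string2Bytes("/dir/old"), true,
            DFSUtilClient.string2Bytes("/dir/new"));
        // The created reference carries the new name only.
        DiffReportListingEntry created = new DiffReportListingEntry(
            dirId, fileId, DFSUtilClient.string2Bytes("/dir/new"), true, null);
        SnapshotDiffReportGenerator generator = new SnapshotDiffReportGenerator(
            "/dir", "s1", "s2", true,
            Arrays.asList(dir), Arrays.asList(created), Arrays.asList(deleted));
        // Expected: MODIFY /dir and RENAME /dir/old -> /dir/new; the matching
        // created/deleted pair is not reported as separate CREATE/DELETE.
        System.out.println(generator.generateReport());
      }
    }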
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
index f61ec75a6fd..eb2e11c995e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
@@ -1288,6 +1288,35 @@ public interface ClientProtocol {
SnapshotDiffReport getSnapshotDiffReport(String snapshotRoot,
String fromSnapshot, String toSnapshot) throws IOException;
+ /**
+ * Get the difference between two snapshots, or between a snapshot and the
+ * current tree of a directory.
+ *
+ * @param snapshotRoot
+ * full path of the directory where snapshots are taken
+ * @param fromSnapshot
+ * snapshot name of the from point. Null indicates the current
+ * tree
+ * @param toSnapshot
+ * snapshot name of the to point. Null indicates the current
+ * tree.
+   * @param startPath
+   *          path, relative to the snapshottable root directory, from which
+   *          the snapshotdiff computation resumes across multiple rpc calls
+   * @param index
+   *          index in the created or deleted list of the directory at which
+   *          the snapshotdiff computation stopped during the last rpc call,
+   *          because the number of entries exceeded the snapshot diff listing
+   *          limit. -1 indicates that the snapshotdiff computation needs to
+   *          start right from the startPath provided.
+   * @return The difference report represented as a
+   *         {@link SnapshotDiffReportListing}.
+ * @throws IOException on error
+ */
+ @Idempotent
+ SnapshotDiffReportListing getSnapshotDiffReportListing(String snapshotRoot,
+ String fromSnapshot, String toSnapshot, byte[] startPath, int index)
+ throws IOException;
+
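The startPath/index pair behaves as a resume cursor: callers start with empty bytes and -1, then feed back lastPath/lastIndex from each partial listing until the namenode again returns an empty path and -1. A hedged sketch of a caller of this method; the proxy, directory, and snapshot names are placeholders, and the loop mirrors the client-side code in DistributedFileSystem above:

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.hadoop.hdfs.DFSUtilClient;
    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;

    public class DiffListingPager {
      /** Drains every page of the diff between s1 and s2 under /dir. */
      static void drainPages(ClientProtocol namenode) throws IOException {
        byte[] startPath = DFSUtilClient.EMPTY_BYTES;
        int index = -1;
        SnapshotDiffReportListing page;
        do {
          page = namenode.getSnapshotDiffReportListing(
              "/dir", "s1", "s2", startPath, index);
          // ... accumulate page.getModifyList(), page.getCreateList(),
          // and page.getDeleteList() here ...
          startPath = page.getLastPath();
          index = page.getLastIndex();
        } while (!(Arrays.equals(startPath, DFSUtilClient.EMPTY_BYTES)
            && index == -1));
      }
    }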
/**
* Add a CacheDirective to the CacheManager.
*
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java
new file mode 100644
index 00000000000..a0e35f6c2a7
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshotDiffReportListing.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.protocol;
+
+import java.util.Collections;
+import java.util.List;
+
+import com.google.common.base.Preconditions;
+
+import org.apache.hadoop.hdfs.DFSUtilClient;
+
+/**
+ * This class represents the difference between two snapshots of
+ * the same directory, or the difference between a snapshot of the directory
+ * and its current state. It collects the diff entries in three lists:
+ * created, deleted and modified. The combined size of the lists returned in
+ * one rpc call to the namenode is capped by
+ * dfs.namenode.snapshotdiff.listing.limit.
+ */
+public class SnapshotDiffReportListing {
+ /**
+ * Representing the full path and diff type of a file/directory where changes
+ * have happened.
+ */
+ public static class DiffReportListingEntry {
+    /**
+     * The inode id of the changed file/directory, the inode id of its
+     * parent directory, and whether the inode is a reference (i.e. part of
+     * a rename).
+     */
+ private final long fileId;
+ private final long dirId;
+ private final boolean isReference;
+    /**
+     * The path (relative to the snapshot root) of 1) the file/directory
+     * where changes have happened, 2) the source file/dir of a rename op,
+     * or 3) the target file/dir of a rename op.
+     */
+ private final byte[][] sourcePath;
+ private final byte[][] targetPath;
+
+ public DiffReportListingEntry(long dirId, long fileId, byte[][] sourcePath,
+ boolean isReference, byte[][] targetPath) {
+ Preconditions.checkNotNull(sourcePath);
+ this.dirId = dirId;
+ this.fileId = fileId;
+ this.sourcePath = sourcePath;
+ this.isReference = isReference;
+ this.targetPath = targetPath;
+ }
+
+ public DiffReportListingEntry(long dirId, long fileId, byte[] sourcePath,
+        boolean isReference, byte[] targetPath) {
+ Preconditions.checkNotNull(sourcePath);
+ this.dirId = dirId;
+ this.fileId = fileId;
+ this.sourcePath = DFSUtilClient.bytes2byteArray(sourcePath);
+ this.isReference = isReference;
+      this.targetPath =
+          targetPath == null ? null : DFSUtilClient.bytes2byteArray(targetPath);
+ }
+
+ public long getDirId() {
+ return dirId;
+ }
+
+ public long getFileId() {
+ return fileId;
+ }
+
+ public byte[][] getSourcePath() {
+ return sourcePath;
+ }
+
+ public byte[][] getTargetPath() {
+ return targetPath;
+ }
+
+ public boolean isReference() {
+ return isReference;
+ }
+ }
+
+  /** The last path processed, from which the next rpc call will resume. */
+ private final byte[] lastPath;
+
+ private final int lastIndex;
+
+ private final boolean isFromEarlier;
+
+  /** Lists of diff entries returned in this partial report. */
+  private final List<DiffReportListingEntry> modifyList;
+
+  private final List<DiffReportListingEntry> createList;
+
+  private final List<DiffReportListingEntry> deleteList;
+
+ public SnapshotDiffReportListing() {
+ this.modifyList = Collections.emptyList();
+ this.createList = Collections.emptyList();
+ this.deleteList = Collections.emptyList();
+ this.lastPath = DFSUtilClient.string2Bytes("");
+ this.lastIndex = -1;
+ this.isFromEarlier = false;
+ }
+
+  public SnapshotDiffReportListing(byte[] startPath,
+      List<DiffReportListingEntry> modifiedEntryList,
+      List<DiffReportListingEntry> createdEntryList,
+      List<DiffReportListingEntry> deletedEntryList, int index,
+      boolean isFromEarlier) {
+ this.modifyList = modifiedEntryList;
+ this.createList = createdEntryList;
+ this.deleteList = deletedEntryList;
+ this.lastPath =
+ startPath != null ? startPath : DFSUtilClient.string2Bytes("");
+ this.lastIndex = index;
+ this.isFromEarlier = isFromEarlier;
+ }
+
+  public List<DiffReportListingEntry> getModifyList() {
+    return modifyList;
+  }
+
+  public List<DiffReportListingEntry> getCreateList() {
+    return createList;
+  }
+
+  public List<DiffReportListingEntry> getDeleteList() {
+    return deleteList;
+  }
+
+ /**
+ * @return {@link #lastPath}
+ */
+ public byte[] getLastPath() {
+ return lastPath;
+ }
+
+ public int getLastIndex() {
+ return lastIndex;
+ }
+
+ public boolean getIsFromEarlier() {
+ return isFromEarlier;
+ }
+
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
index aef7c1e9dec..38dc44b436c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
@@ -79,6 +79,7 @@ import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.GetAclStatusRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.GetAclStatusResponseProto;
@@ -133,6 +134,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetQuo
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportListingRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportListingResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshottableDirListingRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshottableDirListingResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetStoragePoliciesRequestProto;
@@ -1205,6 +1208,27 @@ public class ClientNamenodeProtocolTranslatorPB implements
}
}
+ @Override
+ public SnapshotDiffReportListing getSnapshotDiffReportListing(
+ String snapshotRoot, String fromSnapshot, String toSnapshot,
+ byte[] startPath, int index) throws IOException {
+ GetSnapshotDiffReportListingRequestProto req =
+ GetSnapshotDiffReportListingRequestProto.newBuilder()
+ .setSnapshotRoot(snapshotRoot).setFromSnapshot(fromSnapshot)
+ .setToSnapshot(toSnapshot).setCursor(
+ HdfsProtos.SnapshotDiffReportCursorProto.newBuilder()
+ .setStartPath(PBHelperClient.getByteString(startPath))
+ .setIndex(index).build()).build();
+ try {
+ GetSnapshotDiffReportListingResponseProto result =
+ rpcProxy.getSnapshotDiffReportListing(null, req);
+
+ return PBHelperClient.convert(result.getDiffReport());
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
@Override
public long addCacheDirective(CacheDirectiveInfo directive,
      EnumSet<CacheFlag> flags) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
index d3b7f6d2f2e..fbc6dbfc5ba 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
@@ -99,6 +99,8 @@ import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing.DiffReportListingEntry;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
@@ -169,6 +171,8 @@ import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlocksProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.QuotaUsageProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ReencryptionInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RollingUpgradeStatusProto;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.SnapshotDiffReportListingEntryProto;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.SnapshotDiffReportListingProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.SnapshotDiffReportEntryProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.SnapshotDiffReportProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.SnapshottableDirectoryListingProto;
@@ -1489,6 +1493,61 @@ public class PBHelperClient {
.toByteArray() : null);
}
+ public static SnapshotDiffReportListing convert(
+ SnapshotDiffReportListingProto reportProto) {
+ if (reportProto == null) {
+ return null;
+ }
+    List<SnapshotDiffReportListingEntryProto> modifyList =
+        reportProto.getModifiedEntriesList();
+    List<DiffReportListingEntry> modifiedEntries = new ChunkedArrayList<>();
+ for (SnapshotDiffReportListingEntryProto entryProto : modifyList) {
+ DiffReportListingEntry entry = convert(entryProto);
+ if (entry != null) {
+ modifiedEntries.add(entry);
+ }
+ }
+    List<SnapshotDiffReportListingEntryProto> createList =
+        reportProto.getCreatedEntriesList();
+    List<DiffReportListingEntry> createdEntries = new ChunkedArrayList<>();
+ for (SnapshotDiffReportListingEntryProto entryProto : createList) {
+ DiffReportListingEntry entry = convert(entryProto);
+ if (entry != null) {
+ createdEntries.add(entry);
+ }
+ }
+    List<SnapshotDiffReportListingEntryProto> deletedList =
+        reportProto.getDeletedEntriesList();
+    List<DiffReportListingEntry> deletedEntries = new ChunkedArrayList<>();
+ for (SnapshotDiffReportListingEntryProto entryProto : deletedList) {
+ DiffReportListingEntry entry = convert(entryProto);
+ if (entry != null) {
+ deletedEntries.add(entry);
+ }
+ }
+ byte[] startPath = reportProto.getCursor().getStartPath().toByteArray();
+ boolean isFromEarlier = reportProto.getIsFromEarlier();
+
+ int index = reportProto.getCursor().getIndex();
+ return new SnapshotDiffReportListing(startPath, modifiedEntries,
+ createdEntries, deletedEntries, index, isFromEarlier);
+ }
+
+ public static DiffReportListingEntry convert(
+ SnapshotDiffReportListingEntryProto entry) {
+ if (entry == null) {
+ return null;
+ }
+ long dirId = entry.getDirId();
+ long fileId = entry.getFileId();
+ boolean isReference = entry.getIsReference();
+ byte[] sourceName = entry.getFullpath().toByteArray();
+ byte[] targetName =
+ entry.hasTargetPath() ? entry.getTargetPath().toByteArray() : null;
+ return new DiffReportListingEntry(dirId, fileId, sourceName, isReference,
+ targetName);
+ }
+
public static SnapshottableDirectoryStatus[] convert(
SnapshottableDirectoryListingProto sdlp) {
if (sdlp == null)
@@ -2508,6 +2567,74 @@ public class PBHelperClient {
return builder.build();
}
+ public static SnapshotDiffReportListingEntryProto convert(
+ DiffReportListingEntry entry) {
+ if (entry == null) {
+ return null;
+ }
+ ByteString sourcePath = getByteString(
+ entry.getSourcePath() == null ? DFSUtilClient.EMPTY_BYTES :
+ DFSUtilClient.byteArray2bytes(entry.getSourcePath()));
+ long dirId = entry.getDirId();
+ long fileId = entry.getFileId();
+ boolean isReference = entry.isReference();
+ ByteString targetPath = getByteString(
+ entry.getTargetPath() == null ? DFSUtilClient.EMPTY_BYTES :
+ DFSUtilClient.byteArray2bytes(entry.getTargetPath()));
+ SnapshotDiffReportListingEntryProto.Builder builder =
+ SnapshotDiffReportListingEntryProto.newBuilder().setFullpath(sourcePath)
+ .setDirId(dirId).setFileId(fileId).setIsReference(isReference)
+ .setTargetPath(targetPath);
+ return builder.build();
+ }
+
+ public static SnapshotDiffReportListingProto convert(
+ SnapshotDiffReportListing report) {
+ if (report == null) {
+ return null;
+ }
+ ByteString startPath = getByteString(
+ report.getLastPath() == null ? DFSUtilClient.EMPTY_BYTES :
+ report.getLastPath());
+    List<DiffReportListingEntry> modifiedEntries = report.getModifyList();
+    List<DiffReportListingEntry> createdEntries = report.getCreateList();
+    List<DiffReportListingEntry> deletedEntries = report.getDeleteList();
+    List<SnapshotDiffReportListingEntryProto> modifiedEntryProtos =
+        new ChunkedArrayList<>();
+ for (DiffReportListingEntry entry : modifiedEntries) {
+ SnapshotDiffReportListingEntryProto entryProto = convert(entry);
+ if (entryProto != null) {
+ modifiedEntryProtos.add(entryProto);
+ }
+ }
+    List<SnapshotDiffReportListingEntryProto> createdEntryProtos =
+        new ChunkedArrayList<>();
+ for (DiffReportListingEntry entry : createdEntries) {
+ SnapshotDiffReportListingEntryProto entryProto = convert(entry);
+ if (entryProto != null) {
+ createdEntryProtos.add(entryProto);
+ }
+ }
+    List<SnapshotDiffReportListingEntryProto> deletedEntryProtos =
+        new ChunkedArrayList<>();
+ for (DiffReportListingEntry entry : deletedEntries) {
+ SnapshotDiffReportListingEntryProto entryProto = convert(entry);
+ if (entryProto != null) {
+ deletedEntryProtos.add(entryProto);
+ }
+ }
+
+ return SnapshotDiffReportListingProto.newBuilder()
+ .addAllModifiedEntries(modifiedEntryProtos)
+ .addAllCreatedEntries(createdEntryProtos)
+ .addAllDeletedEntries(deletedEntryProtos)
+ .setIsFromEarlier(report.getIsFromEarlier())
+ .setCursor(HdfsProtos.SnapshotDiffReportCursorProto.newBuilder()
+ .setStartPath(startPath)
+ .setIndex(report.getLastIndex()).build())
+ .build();
+ }
+
public static SnapshotDiffReportProto convert(SnapshotDiffReport report) {
if (report == null) {
return null;
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto
index 6db6ad0804c..eb6da2553a4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto
@@ -297,6 +297,16 @@ message GetSnapshotDiffReportResponseProto {
required SnapshotDiffReportProto diffReport = 1;
}
+message GetSnapshotDiffReportListingRequestProto {
+ required string snapshotRoot = 1;
+ required string fromSnapshot = 2;
+ required string toSnapshot = 3;
+ optional SnapshotDiffReportCursorProto cursor = 4;
+}
+
+message GetSnapshotDiffReportListingResponseProto {
+ required SnapshotDiffReportListingProto diffReport = 1;
+}
message RenewLeaseRequestProto {
required string clientName = 1;
}
@@ -913,6 +923,8 @@ service ClientNamenodeProtocol {
returns(DeleteSnapshotResponseProto);
rpc getSnapshotDiffReport(GetSnapshotDiffReportRequestProto)
returns(GetSnapshotDiffReportResponseProto);
+ rpc getSnapshotDiffReportListing(GetSnapshotDiffReportListingRequestProto)
+ returns(GetSnapshotDiffReportListingResponseProto);
rpc isFileClosed(IsFileClosedRequestProto)
returns(IsFileClosedResponseProto);
rpc modifyAclEntries(ModifyAclEntriesRequestProto)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
index 953bf19fe0a..a423a4b5dbe 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
@@ -528,6 +528,32 @@ message SnapshotDiffReportProto {
repeated SnapshotDiffReportEntryProto diffReportEntries = 4;
}
+/**
+ * Snapshot diff report listing entry
+ */
+message SnapshotDiffReportListingEntryProto {
+ required bytes fullpath = 1;
+ required uint64 dirId = 2;
+ required bool isReference = 3;
+ optional bytes targetPath = 4;
+ optional uint64 fileId = 5;
+}
+
+message SnapshotDiffReportCursorProto {
+ required bytes startPath = 1;
+ required int32 index = 2 [default = -1];
+}
+/**
+ * Snapshot diff report listing
+ */
+message SnapshotDiffReportListingProto {
+  // lists of modified/created/deleted entries returned in one rpc call
+ repeated SnapshotDiffReportListingEntryProto modifiedEntries = 1;
+ repeated SnapshotDiffReportListingEntryProto createdEntries = 2;
+ repeated SnapshotDiffReportListingEntryProto deletedEntries = 3;
+ required bool isFromEarlier = 4;
+ optional SnapshotDiffReportCursorProto cursor = 5;
+}
/**
* Block information
*
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 37071b61965..97b8b1ac6aa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -381,6 +381,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT_DEFAULT =
true;
+ public static final String
+ DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT =
+ "dfs.namenode.snapshotdiff.listing.limit";
+ public static final int
+ DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT = 1000;
// Whether to enable datanode's stale state detection and usage for reads
public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
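The new key bounds how many diff entries one getSnapshotDiffReportListing response may carry; FSNamesystem reads it once at startup (see the FSNamesystem hunk below). A sketch of overriding the default, with an illustrative value:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class DiffListingLimitConfig {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Cap each partial listing at 500 entries instead of the default
        // 1000; smaller pages mean more RPCs but smaller responses.
        conf.setInt(DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT, 500);
        System.out.println(conf.getInt(
            DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT,
            DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT));
      }
    }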
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
index 2f9781a455e..3f6c3d7a1bd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
@@ -349,7 +349,8 @@ public class DFSUtil {
public static byte[][] getPathComponents(String path) {
// avoid intermediate split to String[]
final byte[] bytes = string2Bytes(path);
- return bytes2byteArray(bytes, bytes.length, (byte)Path.SEPARATOR_CHAR);
+ return DFSUtilClient
+ .bytes2byteArray(bytes, bytes.length, (byte) Path.SEPARATOR_CHAR);
}
/**
@@ -369,42 +370,9 @@ public class DFSUtil {
* @param len the number of bytes to split
* @param separator the delimiting byte
*/
- public static byte[][] bytes2byteArray(byte[] bytes,
- int len,
- byte separator) {
- Preconditions.checkPositionIndex(len, bytes.length);
- if (len == 0) {
- return new byte[][]{null};
- }
- // Count the splits. Omit multiple separators and the last one by
- // peeking at prior byte.
- int splits = 0;
- for (int i = 1; i < len; i++) {
- if (bytes[i-1] == separator && bytes[i] != separator) {
- splits++;
- }
- }
- if (splits == 0 && bytes[0] == separator) {
- return new byte[][]{null};
- }
- splits++;
- byte[][] result = new byte[splits][];
- int nextIndex = 0;
- // Build the splits.
- for (int i = 0; i < splits; i++) {
- int startIndex = nextIndex;
- // find next separator in the bytes.
- while (nextIndex < len && bytes[nextIndex] != separator) {
- nextIndex++;
- }
- result[i] = (nextIndex > 0)
- ? Arrays.copyOfRange(bytes, startIndex, nextIndex)
- : DFSUtilClient.EMPTY_BYTES; // reuse empty bytes for root.
- do { // skip over separators.
- nextIndex++;
- } while (nextIndex < len && bytes[nextIndex] == separator);
- }
- return result;
+ public static byte[][] bytes2byteArray(byte[] bytes, int len,
+ byte separator) {
+ return DFSUtilClient.bytes2byteArray(bytes, len, separator);
}
/**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
index f5bbae16dae..2ae41e40425 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
@@ -55,6 +55,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.GetAclStatusRequestProto;
@@ -143,6 +144,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSer
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportListingRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshotDiffReportListingResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshottableDirListingRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetSnapshottableDirListingResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetStoragePoliciesRequestProto;
@@ -1245,6 +1248,25 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
}
}
+ @Override
+ public GetSnapshotDiffReportListingResponseProto getSnapshotDiffReportListing(
+ RpcController controller,
+ GetSnapshotDiffReportListingRequestProto request)
+ throws ServiceException {
+ try {
+ SnapshotDiffReportListing report = server
+ .getSnapshotDiffReportListing(request.getSnapshotRoot(),
+ request.getFromSnapshot(), request.getToSnapshot(),
+ request.getCursor().getStartPath().toByteArray(),
+ request.getCursor().getIndex());
+ return GetSnapshotDiffReportListingResponseProto.newBuilder()
+ .setDiffReport(PBHelperClient.convert(report)).build();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
@Override
public IsFileClosedResponseProto isFileClosed(
RpcController controller, IsFileClosedRequestProto request)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java
index 3bb5ca47fe5..b5acf120d8c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java
@@ -92,6 +92,7 @@ import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol;
@@ -1508,6 +1509,14 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
return null;
}
+ @Override // ClientProtocol
+ public SnapshotDiffReportListing getSnapshotDiffReportListing(
+ String snapshotRoot, String earlierSnapshotName, String laterSnapshotName,
+ byte[] startPath, int index) throws IOException {
+ checkOperation(OperationCategory.READ, false);
+ return null;
+ }
+
@Override // ClientProtocol
public long addCacheDirective(CacheDirectiveInfo path,
      EnumSet<CacheFlag> flags) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
index 9dd75bc349b..18427071d4d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSnapshotOp.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.FSLimitException;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
@@ -164,6 +165,29 @@ class FSDirSnapshotOp {
return diffs;
}
+ static SnapshotDiffReportListing getSnapshotDiffReportListing(FSDirectory fsd,
+ SnapshotManager snapshotManager, String path, String fromSnapshot,
+ String toSnapshot, byte[] startPath, int index,
+ int snapshotDiffReportLimit) throws IOException {
+ SnapshotDiffReportListing diffs;
+ final FSPermissionChecker pc = fsd.getPermissionChecker();
+ fsd.readLock();
+ try {
+ INodesInPath iip = fsd.resolvePath(pc, path, DirOp.READ);
+ if (fsd.isPermissionEnabled()) {
+ checkSubtreeReadPermission(fsd, pc, path, fromSnapshot);
+ checkSubtreeReadPermission(fsd, pc, path, toSnapshot);
+ }
+ diffs = snapshotManager
+ .diff(iip, path, fromSnapshot, toSnapshot, startPath, index,
+ snapshotDiffReportLimit);
+    } finally {
+ fsd.readUnlock();
+ }
+ return diffs;
+ }
/** Get a collection of full snapshot paths given file and snapshot dir.
* @param lsf a list of snapshottable features
* @param file full path of the file
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index d594f2a6552..d3d9cdc42da 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -87,6 +87,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROU
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
@@ -95,6 +97,8 @@ import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats;
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean;
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
import static org.apache.hadoop.util.Time.now;
@@ -211,7 +215,6 @@ import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
-import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
@@ -426,6 +429,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
private final UserGroupInformation fsOwner;
private final String supergroup;
private final boolean standbyShouldCheckpoint;
+ private final int snapshotDiffReportLimit;
/** Interval between each check of lease to release. */
private final long leaseRecheckIntervalMs;
@@ -761,6 +765,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
DFS_PERMISSIONS_ENABLED_DEFAULT);
+ this.snapshotDiffReportLimit =
+ conf.getInt(DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT,
+ DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT);
+
LOG.info("fsOwner = " + fsOwner);
LOG.info("supergroup = " + supergroup);
LOG.info("isPermissionEnabled = " + isPermissionEnabled);
@@ -6364,16 +6372,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
/**
* Get the difference between two snapshots (or between a snapshot and the
* current status) of a snapshottable directory.
- *
+ *
* @param path The full path of the snapshottable directory.
* @param fromSnapshot Name of the snapshot to calculate the diff from. Null
* or empty string indicates the current tree.
* @param toSnapshot Name of the snapshot to calculated the diff to. Null or
* empty string indicates the current tree.
- * @return A report about the difference between {@code fromSnapshot} and
- * {@code toSnapshot}. Modified/deleted/created/renamed files and
- * directories belonging to the snapshottable directories are listed
- * and labeled as M/-/+/R respectively.
+ * @return A report about the difference between {@code fromSnapshot} and
+ * {@code toSnapshot}. Modified/deleted/created/renamed files and
+ * directories belonging to the snapshottable directories are listed
+ * and labeled as M/-/+/R respectively.
* @throws IOException
*/
SnapshotDiffReport getSnapshotDiffReport(String path,
@@ -6403,6 +6411,63 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
toSnapshotRoot, null);
return diffs;
}
+
+ /**
+ * Get the difference between two snapshots (or between a snapshot and the
+ * current status) of a snapshottable directory.
+ *
+ * @param path The full path of the snapshottable directory.
+ * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
+ * or empty string indicates the current tree.
+ * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
+ * empty string indicates the current tree.
+   * @param startPath
+   *          path, relative to the snapshottable root directory, from which
+   *          the snapshotdiff computation resumes across multiple rpc calls
+   * @param index
+   *          index in the created or deleted list of the directory at which
+   *          the snapshotdiff computation stopped during the last rpc call,
+   *          because the number of entries exceeded the snapshot diff listing
+   *          limit. -1 indicates that the snapshotdiff computation needs to
+   *          start right from the startPath provided.
+ * @return A partial report about the difference between {@code fromSnapshot}
+ * and {@code toSnapshot}. Modified/deleted/created/renamed files and
+ * directories belonging to the snapshottable directories are listed
+ * and labeled as M/-/+/R respectively.
+ * @throws IOException
+ */
+ SnapshotDiffReportListing getSnapshotDiffReportListing(String path,
+ String fromSnapshot, String toSnapshot, byte[] startPath, int index)
+ throws IOException {
+ final String operationName = "computeSnapshotDiff";
+ SnapshotDiffReportListing diffs = null;
+ checkOperation(OperationCategory.READ);
+ boolean success = false;
+ String fromSnapshotRoot =
+ (fromSnapshot == null || fromSnapshot.isEmpty()) ? path :
+ Snapshot.getSnapshotPath(path, fromSnapshot);
+ String toSnapshotRoot =
+ (toSnapshot == null || toSnapshot.isEmpty()) ? path :
+ Snapshot.getSnapshotPath(path, toSnapshot);
+ readLock();
+ try {
+ checkOperation(OperationCategory.READ);
+ diffs = FSDirSnapshotOp
+ .getSnapshotDiffReportListing(dir, snapshotManager, path,
+ fromSnapshot, toSnapshot, startPath, index,
+ snapshotDiffReportLimit);
+ success = true;
+ } catch (AccessControlException ace) {
+ logAuditEvent(success, operationName, fromSnapshotRoot, toSnapshotRoot,
+ null);
+ throw ace;
+ } finally {
+ readUnlock(operationName);
+ }
+ logAuditEvent(success, operationName, fromSnapshotRoot, toSnapshotRoot,
+ null);
+ return diffs;
+ }
/**
* Delete a snapshot of a snapshottable directory
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index 895e8737128..36d33a6f8b9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -121,6 +121,7 @@ import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;
import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException;
import org.apache.hadoop.hdfs.protocol.UnresolvedPathException;
@@ -1862,6 +1863,18 @@ public class NameNodeRpcServer implements NamenodeProtocols {
return report;
}
+ @Override // ClientProtocol
+ public SnapshotDiffReportListing getSnapshotDiffReportListing(
+ String snapshotRoot, String earlierSnapshotName, String laterSnapshotName,
+ byte[] startPath, int index) throws IOException {
+ checkNNStartup();
+ SnapshotDiffReportListing report = namesystem
+ .getSnapshotDiffReportListing(snapshotRoot, earlierSnapshotName,
+ laterSnapshotName, startPath, index);
+ metrics.incrSnapshotDiffReportOps();
+ return report;
+ }
+
@Override // ClientProtocol
public long addCacheDirective(
      CacheDirectiveInfo path, EnumSet<CacheFlag> flags) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
index 076b78f49f4..217ad01cc7d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
@@ -24,10 +24,12 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
+import java.util.Arrays;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
@@ -284,6 +286,54 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
return diffs;
}
+ /**
+ * Compute the difference between two snapshots of the directory, or between
+ * a snapshot and the current tree of the directory. The diff calculation
+ * can be scoped to either the snapshot root or any descendant directory
+ * under the snapshot root.
+ *
+ * @param snapshotRootDir the snapshot root directory
+ * @param snapshotDiffScopeDir the descendant directory under the snapshot
+ * root to which the diff calculation is scoped.
+ * @param from The name of the start point of the comparison. Null indicates
+ * the current tree.
+ * @param to The name of the end point. Null indicates the current tree.
+ * @param startPath
+ * path relative to the snapshottable root directory from which the
+ * snapshot diff computation should resume across multiple rpc calls
+ * @param index
+ * index in the created or deleted list of the directory at which
+ * the snapshot diff computation stopped during the previous rpc
+ * call because the number of entries exceeded the configured
+ * listing limit. -1 indicates that the computation should start
+ * right from the given startPath.
+ *
+ * @return The difference between the start/end points.
+ * @throws SnapshotException If there is no snapshot matching the starting
+ * point, or if "to" is not null but cannot be identified as a
+ * previous snapshot.
+ */
+ SnapshotDiffListingInfo computeDiff(final INodeDirectory snapshotRootDir,
+ final INodeDirectory snapshotDiffScopeDir, final String from,
+ final String to, byte[] startPath, int index,
+ int snapshotDiffReportEntriesLimit) throws SnapshotException {
+ Preconditions.checkArgument(
+ snapshotDiffScopeDir.isDescendantOfSnapshotRoot(snapshotRootDir));
+ Snapshot fromSnapshot = getSnapshotByName(snapshotRootDir, from);
+ Snapshot toSnapshot = getSnapshotByName(snapshotRootDir, to);
+ boolean toProcess = Arrays.equals(startPath, DFSUtilClient.EMPTY_BYTES);
+ byte[][] resumePath = DFSUtilClient.bytes2byteArray(startPath);
+ if (from.equals(to)) {
+ return null;
+ }
+ SnapshotDiffListingInfo diffs =
+ new SnapshotDiffListingInfo(snapshotRootDir, snapshotDiffScopeDir,
+ fromSnapshot, toSnapshot, snapshotDiffReportEntriesLimit);
+ diffs.setLastIndex(index);
+ computeDiffRecursively(snapshotDiffScopeDir, snapshotDiffScopeDir,
+ new ArrayList<byte[]>(), diffs, resumePath, 0, toProcess);
+ return diffs;
+ }
+
/**
* Find the snapshot matching the given name.
*
@@ -367,12 +417,96 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
}
}
+ /**
+ * Recursively and partially compute the difference between snapshots under
+ * a given directory/file.
+ * @param snapshotDir The directory where snapshots were taken. Can be the
+ * snapshot root directory or any descendant directory
+ * under the snapshot root directory.
+ * @param node The directory/file under which the diff is computed.
+ * @param parentPath Relative path (corresponding to the snapshot root) of
+ * the node's parent.
+ * @param diffReport data structure used to store the diff.
+ * @param resume path components from which to resume the snapshot diff
+ * computation within one rpc call
+ * @param level the level, in the directory tree rooted at snapshotRoot,
+ * at which the node resides.
+ * @param processFlag whether the dir/file from which the snapshot diff
+ * computation has to resume has already been reached.
+ * @return false if the diff computation hit the entry limit and must stop;
+ * true otherwise.
+ */
+ private boolean computeDiffRecursively(final INodeDirectory snapshotDir,
+ INode node, List<byte[]> parentPath, SnapshotDiffListingInfo diffReport,
+ final byte[][] resume, int level, boolean processFlag) {
+ final Snapshot earlier = diffReport.getEarlier();
+ final Snapshot later = diffReport.getLater();
+ byte[][] relativePath = parentPath.toArray(new byte[parentPath.size()][]);
+ if (!processFlag && level == resume.length
+ && Arrays.equals(resume[resume.length - 1], node.getLocalNameBytes())) {
+ processFlag = true;
+ }
+
+ if (node.isDirectory()) {
+ final ChildrenDiff diff = new ChildrenDiff();
+ INodeDirectory dir = node.asDirectory();
+ if (processFlag) {
+ DirectoryWithSnapshotFeature sf = dir.getDirectoryWithSnapshotFeature();
+ if (sf != null) {
+ boolean change =
+ sf.computeDiffBetweenSnapshots(earlier, later, diff, dir);
+ if (change) {
+ if (!diffReport.addDirDiff(dir.getId(), relativePath, diff)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ ReadOnlyList<INode> children = dir.getChildrenList(earlier.getId());
+ boolean iterate = false;
+ for (INode child : children) {
+ final byte[] name = child.getLocalNameBytes();
+ if (!processFlag && !iterate && !Arrays.equals(resume[level], name)) {
+ continue;
+ }
+ iterate = true;
+ level = level + 1;
+ boolean toProcess = diff.searchIndex(ListType.DELETED, name) < 0;
+ if (!toProcess && child instanceof INodeReference.WithName) {
+ byte[][] renameTargetPath = findRenameTargetPath(snapshotDir,
+ (WithName) child, Snapshot.getSnapshotId(later));
+ if (renameTargetPath != null) {
+ toProcess = true;
+ }
+ }
+ if (toProcess) {
+ parentPath.add(name);
+ processFlag = computeDiffRecursively(snapshotDir, child, parentPath,
+ diffReport, resume, level, processFlag);
+ parentPath.remove(parentPath.size() - 1);
+ if (!processFlag) {
+ return false;
+ }
+ }
+ }
+ } else if (node.isFile() && node.asFile().isWithSnapshot() && processFlag) {
+ INodeFile file = node.asFile();
+ boolean change = file.getFileWithSnapshotFeature()
+ .changedBetweenSnapshots(file, earlier, later);
+ if (change) {
+ if (!diffReport.addFileDiff(file, relativePath)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
/**
* We just found a deleted WithName node as the source of a rename operation.
* However, we should include it in our snapshot diff report as rename only
* if the rename target is also under the same snapshottable directory.
*/
- private byte[][] findRenameTargetPath(final INodeDirectory snapshotRoot,
+ public byte[][] findRenameTargetPath(final INodeDirectory snapshotRoot,
INodeReference.WithName wn, final int snapshotId) {
INode inode = wn.getReferredINode();
final LinkedList<byte[]> ancestors = Lists.newLinkedList();
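The resume machinery above operates on path components: the opaque startPath cursor is split into per-level names, and the recursion skips earlier siblings at each level until it re-reaches the node where the previous call stopped. A small illustration, assuming DFSUtil.string2Bytes and DFSUtilClient.bytes2byteArray behave as they are used in this patch:

    // Cursor produced by a previous call that stopped at /dir2/file1.
    byte[] startPath = DFSUtil.string2Bytes("dir2/file1");
    byte[][] resume = DFSUtilClient.bytes2byteArray(startPath);
    // resume[0] == "dir2", resume[1] == "file1". At level 0 the child loop
    // skips dir1 (its name does not match resume[0]) and starts iterating at
    // dir2; processFlag flips to true once the node named
    // resume[resume.length - 1] is reached at level == resume.length.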
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java
new file mode 100644
index 00000000000..738aa236602
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffListingInfo.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.hadoop.hdfs.DFSUtilClient;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing.DiffReportListingEntry;
+import org.apache.hadoop.hdfs.server.namenode.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
+import org.apache.hadoop.hdfs.server.namenode.INodeReference;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.ChildrenDiff;
+import org.apache.hadoop.hdfs.util.Diff.ListType;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.util.ChunkedArrayList;
+
+/**
+ * A class describing the difference between snapshots of a snapshottable
+ * directory, where the number of entries in the difference is limited by
+ * dfs.namenode.snapshotdiff.listing.limit.
+ */
+
+class SnapshotDiffListingInfo {
+ private final int maxEntries;
+
+ /** The root directory of the snapshots. */
+ private final INodeDirectory snapshotRoot;
+ /**
+ * The scope directory under which snapshot diff is calculated.
+ */
+ private final INodeDirectory snapshotDiffScopeDir;
+ /** The starting point of the difference. */
+ private final Snapshot from;
+ /** The end point of the difference. */
+ private final Snapshot to;
+
+ /** The path from which the snapshot diff computation resumes. */
+ private byte[] lastPath = DFSUtilClient.EMPTY_BYTES;
+
+ private int lastIndex = -1;
+
+ /*
+ * A list containing all the modified entries between the given snapshots
+ * within a single rpc call.
+ */
+ private final List<DiffReportListingEntry> modifiedList =
+ new ChunkedArrayList<>();
+
+ private final List<DiffReportListingEntry> createdList =
+ new ChunkedArrayList<>();
+
+ private final List<DiffReportListingEntry> deletedList =
+ new ChunkedArrayList<>();
+
+ SnapshotDiffListingInfo(INodeDirectory snapshotRootDir,
+ INodeDirectory snapshotDiffScopeDir, Snapshot start, Snapshot end,
+ int snapshotDiffReportLimit) {
+ Preconditions.checkArgument(
+ snapshotRootDir.isSnapshottable() && snapshotDiffScopeDir
+ .isDescendantOfSnapshotRoot(snapshotRootDir));
+ this.snapshotRoot = snapshotRootDir;
+ this.snapshotDiffScopeDir = snapshotDiffScopeDir;
+ this.from = start;
+ this.to = end;
+ this.maxEntries = snapshotDiffReportLimit;
+ }
+
+ boolean addDirDiff(long dirId, byte[][] parent, ChildrenDiff diff) {
+ final Snapshot laterSnapshot = getLater();
+ if (lastIndex == -1) {
+ if (getTotalEntries() < maxEntries) {
+ modifiedList.add(new DiffReportListingEntry(
+ dirId, dirId, parent, true, null));
+ } else {
+ setLastPath(parent);
+ setLastIndex(-1);
+ return false;
+ }
+ }
+
+ if (lastIndex == -1 || lastIndex < diff.getList(ListType.CREATED).size()) {
+ ListIterator<INode> iterator = lastIndex != -1 ?
+ diff.getList(ListType.CREATED).listIterator(lastIndex)
+ : diff.getList(ListType.CREATED).listIterator();
+ while (iterator.hasNext()) {
+ if (getTotalEntries() < maxEntries) {
+ INode created = iterator.next();
+ byte[][] path = newPath(parent, created.getLocalNameBytes());
+ createdList.add(new DiffReportListingEntry(dirId, created.getId(),
+ path, created.isReference(), null));
+ } else {
+ setLastPath(parent);
+ setLastIndex(iterator.nextIndex());
+ return false;
+ }
+ }
+ setLastIndex(-1);
+ }
+
+ if (lastIndex == -1 || lastIndex >= diff.getList(ListType.CREATED).size()) {
+ int size = diff.getList(ListType.DELETED).size();
+ ListIterator<INode> iterator = lastIndex != -1 ?
+ diff.getList(ListType.DELETED).listIterator(lastIndex - size)
+ : diff.getList(ListType.DELETED).listIterator();
+ while (iterator.hasNext()) {
+ if (getTotalEntries() < maxEntries) {
+ final INode d = iterator.next();
+ byte[][] path = newPath(parent, d.getLocalNameBytes());
+ byte[][] target = findRenameTargetPath(d, laterSnapshot);
+ final DiffReportListingEntry e = target != null ?
+ new DiffReportListingEntry(dirId, d.getId(), path, true, target) :
+ new DiffReportListingEntry(dirId, d.getId(), path, false, null);
+ deletedList.add(e);
+ } else {
+ setLastPath(parent);
+ setLastIndex(size + iterator.nextIndex());
+ return false;
+ }
+ }
+ setLastIndex(-1);
+ }
+ return true;
+ }
+
+ private byte[][] findRenameTargetPath(INode deleted, Snapshot laterSnapshot) {
+ if (deleted instanceof INodeReference.WithName) {
+ return snapshotRoot.getDirectorySnapshottableFeature()
+ .findRenameTargetPath(snapshotDiffScopeDir,
+ (INodeReference.WithName) deleted,
+ Snapshot.getSnapshotId(laterSnapshot));
+ }
+ return null;
+ }
+
+ private static byte[][] newPath(byte[][] parent, byte[] name) {
+ byte[][] fullPath = new byte[parent.length + 1][];
+ System.arraycopy(parent, 0, fullPath, 0, parent.length);
+ fullPath[fullPath.length - 1] = name;
+ return fullPath;
+ }
+
+ Snapshot getEarlier() {
+ return isFromEarlier()? from: to;
+ }
+
+ Snapshot getLater() {
+ return isFromEarlier()? to: from;
+ }
+
+
+ public void setLastPath(byte[][] lastPath) {
+ this.lastPath = DFSUtilClient.byteArray2bytes(lastPath);
+ }
+
+ public void setLastIndex(int idx) {
+ this.lastIndex = idx;
+ }
+
+ boolean addFileDiff(INodeFile file, byte[][] relativePath) {
+ if (getTotalEntries() < maxEntries) {
+ modifiedList.add(new DiffReportListingEntry(file.getId(),
+ file.getId(), relativePath, false, null));
+ } else {
+ setLastPath(relativePath);
+ return false;
+ }
+ return true;
+ }
+ /** @return True if {@link #from} is earlier than {@link #to} */
+ boolean isFromEarlier() {
+ return Snapshot.ID_COMPARATOR.compare(from, to) < 0;
+ }
+
+
+ private int getTotalEntries() {
+ return createdList.size() + modifiedList.size() + deletedList.size();
+ }
+
+ /**
+ * Generate a {@link SnapshotDiffReportListing} based on detailed diff
+ * information.
+ *
+ * @return A {@link SnapshotDiffReportListing} describing the difference
+ */
+ public SnapshotDiffReportListing generateReport() {
+ return new SnapshotDiffReportListing(lastPath, modifiedList, createdList,
+ deletedList, lastIndex, isFromEarlier());
+ }
+}
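When the entry budget is exhausted, the class records where it stopped: lastPath holds the flattened parent path of the directory being processed and lastIndex the offset into its created/deleted child lists, and both travel back to the client inside the generated report. A small sketch of that encoding, assuming DFSUtilClient.byteArray2bytes joins path components the same way it is used in setLastPath above:

    // addDirDiff ran out of budget inside "dir1" before created entry 2.
    byte[][] parent = { DFSUtil.string2Bytes("dir1") };
    byte[] lastPath = DFSUtilClient.byteArray2bytes(parent); // "dir1"
    int lastIndex = 2;
    // The client passes (lastPath, lastIndex) back as (startPath, index),
    // and the next call resumes at created.listIterator(2) of that directory.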
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
index 58a218e5d0d..87985de66bf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
+import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.protocol.SnapshotInfo;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
@@ -466,6 +467,33 @@ public class SnapshotManager implements SnapshotStatsMXBean {
return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
snapshotPath, from, to, Collections.<DiffReportEntry> emptyList());
}
+
+ /**
+ * Compute the partial difference between two snapshots of a directory,
+ * or between a snapshot of the directory and its current tree.
+ */
+ public SnapshotDiffReportListing diff(final INodesInPath iip,
+ final String snapshotPath, final String from, final String to,
+ byte[] startPath, int index, int snapshotDiffReportLimit)
+ throws IOException {
+ // Find the source root directory path where the snapshots were taken.
+ // All the checks for the path are included in the valueOf method.
+ INodeDirectory snapshotRootDir;
+ if (this.snapshotDiffAllowSnapRootDescendant) {
+ snapshotRootDir = getSnapshottableAncestorDir(iip);
+ } else {
+ snapshotRootDir = getSnapshottableRoot(iip);
+ }
+ Preconditions.checkNotNull(snapshotRootDir);
+ INodeDirectory snapshotDescendantDir = INodeDirectory.valueOf(
+ iip.getLastINode(), snapshotPath);
+ final SnapshotDiffListingInfo diffs =
+ snapshotRootDir.getDirectorySnapshottableFeature()
+ .computeDiff(snapshotRootDir, snapshotDescendantDir, from, to,
+ startPath, index, snapshotDiffReportLimit);
+ return diffs != null ? diffs.generateReport() :
+ new SnapshotDiffReportListing();
+ }
public void clearSnapshottableDirs() {
snapshottables.clear();
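Because the manager resolves the snapshottable ancestor separately from the scope directory, a diff can be requested on any descendant of the snapshot root when DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT is enabled. A hedged usage sketch through the public FileSystem API (paths and snapshot names are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;

    public class ScopedDiffExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        DistributedFileSystem dfs = (DistributedFileSystem)
            new Path("hdfs:///").getFileSystem(conf);
        // "/snap" is snapshottable; "/snap/sub" is a plain descendant dir.
        SnapshotDiffReport report =
            dfs.getSnapshotDiffReport(new Path("/snap/sub"), "s0", "s1");
        System.out.println(report);
      }
    }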
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 79c2d8ec46a..dedf987a32d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4332,6 +4332,17 @@
+
+<property>
+  <name>dfs.namenode.snapshotdiff.listing.limit</name>
+  <value>1000</value>
+  <description>
+    Limit the number of entries generated by getSnapshotDiffReportListing
+    within one rpc call to the namenode. If less than or equal to zero, at
+    most DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT (= 1000) entries
+    will be sent across to the client within one rpc call.
+  </description>
+</property>
+
<property>
  <name>dfs.pipeline.ecn</name>
  <value>false</value>
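For tests or clusters that need a different page size, the same limit can be set programmatically. A minimal sketch, assuming DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT (used by the test below) names this key:

    Configuration conf = new HdfsConfiguration();
    // Cap each getSnapshotDiffReportListing response at 3 entries.
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT, 3);
    // Values <= 0 fall back to the 1000-entry default at the namenode.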
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
index e0a7b5bd6ca..a4fb8abd59f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
@@ -90,6 +90,7 @@ public class TestSnapshotDiffReport {
conf.setBoolean(
DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
true);
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT, 3);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
.format(true).build();
cluster.waitActive();
@@ -1293,4 +1294,119 @@ public class TestSnapshotDiffReport {
assertAtimeNotEquals(filePostSS, root, "s2", "s3");
}
+
+ /**
+ * Tests to verify the diff report when the maximum number of snapshot
+ * diff report entries allowed per rpc call is set to 3.
+ * @throws Exception
+ */
+ @Test
+ public void testDiffReportWithRpcLimit() throws Exception {
+ final Path root = new Path("/");
+ hdfs.mkdirs(root);
+ for (int i = 1; i < 4; i++) {
+ final Path path = new Path(root, "dir" + i);
+ hdfs.mkdirs(path);
+ }
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
+ for (int i = 1; i < 4; i++) {
+ final Path path = new Path(root, "dir" + i);
+ for (int j = 1; j < 4; j++) {
+ final Path file = new Path(path, "file" + j);
+ DFSTestUtil.createFile(hdfs, file, BLOCKSIZE, REPLICATION, SEED);
+ }
+ }
+
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
+ verifyDiffReport(root, "s0", "s1",
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir1")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir1/file1")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir1/file2")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir1/file3")),
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir2")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir2/file1")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir2/file2")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir2/file3")),
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir3")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir3/file1")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir3/file2")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir3/file3")));
+ }
+
+ @Test
+ public void testDiffReportWithRpcLimit2() throws Exception {
+ final Path root = new Path("/");
+ hdfs.mkdirs(root);
+ for (int i = 1; i <= 3; i++) {
+ final Path path = new Path(root, "dir" + i);
+ hdfs.mkdirs(path);
+ }
+ for (int i = 1; i <= 3; i++) {
+ final Path path = new Path(root, "dir" + i);
+ for (int j = 1; j < 4; j++) {
+ final Path file = new Path(path, "file" + j);
+ DFSTestUtil.createFile(hdfs, file, BLOCKSIZE, REPLICATION, SEED);
+ }
+ }
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
+ Path targetDir = new Path(root, "dir4");
+ // create directory dir4
+ hdfs.mkdirs(targetDir);
+ // move files from dir1 to dir4
+ Path path = new Path(root, "dir1");
+ for (int j = 1; j < 4; j++) {
+ final Path srcPath = new Path(path, "file" + j);
+ final Path targetPath = new Path(targetDir, "file" + j);
+ hdfs.rename(srcPath, targetPath);
+ }
+ targetDir = new Path(root, "dir3");
+ // overwrite existing files in dir3 with files moved from dir2
+ path = new Path(root, "dir2");
+ for (int j = 1; j < 4; j++) {
+ final Path srcPath = new Path(path, "file" + j);
+ final Path targetPath = new Path(targetDir, "file" + j);
+ hdfs.rename(srcPath, targetPath, Rename.OVERWRITE);
+ }
+ final Path pathToRename = new Path(root, "dir2");
+ // move dir2 inside dir3
+ hdfs.rename(pathToRename, targetDir);
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
+ verifyDiffReport(root, "s0", "s1",
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
+ new DiffReportEntry(DiffType.CREATE,
+ DFSUtil.string2Bytes("dir4")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2"),
+ DFSUtil.string2Bytes("dir3/dir2")),
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir1")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file1"),
+ DFSUtil.string2Bytes("dir4/file1")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file2"),
+ DFSUtil.string2Bytes("dir4/file2")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir1/file3"),
+ DFSUtil.string2Bytes("dir4/file3")),
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir2")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file1"),
+ DFSUtil.string2Bytes("dir3/file1")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file2"),
+ DFSUtil.string2Bytes("dir3/file2")),
+ new DiffReportEntry(DiffType.RENAME, DFSUtil.string2Bytes("dir2/file3"),
+ DFSUtil.string2Bytes("dir3/file3")),
+ new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("dir3")),
+ new DiffReportEntry(DiffType.DELETE,
+ DFSUtil.string2Bytes("dir3/file1")),
+ new DiffReportEntry(DiffType.DELETE,
+ DFSUtil.string2Bytes("dir3/file1")),
+ new DiffReportEntry(DiffType.DELETE,
+ DFSUtil.string2Bytes("dir3/file3")));
+ }
}
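As a sanity check on the limit of 3 configured for these tests: the first test expects 13 diff entries (one MODIFY for the root, one per dir1..dir3, and nine CREATEs), so assembling the full report takes at least ceil(13/3) = 5 listing calls, exercising resumption both between directories and inside a created list. The page-count helper below is purely illustrative:

    // Hypothetical helper: minimum rpc calls to ship `entries` diff entries
    // at `limit` entries per call (ceiling division).
    static int minRpcCalls(int entries, int limit) {
      return (entries + limit - 1) / limit;
    }
    // minRpcCalls(13, 3) == 5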