HDFS-12191. Provide option to not capture the accessTime change of a file to snapshot if no other modification has been done to this file. Contributed by Yongjun Zhang.

(cherry picked from commit cf93d60d3f)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
This commit is contained in:
Yongjun Zhang 2017-08-29 16:57:03 -07:00
parent 036cbd7453
commit 870ef0b2fd
8 changed files with 233 additions and 27 deletions

View File

@ -331,6 +331,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final boolean DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT = public static final boolean DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT =
HdfsClientConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT; HdfsClientConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
public static final String DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE = "dfs.namenode.snapshot.skip.capture.accesstime-only-change";
public static final boolean DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT = false;
// Whether to enable datanode's stale state detection and usage for reads // Whether to enable datanode's stale state detection and usage for reads
public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode"; public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false; public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;

View File

@ -493,7 +493,9 @@ public class FSDirAttrOp {
// then no need to store access time // then no need to store access time
if (atime != -1 && (status || force if (atime != -1 && (status || force
|| atime > inode.getAccessTime() + fsd.getAccessTimePrecision())) { || atime > inode.getAccessTime() + fsd.getAccessTimePrecision())) {
inode.setAccessTime(atime, latest); inode.setAccessTime(atime, latest,
fsd.getFSNamesystem().getSnapshotManager().
getSkipCaptureAccessTimeOnlyChange());
status = true; status = true;
} }
return status; return status;

View File

@ -409,7 +409,7 @@ public class FSEditLogLoader {
// update the block list. // update the block list.
// Update the salient file attributes. // Update the salient file attributes.
newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID); newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID); newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
updateBlocks(fsDir, addCloseOp, iip, newFile); updateBlocks(fsDir, addCloseOp, iip, newFile);
break; break;
@ -429,7 +429,7 @@ public class FSEditLogLoader {
final INodeFile file = INodeFile.valueOf(iip.getLastINode(), path); final INodeFile file = INodeFile.valueOf(iip.getLastINode(), path);
// Update the salient file attributes. // Update the salient file attributes.
file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID); file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID); file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
updateBlocks(fsDir, addCloseOp, iip, file); updateBlocks(fsDir, addCloseOp, iip, file);

View File

@ -723,8 +723,11 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
/** /**
* Set last access time of inode. * Set last access time of inode.
*/ */
public final INode setAccessTime(long accessTime, int latestSnapshotId) { public final INode setAccessTime(long accessTime, int latestSnapshotId,
recordModification(latestSnapshotId); boolean skipCaptureAccessTimeOnlyChangeInSnapshot) {
if (!skipCaptureAccessTimeOnlyChangeInSnapshot) {
recordModification(latestSnapshotId);
}
setAccessTime(accessTime); setAccessTime(accessTime);
return this; return this;
} }

View File

@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode.snapshot;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT;
import java.io.DataInput; import java.io.DataInput;
import java.io.DataOutput; import java.io.DataOutput;
@ -32,6 +34,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import javax.management.ObjectName; import javax.management.ObjectName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.DFSUtilClient;
@ -65,8 +69,19 @@ import com.google.common.base.Preconditions;
* if necessary. * if necessary.
*/ */
public class SnapshotManager implements SnapshotStatsMXBean { public class SnapshotManager implements SnapshotStatsMXBean {
public static final Log LOG = LogFactory.getLog(SnapshotManager.class);
private final FSDirectory fsdir; private final FSDirectory fsdir;
private final boolean captureOpenFiles; private final boolean captureOpenFiles;
/**
* If skipCaptureAccessTimeOnlyChange is set to true, if accessTime
* of a file changed but there is no other modification made to the file,
* it will not be captured in next snapshot. However, if there is other
* modification made to the file, the last access time will be captured
* together with the modification in next snapshot.
*/
private boolean skipCaptureAccessTimeOnlyChange = false;
private final AtomicInteger numSnapshots = new AtomicInteger(); private final AtomicInteger numSnapshots = new AtomicInteger();
private static final int SNAPSHOT_ID_BIT_WIDTH = 24; private static final int SNAPSHOT_ID_BIT_WIDTH = 24;
@ -82,6 +97,19 @@ public class SnapshotManager implements SnapshotStatsMXBean {
this.captureOpenFiles = conf.getBoolean( this.captureOpenFiles = conf.getBoolean(
DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES,
DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT); DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT);
this.skipCaptureAccessTimeOnlyChange = conf.getBoolean(
DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT);
LOG.info("Loaded config captureOpenFiles: " + captureOpenFiles
+ "skipCaptureAccessTimeOnlyChange: " +
skipCaptureAccessTimeOnlyChange);
}
/**
* @return skipCaptureAccessTimeOnlyChange
*/
public boolean getSkipCaptureAccessTimeOnlyChange() {
return skipCaptureAccessTimeOnlyChange;
} }
/** Used in tests only */ /** Used in tests only */

View File

@ -3947,22 +3947,34 @@
</description> </description>
</property> </property>
<property> <property>
<name>dfs.namenode.snapshot.capture.openfiles</name> <name>dfs.namenode.snapshot.capture.openfiles</name>
<value>false</value> <value>false</value>
<description> <description>
If true, snapshots taken will have an immutable copy of the If true, snapshots taken will have an immutable shared copy of
open files that have valid leases. Even after the open files the open files that have valid leases. Even after the open files
grow or shrink in size, snapshot will always have the previous grow or shrink in size, snapshot will always have the previous
point-in-time version of the open files, just like all other point-in-time version of the open files, just like all other
closed files. Default is false. closed files. Default is false.
Note: The file length captured for open files in snapshot is Note: The file length captured for open files in snapshot is
whats recorded in NameNode at the time of snapshot and it may whats recorded in NameNode at the time of snapshot and it may
be shorter than what the client has written till then. In order be shorter than what the client has written till then. In order
to capture the latest length, the client can call hflush/hsync to capture the latest length, the client can call hflush/hsync
with the flag SyncFlag.UPDATE_LENGTH on the open files handles. with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
</description> </description>
</property> </property>
<property>
<name>dfs.namenode.snapshot.skip.capture.accesstime-only-change</name>
<value>false</value>
<description>
If accessTime of a file/directory changed but there is no other
modification made to the file/directory, the changed accesstime will
not be captured in next snapshot. However, if there is other modification
made to the file/directory, the latest access time will be captured
together with the modification in next snapshot.
</description>
</property>
<property> <property>
<name>dfs.pipeline.ecn</name> <name>dfs.pipeline.ecn</name>

View File

@ -22,6 +22,8 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
import org.junit.Test; import org.junit.Test;
import org.mockito.Mockito; import org.mockito.Mockito;
@ -37,10 +39,15 @@ public class TestFSDirAttrOp {
private boolean unprotectedSetTimes(long atime, long atime0, long precision, private boolean unprotectedSetTimes(long atime, long atime0, long precision,
long mtime, boolean force) throws QuotaExceededException, UnresolvedLinkException { long mtime, boolean force) throws QuotaExceededException, UnresolvedLinkException {
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
SnapshotManager ssMgr = Mockito.mock(SnapshotManager.class);
FSDirectory fsd = Mockito.mock(FSDirectory.class); FSDirectory fsd = Mockito.mock(FSDirectory.class);
INodesInPath iip = Mockito.mock(INodesInPath.class); INodesInPath iip = Mockito.mock(INodesInPath.class);
INode inode = Mockito.mock(INode.class); INode inode = Mockito.mock(INode.class);
when(fsd.getFSNamesystem()).thenReturn(fsn);
when(fsn.getSnapshotManager()).thenReturn(ssMgr);
when(ssMgr.getSkipCaptureAccessTimeOnlyChange()).thenReturn(false);
when(fsd.getAccessTimePrecision()).thenReturn(precision); when(fsd.getAccessTimePrecision()).thenReturn(precision);
when(fsd.hasWriteLock()).thenReturn(Boolean.TRUE); when(fsd.hasWriteLock()).thenReturn(Boolean.TRUE);
when(iip.getLastINode()).thenReturn(inode); when(iip.getLastINode()).thenReturn(inode);

View File

@ -18,10 +18,13 @@
package org.apache.hadoop.hdfs.server.namenode.snapshot; package org.apache.hadoop.hdfs.server.namenode.snapshot;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import java.io.IOException; import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.Random; import java.util.Random;
@ -43,15 +46,21 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Time;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* Tests snapshot deletion. * Tests snapshot deletion.
*/ */
public class TestSnapshotDiffReport { public class TestSnapshotDiffReport {
private static final Logger LOG =
LoggerFactory.getLogger(TestSnapshotDiffReport.class);
private static final long SEED = 0; private static final long SEED = 0;
private static final short REPLICATION = 3; private static final short REPLICATION = 3;
private static final short REPLICATION_1 = 2; private static final short REPLICATION_1 = 2;
@ -73,6 +82,10 @@ public class TestSnapshotDiffReport {
conf = new Configuration(); conf = new Configuration();
conf.setBoolean( conf.setBoolean(
DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, true); DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, true);
conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
conf.setBoolean(
DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
true);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION) cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
.format(true).build(); .format(true).build();
cluster.waitActive(); cluster.waitActive();
@ -167,8 +180,8 @@ public class TestSnapshotDiffReport {
// reverse the order of from and to // reverse the order of from and to
SnapshotDiffReport inverseReport = hdfs SnapshotDiffReport inverseReport = hdfs
.getSnapshotDiffReport(dir, to, from); .getSnapshotDiffReport(dir, to, from);
System.out.println(report.toString()); LOG.info(report.toString());
System.out.println(inverseReport.toString() + "\n"); LOG.info(inverseReport.toString() + "\n");
assertEquals(entries.length, report.getDiffList().size()); assertEquals(entries.length, report.getDiffList().size());
assertEquals(entries.length, inverseReport.getDiffList().size()); assertEquals(entries.length, inverseReport.getDiffList().size());
@ -221,20 +234,20 @@ public class TestSnapshotDiffReport {
// diff between the same snapshot // diff between the same snapshot
SnapshotDiffReport report = hdfs.getSnapshotDiffReport(sub1, "s0", "s0"); SnapshotDiffReport report = hdfs.getSnapshotDiffReport(sub1, "s0", "s0");
System.out.println(report); LOG.info(report.toString());
assertEquals(0, report.getDiffList().size()); assertEquals(0, report.getDiffList().size());
report = hdfs.getSnapshotDiffReport(sub1, "", ""); report = hdfs.getSnapshotDiffReport(sub1, "", "");
System.out.println(report); LOG.info(report.toString());
assertEquals(0, report.getDiffList().size()); assertEquals(0, report.getDiffList().size());
report = hdfs.getSnapshotDiffReport(subsubsub1, "s0", "s2"); report = hdfs.getSnapshotDiffReport(subsubsub1, "s0", "s2");
System.out.println(report); LOG.info(report.toString());
assertEquals(0, report.getDiffList().size()); assertEquals(0, report.getDiffList().size());
// test path with scheme also works // test path with scheme also works
report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1), "s0", "s2"); report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1), "s0", "s2");
System.out.println(report); LOG.info(report.toString());
assertEquals(0, report.getDiffList().size()); assertEquals(0, report.getDiffList().size());
verifyDiffReport(sub1, "s0", "s2", verifyDiffReport(sub1, "s0", "s2",
@ -677,4 +690,142 @@ public class TestSnapshotDiffReport {
} }
private long getAccessTime(Path path) throws IOException {
return hdfs.getFileStatus(path).getAccessTime();
}
private String getAccessTimeStr(Path path) throws IOException {
SimpleDateFormat timeFmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return timeFmt.format(new Date(getAccessTime(path)));
}
private Path getSSpath(Path path, Path ssRoot, String ssName) {
return new Path(ssRoot, ".snapshot/" + ssName + "/" +
path.toString().substring(ssRoot.toString().length()));
}
private void printAtime(Path path, Path ssRoot, String ssName)
throws IOException {
Path ssPath = getSSpath(path, ssRoot, ssName);
LOG.info("Access time "
+ path + ": " + getAccessTimeStr(path)
+ " " + ssPath + ": " + getAccessTimeStr(ssPath));
}
private void assertAtimeEquals(Path path, Path ssRoot,
String ssName1, String ssName2)
throws IOException {
Path ssPath1 = getSSpath(path, ssRoot, ssName1);
Path ssPath2 = getSSpath(path, ssRoot, ssName2);
assertEquals(getAccessTime(ssPath1), getAccessTime(ssPath2));
}
private void assertAtimeNotEquals(Path path, Path ssRoot,
String ssName1, String ssName2)
throws IOException {
Path ssPath1 = getSSpath(path, ssRoot, ssName1);
Path ssPath2 = getSSpath(path, ssRoot, ssName2);
assertNotEquals(getAccessTime(ssPath1), getAccessTime(ssPath2));
}
/**
* Check to see access time is not captured in snapshot when applicable.
* When DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE
* is set to true, and if a file's access time changed between two
* snapshots but has no other modification, then the access time is not
* captured in snapshot.
*/
@Test
public void testDontCaptureAccessTimeOnlyChangeReport() throws Exception {
final Path froot = new Path("/");
final Path root = new Path(froot, "/testSdiffCalc");
// items created pre enabling snapshot
final Path filePreSS = new Path(root, "fParent/filePreSS");
final Path dirPreSS = new Path(root, "dirPreSS");
final Path dirPreSSChild = new Path(dirPreSS, "dirPreSSChild");
// items created after enabling snapshot
final Path filePostSS = new Path(root, "fParent/filePostSS");
final Path dirPostSS = new Path(root, "dirPostSS");
final Path dirPostSSChild = new Path(dirPostSS, "dirPostSSChild");
DFSTestUtil.createFile(hdfs, filePreSS, BLOCKSIZE, REPLICATION, SEED);
DFSTestUtil.createFile(hdfs, dirPreSSChild, BLOCKSIZE, REPLICATION, SEED);
SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
printAtime(filePreSS, root, "s0");
printAtime(dirPreSS, root, "s0");
// items created after creating the first snapshot
DFSTestUtil.createFile(hdfs, filePostSS, BLOCKSIZE, REPLICATION, SEED);
DFSTestUtil.createFile(hdfs, dirPostSSChild, BLOCKSIZE, REPLICATION, SEED);
Thread.sleep(3000);
long now = Time.now();
hdfs.setTimes(filePreSS, -1, now);
hdfs.setTimes(filePostSS, -1, now);
hdfs.setTimes(dirPreSS, -1, now);
hdfs.setTimes(dirPostSS, -1, now);
SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
printAtime(filePreSS, root, "s1");
printAtime(dirPreSS, root, "s1");
printAtime(filePostSS, root, "s1");
printAtime(dirPostSS, root, "s1");
Thread.sleep(3000);
now = Time.now();
hdfs.setTimes(filePreSS, -1, now);
hdfs.setTimes(filePostSS, -1, now);
hdfs.setTimes(dirPreSS, -1, now);
hdfs.setTimes(dirPostSS, -1, now);
SnapshotTestHelper.createSnapshot(hdfs, root, "s2");
printAtime(filePreSS, root, "s2");
printAtime(dirPreSS, root, "s2");
printAtime(filePostSS, root, "s2");
printAtime(dirPostSS, root, "s2");
Thread.sleep(3000);
now = Time.now();
// modify filePostSS, and change access time
hdfs.setReplication(filePostSS, (short) (REPLICATION - 1));
hdfs.setTimes(filePostSS, -1, now);
SnapshotTestHelper.createSnapshot(hdfs, root, "s3");
LOG.info("\nsnapshotDiff s0 -> s1:");
LOG.info(hdfs.getSnapshotDiffReport(root, "s0", "s1").toString());
LOG.info("\nsnapshotDiff s1 -> s2:");
LOG.info(hdfs.getSnapshotDiffReport(root, "s1", "s2").toString());
assertAtimeEquals(filePreSS, root, "s0", "s1");
assertAtimeEquals(dirPreSS, root, "s0", "s1");
assertAtimeEquals(filePreSS, root, "s1", "s2");
assertAtimeEquals(dirPreSS, root, "s1", "s2");
assertAtimeEquals(filePostSS, root, "s1", "s2");
assertAtimeEquals(dirPostSS, root, "s1", "s2");
// access time should be captured in snapshot due to
// other modification
assertAtimeNotEquals(filePostSS, root, "s2", "s3");
// restart NN, and see the access time relationship
// still stands (no change caused by edit logs
// loading)
cluster.restartNameNodes();
cluster.waitActive();
assertAtimeEquals(filePreSS, root, "s0", "s1");
assertAtimeEquals(dirPreSS, root, "s0", "s1");
assertAtimeEquals(filePreSS, root, "s1", "s2");
assertAtimeEquals(dirPreSS, root, "s1", "s2");
assertAtimeEquals(filePostSS, root, "s1", "s2");
assertAtimeEquals(dirPostSS, root, "s1", "s2");
assertAtimeNotEquals(filePostSS, root, "s2", "s3");
}
} }