HDFS-12191. Provide option to not capture the accessTime change of a file to snapshot if no other modification has been done to this file. Contributed by Yongjun Zhang.

This commit is contained in:
Yongjun Zhang 2017-08-29 16:57:03 -07:00
parent 26fafc3597
commit cf93d60d3f
8 changed files with 233 additions and 27 deletions

View File

@ -357,6 +357,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final boolean DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT =
HdfsClientConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
// Whether to skip capturing, in the next snapshot, an access time change
// of a file/directory that has no other modification
public static final String DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE = "dfs.namenode.snapshot.skip.capture.accesstime-only-change";
public static final boolean DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT = false;
// Whether to enable datanode's stale state detection and usage for reads
public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;

View File

@ -494,7 +494,9 @@ public class FSDirAttrOp {
// then no need to store access time
if (atime != -1 && (status || force
|| atime > inode.getAccessTime() + fsd.getAccessTimePrecision())) {
inode.setAccessTime(atime, latest);
inode.setAccessTime(atime, latest,
fsd.getFSNamesystem().getSnapshotManager().
getSkipCaptureAccessTimeOnlyChange());
status = true;
}
return status;

View File

@ -413,7 +413,7 @@ public class FSEditLogLoader {
// update the block list.
// Update the salient file attributes.
newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
ErasureCodingPolicy ecPolicy =
FSDirErasureCodingOp.unprotectedGetErasureCodingPolicy(
@ -436,7 +436,7 @@ public class FSEditLogLoader {
final INodeFile file = INodeFile.valueOf(iip.getLastINode(), path);
// Update the salient file attributes.
file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
ErasureCodingPolicy ecPolicy =
FSDirErasureCodingOp.unprotectedGetErasureCodingPolicy(

View File

@ -724,8 +724,11 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
/**
 * Set last access time of inode.
 *
 * @param accessTime the new access time to store on this inode
 * @param latestSnapshotId the snapshot id handed to recordModification
 * @param skipCaptureAccessTimeOnlyChangeInSnapshot if true, the
 *        recordModification call is skipped and only the access time
 *        is updated
 * @return this inode
 */
public final INode setAccessTime(long accessTime, int latestSnapshotId) {
recordModification(latestSnapshotId);
public final INode setAccessTime(long accessTime, int latestSnapshotId,
boolean skipCaptureAccessTimeOnlyChangeInSnapshot) {
// Record the modification against the given snapshot id only when the
// caller has not asked to skip it.
if (!skipCaptureAccessTimeOnlyChangeInSnapshot) {
recordModification(latestSnapshotId);
}
setAccessTime(accessTime);
return this;
}

View File

@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode.snapshot;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT;
import java.io.DataInput;
import java.io.DataOutput;
@ -33,6 +35,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import javax.management.ObjectName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtilClient;
@ -67,8 +71,19 @@ import com.google.common.base.Preconditions;
* if necessary.
*/
public class SnapshotManager implements SnapshotStatsMXBean {
public static final Log LOG = LogFactory.getLog(SnapshotManager.class);
private final FSDirectory fsdir;
private final boolean captureOpenFiles;
/**
 * When set to true, a change to a file's access time that is not
 * accompanied by any other modification of the file is not captured in
 * the next snapshot. If the file is otherwise modified, its last access
 * time is captured together with that modification in the next snapshot.
 */
private boolean skipCaptureAccessTimeOnlyChange = false;
private final AtomicInteger numSnapshots = new AtomicInteger();
private static final int SNAPSHOT_ID_BIT_WIDTH = 24;
@ -84,6 +99,19 @@ public class SnapshotManager implements SnapshotStatsMXBean {
this.captureOpenFiles = conf.getBoolean(
DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES,
DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT);
this.skipCaptureAccessTimeOnlyChange = conf.getBoolean(
DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT);
// Log both snapshot settings read from the configuration. The ", "
// separator keeps the captureOpenFiles value from running into the
// next label in the log line.
LOG.info("Loaded config captureOpenFiles: " + captureOpenFiles
    + ", skipCaptureAccessTimeOnlyChange: "
    + skipCaptureAccessTimeOnlyChange);
}
/**
 * Reports whether access-time-only changes are left out of snapshots.
 *
 * @return the value of skipCaptureAccessTimeOnlyChange held by this manager
 */
public boolean getSkipCaptureAccessTimeOnlyChange() {
  return this.skipCaptureAccessTimeOnlyChange;
}
/** Used in tests only */

View File

@ -4259,22 +4259,34 @@
</description>
</property>
<property>
<name>dfs.namenode.snapshot.capture.openfiles</name>
<value>false</value>
<description>
If true, snapshots taken will have an immutable shared copy of
the open files that have valid leases. Even after the open files
grow or shrink in size, snapshot will always have the previous
point-in-time version of the open files, just like all other
closed files. Default is false.
Note: The file length captured for open files in snapshot is
what is recorded in NameNode at the time of snapshot and it may
be shorter than what the client has written till then. In order
to capture the latest length, the client can call hflush/hsync
with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
</description>
</property>
<property>
<name>dfs.namenode.snapshot.capture.openfiles</name>
<value>false</value>
<description>
If true, snapshots taken will have an immutable shared copy of
the open files that have valid leases. Even after the open files
grow or shrink in size, snapshot will always have the previous
point-in-time version of the open files, just like all other
closed files. Default is false.
Note: The file length captured for open files in snapshot is
what is recorded in NameNode at the time of snapshot and it may
be shorter than what the client has written till then. In order
to capture the latest length, the client can call hflush/hsync
with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
</description>
</property>
<property>
<name>dfs.namenode.snapshot.skip.capture.accesstime-only-change</name>
<value>false</value>
<description>
If true, an access time change of a file/directory that has no other
modification will not be captured in the next snapshot. However, if
there is another modification made to the file/directory, the latest
access time will be captured together with the modification in the
next snapshot. Default is false.
</description>
</property>
<property>
<name>dfs.pipeline.ecn</name>

View File

@ -21,6 +21,8 @@ package org.apache.hadoop.hdfs.server.namenode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
import org.junit.Test;
import org.mockito.Mockito;
@ -36,10 +38,15 @@ public class TestFSDirAttrOp {
private boolean unprotectedSetTimes(long atime, long atime0, long precision,
long mtime, boolean force) throws QuotaExceededException {
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
SnapshotManager ssMgr = Mockito.mock(SnapshotManager.class);
FSDirectory fsd = Mockito.mock(FSDirectory.class);
INodesInPath iip = Mockito.mock(INodesInPath.class);
INode inode = Mockito.mock(INode.class);
when(fsd.getFSNamesystem()).thenReturn(fsn);
when(fsn.getSnapshotManager()).thenReturn(ssMgr);
when(ssMgr.getSkipCaptureAccessTimeOnlyChange()).thenReturn(false);
when(fsd.getAccessTimePrecision()).thenReturn(precision);
when(fsd.hasWriteLock()).thenReturn(Boolean.TRUE);
when(iip.getLastINode()).thenReturn(inode);

View File

@ -18,10 +18,13 @@
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Random;
@ -43,15 +46,21 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Time;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Tests snapshot diff report.
*/
public class TestSnapshotDiffReport {
private static final Logger LOG =
LoggerFactory.getLogger(TestSnapshotDiffReport.class);
private static final long SEED = 0;
private static final short REPLICATION = 3;
private static final short REPLICATION_1 = 2;
@ -73,6 +82,10 @@ public class TestSnapshotDiffReport {
conf = new Configuration();
conf.setBoolean(
DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, true);
conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
conf.setBoolean(
DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
true);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
.format(true).build();
cluster.waitActive();
@ -167,8 +180,8 @@ public class TestSnapshotDiffReport {
// reverse the order of from and to
SnapshotDiffReport inverseReport = hdfs
.getSnapshotDiffReport(dir, to, from);
System.out.println(report.toString());
System.out.println(inverseReport.toString() + "\n");
LOG.info(report.toString());
LOG.info(inverseReport.toString() + "\n");
assertEquals(entries.length, report.getDiffList().size());
assertEquals(entries.length, inverseReport.getDiffList().size());
@ -221,20 +234,20 @@ public class TestSnapshotDiffReport {
// diff between the same snapshot
SnapshotDiffReport report = hdfs.getSnapshotDiffReport(sub1, "s0", "s0");
System.out.println(report);
LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
report = hdfs.getSnapshotDiffReport(sub1, "", "");
System.out.println(report);
LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
report = hdfs.getSnapshotDiffReport(subsubsub1, "s0", "s2");
System.out.println(report);
LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
// test path with scheme also works
report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1), "s0", "s2");
System.out.println(report);
LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
verifyDiffReport(sub1, "s0", "s2",
@ -677,4 +690,142 @@ public class TestSnapshotDiffReport {
}
/**
 * Returns the access time reported by the file status of the given path.
 *
 * @param path the path whose file status is looked up via hdfs
 * @return the access time from that file status
 * @throws IOException if the file status lookup fails
 */
private long getAccessTime(Path path) throws IOException {
return hdfs.getFileStatus(path).getAccessTime();
}
/**
 * Formats the access time of the given path as "yyyy-MM-dd HH:mm:ss".
 *
 * @param path the path whose access time is formatted
 * @return the formatted access time
 * @throws IOException if the access time cannot be read
 */
private String getAccessTimeStr(Path path) throws IOException {
  final Date accessDate = new Date(getAccessTime(path));
  return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(accessDate);
}
/**
 * Builds the path of {@code path} as seen through snapshot
 * {@code ssName} of {@code ssRoot}: the part of {@code path} that follows
 * the string form of {@code ssRoot} is appended under
 * {@code .snapshot/<ssName>/} and resolved against {@code ssRoot}.
 *
 * @param path the live path
 * @param ssRoot the root the snapshot name is resolved against
 * @param ssName the snapshot name
 * @return the corresponding path below the .snapshot directory
 */
private Path getSSpath(Path path, Path ssRoot, String ssName) {
  final int rootLength = ssRoot.toString().length();
  final String remainder = path.toString().substring(rootLength);
  final String child = ".snapshot/" + ssName + "/" + remainder;
  return new Path(ssRoot, child);
}
/**
 * Logs the access time of {@code path} next to the access time of the
 * same path inside snapshot {@code ssName}.
 *
 * @param path the live path
 * @param ssRoot the root the snapshot name is resolved against
 * @param ssName the snapshot name
 * @throws IOException if either access time cannot be read
 */
private void printAtime(Path path, Path ssRoot, String ssName)
    throws IOException {
  final Path ssPath = getSSpath(path, ssRoot, ssName);
  final String liveAtime = getAccessTimeStr(path);
  final String snapshotAtime = getAccessTimeStr(ssPath);
  LOG.info("Access time " + path + ": " + liveAtime
      + " " + ssPath + ": " + snapshotAtime);
}
/**
 * Asserts that {@code path} has the same access time in snapshots
 * {@code ssName1} and {@code ssName2} of {@code ssRoot}.
 *
 * @throws IOException if either access time cannot be read
 */
private void assertAtimeEquals(Path path, Path ssRoot,
    String ssName1, String ssName2) throws IOException {
  final long atimeInFirst = getAccessTime(getSSpath(path, ssRoot, ssName1));
  final long atimeInSecond = getAccessTime(getSSpath(path, ssRoot, ssName2));
  assertEquals(atimeInFirst, atimeInSecond);
}
/**
 * Asserts that {@code path} has different access times in snapshots
 * {@code ssName1} and {@code ssName2} of {@code ssRoot}.
 *
 * @throws IOException if either access time cannot be read
 */
private void assertAtimeNotEquals(Path path, Path ssRoot,
    String ssName1, String ssName2) throws IOException {
  final long atimeInFirst = getAccessTime(getSSpath(path, ssRoot, ssName1));
  final long atimeInSecond = getAccessTime(getSSpath(path, ssRoot, ssName2));
  assertNotEquals(atimeInFirst, atimeInSecond);
}
/**
 * Check that an access time change is not captured in a snapshot when
 * applicable.
 * When DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE
 * is set to true, and a file's access time changed between two
 * snapshots but the file has no other modification, then the access
 * time is not captured in the snapshot.
 *
 * The test takes snapshots s0..s3 of the same root, calls setTimes on
 * files and directories between them, and compares the access times
 * seen through the snapshot paths, both before and after a NameNode
 * restart.
 */
@Test
public void testDontCaptureAccessTimeOnlyChangeReport() throws Exception {
final Path froot = new Path("/");
final Path root = new Path(froot, "/testSdiffCalc");
// items created before the first snapshot (s0) is taken
final Path filePreSS = new Path(root, "fParent/filePreSS");
final Path dirPreSS = new Path(root, "dirPreSS");
final Path dirPreSSChild = new Path(dirPreSS, "dirPreSSChild");
// items created after the first snapshot (s0) is taken
final Path filePostSS = new Path(root, "fParent/filePostSS");
final Path dirPostSS = new Path(root, "dirPostSS");
final Path dirPostSSChild = new Path(dirPostSS, "dirPostSSChild");
DFSTestUtil.createFile(hdfs, filePreSS, BLOCKSIZE, REPLICATION, SEED);
DFSTestUtil.createFile(hdfs, dirPreSSChild, BLOCKSIZE, REPLICATION, SEED);
SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
printAtime(filePreSS, root, "s0");
printAtime(dirPreSS, root, "s0");
// items created after creating the first snapshot
DFSTestUtil.createFile(hdfs, filePostSS, BLOCKSIZE, REPLICATION, SEED);
DFSTestUtil.createFile(hdfs, dirPostSSChild, BLOCKSIZE, REPLICATION, SEED);
// Wait before picking a new timestamp; presumably so that the new
// access time is clearly later than the previous one.
Thread.sleep(3000);
long now = Time.now();
// Pass -1 as the second argument and the current time as the third.
// NOTE(review): presumably -1 leaves the modification time untouched so
// that only the access time changes; confirm against setTimes.
hdfs.setTimes(filePreSS, -1, now);
hdfs.setTimes(filePostSS, -1, now);
hdfs.setTimes(dirPreSS, -1, now);
hdfs.setTimes(dirPostSS, -1, now);
SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
printAtime(filePreSS, root, "s1");
printAtime(dirPreSS, root, "s1");
printAtime(filePostSS, root, "s1");
printAtime(dirPostSS, root, "s1");
// Second round of setTimes-only changes, followed by snapshot s2.
Thread.sleep(3000);
now = Time.now();
hdfs.setTimes(filePreSS, -1, now);
hdfs.setTimes(filePostSS, -1, now);
hdfs.setTimes(dirPreSS, -1, now);
hdfs.setTimes(dirPostSS, -1, now);
SnapshotTestHelper.createSnapshot(hdfs, root, "s2");
printAtime(filePreSS, root, "s2");
printAtime(dirPreSS, root, "s2");
printAtime(filePostSS, root, "s2");
printAtime(dirPostSS, root, "s2");
Thread.sleep(3000);
now = Time.now();
// modify filePostSS (replication change), and change access time
hdfs.setReplication(filePostSS, (short) (REPLICATION - 1));
hdfs.setTimes(filePostSS, -1, now);
SnapshotTestHelper.createSnapshot(hdfs, root, "s3");
LOG.info("\nsnapshotDiff s0 -> s1:");
LOG.info(hdfs.getSnapshotDiffReport(root, "s0", "s1").toString());
LOG.info("\nsnapshotDiff s1 -> s2:");
LOG.info(hdfs.getSnapshotDiffReport(root, "s1", "s2").toString());
// setTimes-only changes: access times seen through consecutive
// snapshots are expected to be equal
assertAtimeEquals(filePreSS, root, "s0", "s1");
assertAtimeEquals(dirPreSS, root, "s0", "s1");
assertAtimeEquals(filePreSS, root, "s1", "s2");
assertAtimeEquals(dirPreSS, root, "s1", "s2");
assertAtimeEquals(filePostSS, root, "s1", "s2");
assertAtimeEquals(dirPostSS, root, "s1", "s2");
// access time should be captured in snapshot due to
// other modification
assertAtimeNotEquals(filePostSS, root, "s2", "s3");
// restart NN, and see the access time relationship
// still stands (no change caused by edit logs
// loading)
cluster.restartNameNodes();
cluster.waitActive();
assertAtimeEquals(filePreSS, root, "s0", "s1");
assertAtimeEquals(dirPreSS, root, "s0", "s1");
assertAtimeEquals(filePreSS, root, "s1", "s2");
assertAtimeEquals(dirPreSS, root, "s1", "s2");
assertAtimeEquals(filePostSS, root, "s1", "s2");
assertAtimeEquals(dirPostSS, root, "s1", "s2");
assertAtimeNotEquals(filePostSS, root, "s2", "s3");
}
}