HDFS-8036. Use snapshot path as source when using snapshot diff report in DistCp. Contributed by Jing Zhao.

This commit is contained in:
Haohui Mai 2015-04-01 16:50:59 -07:00
parent cf33bc1050
commit 8ebbbc6ebd
4 changed files with 63 additions and 4 deletions

View File

@ -944,6 +944,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7748. Separate ECN flags from the Status in the DataTransferPipelineAck. HDFS-7748. Separate ECN flags from the Status in the DataTransferPipelineAck.
(Anu Engineer and Haohui Mai via wheat9) (Anu Engineer and Haohui Mai via wheat9)
HDFS-8036. Use snapshot path as source when using snapshot diff report in
DistCp. (Jing Zhao via wheat9)
BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
HDFS-7720. Quota by Storage Type API, tools and ClientNameNode HDFS-7720. Quota by Storage Type API, tools and ClientNameNode

View File

@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import java.io.IOException; import java.io.IOException;
@ -86,6 +87,22 @@ class DistCpSync {
} finally { } finally {
deleteTargetTmpDir(targetFs, tmpDir); deleteTargetTmpDir(targetFs, tmpDir);
// TODO: since we have tmp directory, we can support "undo" with failures // TODO: since we have tmp directory, we can support "undo" with failures
// set the source path using the snapshot path
inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
inputOptions.getToSnapshot())));
}
}
/**
 * Normalizes a snapshot name given on the command line before it is handed
 * to the snapshot diff call.
 *
 * @param name snapshot name as supplied in the DistCp options
 * @return the empty string when {@code name} equals {@link Path#CUR_DIR};
 *         otherwise {@code name} itself, unchanged
 */
private static String getSnapshotName(String name) {
  // NOTE(review): the empty string is presumably how the snapshot diff API
  // refers to the current (non-snapshot) state -- confirm against
  // DistributedFileSystem#getSnapshotDiffReport.
  if (Path.CUR_DIR.equals(name)) {
    return "";
  }
  return name;
}
/**
 * Builds the path DistCp should read from once the snapshot diff has been
 * applied to the target.
 *
 * @param sourceDir    the original source directory
 * @param snapshotName name of the "to" snapshot from the DistCp options
 * @return {@code sourceDir} itself when {@code snapshotName} equals
 *         {@link Path#CUR_DIR}; otherwise the child path of
 *         {@code sourceDir} formed from
 *         {@link HdfsConstants#DOT_SNAPSHOT_DIR}, {@link Path#SEPARATOR}
 *         and {@code snapshotName}
 */
private static Path getSourceSnapshotPath(Path sourceDir, String snapshotName) {
  // No named "to" snapshot: keep copying from the source directory itself.
  if (Path.CUR_DIR.equals(snapshotName)) {
    return sourceDir;
  }
  final String snapshotRelativePath =
      HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + snapshotName;
  return new Path(sourceDir, snapshotRelativePath);
}
} }
@ -136,8 +153,10 @@ class DistCpSync {
static DiffInfo[] getDiffs(DistCpOptions inputOptions, static DiffInfo[] getDiffs(DistCpOptions inputOptions,
DistributedFileSystem fs, Path sourceDir, Path targetDir) { DistributedFileSystem fs, Path sourceDir, Path targetDir) {
try { try {
final String from = getSnapshotName(inputOptions.getFromSnapshot());
final String to = getSnapshotName(inputOptions.getToSnapshot());
SnapshotDiffReport sourceDiff = fs.getSnapshotDiffReport(sourceDir, SnapshotDiffReport sourceDiff = fs.getSnapshotDiffReport(sourceDir,
inputOptions.getFromSnapshot(), inputOptions.getToSnapshot()); from, to);
return DiffInfo.getDiffs(sourceDiff, targetDir); return DiffInfo.getDiffs(sourceDiff, targetDir);
} catch (IOException e) { } catch (IOException e) {
DistCp.LOG.warn("Failed to compute snapshot diff on " + sourceDir, e); DistCp.LOG.warn("Failed to compute snapshot diff on " + sourceDir, e);

View File

@ -90,8 +90,7 @@ public class CopyCommitter extends FileOutputCommitter {
} }
try { try {
if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false) if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
&& !(conf.getBoolean(DistCpConstants.CONF_LABEL_DIFF, false))) {
deleteMissing(conf); deleteMissing(conf);
} else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) { } else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
commitData(conf); commitData(conf);

View File

@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile;
@ -97,6 +98,8 @@ public class TestDistCpSync {
dfs.createSnapshot(source, "s2"); dfs.createSnapshot(source, "s2");
dfs.createSnapshot(target, "s1"); dfs.createSnapshot(target, "s1");
Assert.assertTrue(DistCpSync.sync(options, conf)); Assert.assertTrue(DistCpSync.sync(options, conf));
// reset source paths in options
options.setSourcePaths(Arrays.asList(source));
// changes have been made in target // changes have been made in target
final Path subTarget = new Path(target, "sub"); final Path subTarget = new Path(target, "sub");
@ -183,9 +186,21 @@ public class TestDistCpSync {
changeData(source); changeData(source);
dfs.createSnapshot(source, "s2"); dfs.createSnapshot(source, "s2");
// before sync, make some further changes on source. this should not affect
// the later distcp since we're copying (s2-s1) to target
final Path toDelete = new Path(source, "foo/d1/foo/f1");
dfs.delete(toDelete, true);
final Path newdir = new Path(source, "foo/d1/foo/newdir");
dfs.mkdirs(newdir);
// do the sync // do the sync
Assert.assertTrue(DistCpSync.sync(options, conf)); Assert.assertTrue(DistCpSync.sync(options, conf));
// make sure the source path has been updated to the snapshot path
final Path spath = new Path(source,
HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
Assert.assertEquals(spath, options.getSourcePaths().get(0));
// build copy listing // build copy listing
final Path listingPath = new Path("/tmp/META/fileList.seq"); final Path listingPath = new Path("/tmp/META/fileList.seq");
CopyListing listing = new GlobbedCopyListing(conf, new Credentials()); CopyListing listing = new GlobbedCopyListing(conf, new Credentials());
@ -209,7 +224,7 @@ public class TestDistCpSync {
.getCounter(CopyMapper.Counter.BYTESCOPIED).getValue()); .getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
// verify the source and target now has the same structure // verify the source and target now has the same structure
verifyCopy(dfs.getFileStatus(source), dfs.getFileStatus(target), false); verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
} }
private Map<Text, CopyListingFileStatus> getListing(Path listingPath) private Map<Text, CopyListingFileStatus> getListing(Path listingPath)
@ -248,6 +263,29 @@ public class TestDistCpSync {
} }
} }
/**
* Similar to testSync, but the "to" snapshot is specified as "." instead of
* a named snapshot. Only snapshot "s1" is created on the source; the changes
* made afterwards are never captured in a second snapshot.
*
* Verifies that the sync succeeds and that the source path stored in the
* options is left as the plain source directory.
*
* @throws Exception if any of the setup, snapshot or sync calls fails
*/
@Test
public void testSyncWithCurrent() throws Exception {
// diff from snapshot "s1" to "." (no named "to" snapshot)
options.setUseDiff(true, "s1", ".");
// populate both sides, then take the common baseline snapshot "s1" on each
initData(source);
initData(target);
dfs.allowSnapshot(source);
dfs.allowSnapshot(target);
dfs.createSnapshot(source, "s1");
dfs.createSnapshot(target, "s1");
// make changes under source
changeData(source);
// do the sync
Assert.assertTrue(DistCpSync.sync(options, conf));
// make sure the source path is still unchanged, i.e. it was not replaced
// by a path under the snapshot directory
Assert.assertEquals(source, options.getSourcePaths().get(0));
}
private void initData2(Path dir) throws Exception { private void initData2(Path dir) throws Exception {
final Path test = new Path(dir, "test"); final Path test = new Path(dir, "test");
final Path foo = new Path(dir, "foo"); final Path foo = new Path(dir, "foo");