From caec6a19458569ba9604e112ec52c0b844018999 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Fri, 13 Mar 2020 19:33:28 +0000
Subject: [PATCH] HADOOP-16775. DistCp reuses the same temp file within the task for different files.

Contributed by Amir Shenavandeh.

This avoids overwrite consistency issues with S3 and other stores

Change-Id: Ic4d05ef3397e963ba28fd9f775bb362b0da36ad9
---
 .../apache/hadoop/tools/mapred/RetriableFileCopyCommand.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
index 9557f2a64ca..909085cc442 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
@@ -231,7 +231,9 @@ public class RetriableFileCopyCommand extends RetriableCommand {
     Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent()
         : targetWorkPath;
     Path tempFile = new Path(root, ".distcp.tmp." +
-        context.getTaskAttemptID().toString());
+        context.getTaskAttemptID().toString() +
+        "." + String.valueOf(System.currentTimeMillis()));
+    LOG.info("Creating temp file: " + tempFile);
     return tempFile;
   }
 