HADOOP-15273. distcp can't handle remote stores with different checksum algorithms.
Contributed by Steve Loughran.
(cherry picked from commit 7ef4d942dd)
This commit is contained in:
parent
f879504fe1
commit
1771af2320
|
@ -534,11 +534,6 @@ public final class DistCpOptions {
|
||||||
+ "mutually exclusive");
|
+ "mutually exclusive");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!syncFolder && skipCRC) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"Skip CRC is valid only with update options");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!syncFolder && append) {
|
if (!syncFolder && append) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Append is valid only with update options");
|
"Append is valid only with update options");
|
||||||
|
|
|
@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
|
if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
|
||||||
targetFS, target)) {
|
targetFS, target)) {
|
||||||
StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ")
|
StringBuilder errorMessage =
|
||||||
.append(source).append(" and ").append(target).append(".");
|
new StringBuilder("Checksum mismatch between ")
|
||||||
if (sourceFS.getFileStatus(source).getBlockSize() !=
|
.append(source).append(" and ").append(target).append(".");
|
||||||
|
boolean addSkipHint = false;
|
||||||
|
String srcScheme = sourceFS.getScheme();
|
||||||
|
String targetScheme = targetFS.getScheme();
|
||||||
|
if (!srcScheme.equals(targetScheme)
|
||||||
|
&& !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) {
|
||||||
|
// the filesystems are different and they aren't both hdfs connectors
|
||||||
|
errorMessage.append("Source and destination filesystems are of"
|
||||||
|
+ " different types\n")
|
||||||
|
.append("Their checksum algorithms may be incompatible");
|
||||||
|
addSkipHint = true;
|
||||||
|
} else if (sourceFS.getFileStatus(source).getBlockSize() !=
|
||||||
targetFS.getFileStatus(target).getBlockSize()) {
|
targetFS.getFileStatus(target).getBlockSize()) {
|
||||||
errorMessage.append(" Source and target differ in block-size.")
|
errorMessage.append(" Source and target differ in block-size.\n")
|
||||||
.append(" Use -pb to preserve block-sizes during copy.")
|
.append(" Use -pb to preserve block-sizes during copy.");
|
||||||
.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.")
|
addSkipHint = true;
|
||||||
|
}
|
||||||
|
if (addSkipHint) {
|
||||||
|
errorMessage.append(" You can skip checksum-checks altogether "
|
||||||
|
+ " with -skipcrccheck.\n")
|
||||||
.append(" (NOTE: By skipping checksums, one runs the risk of " +
|
.append(" (NOTE: By skipping checksums, one runs the risk of " +
|
||||||
"masking data-corruption during file-transfer.)");
|
"masking data-corruption during file-transfer.)\n");
|
||||||
}
|
}
|
||||||
throw new IOException(errorMessage.toString());
|
throw new IOException(errorMessage.toString());
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.mapreduce.Mapper;
|
import org.apache.hadoop.mapreduce.Mapper;
|
||||||
import org.apache.hadoop.security.AccessControlException;
|
import org.apache.hadoop.security.AccessControlException;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.tools.CopyListingFileStatus;
|
import org.apache.hadoop.tools.CopyListingFileStatus;
|
||||||
import org.apache.hadoop.tools.DistCpConstants;
|
import org.apache.hadoop.tools.DistCpConstants;
|
||||||
import org.apache.hadoop.tools.DistCpOptionSwitch;
|
import org.apache.hadoop.tools.DistCpOptionSwitch;
|
||||||
|
@ -915,7 +916,7 @@ public class TestCopyMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=40000)
|
@Test(timeout=40000)
|
||||||
public void testCopyFailOnBlockSizeDifference() {
|
public void testCopyFailOnBlockSizeDifference() throws Exception {
|
||||||
try {
|
try {
|
||||||
deleteState();
|
deleteState();
|
||||||
createSourceDataWithDifferentBlockSize();
|
createSourceDataWithDifferentBlockSize();
|
||||||
|
@ -942,12 +943,11 @@ public class TestCopyMapper {
|
||||||
|
|
||||||
Assert.fail("Copy should have failed because of block-size difference.");
|
Assert.fail("Copy should have failed because of block-size difference.");
|
||||||
}
|
}
|
||||||
catch (Exception exception) {
|
catch (IOException exception) {
|
||||||
// Check that the exception suggests the use of -pb/-skipCrc.
|
// Check that the exception suggests the use of -pb/-skipcrccheck.
|
||||||
Assert.assertTrue("Failure exception should have suggested the use of -pb.",
|
Throwable cause = exception.getCause().getCause();
|
||||||
exception.getCause().getCause().getMessage().contains("pb"));
|
GenericTestUtils.assertExceptionContains("-pb", cause);
|
||||||
Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.",
|
GenericTestUtils.assertExceptionContains("-skipcrccheck", cause);
|
||||||
exception.getCause().getCause().getMessage().contains("skipCrc"));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue