HADOOP-15273. distcp can't handle remote stores with different checksum algorithms.

Contributed by Steve Loughran.

(cherry picked from commit 7ef4d942dd)
This commit is contained in:
Steve Loughran 2018-03-08 11:24:06 +00:00
parent f879504fe1
commit 1771af2320
3 changed files with 29 additions and 19 deletions

View File

@ -534,11 +534,6 @@ public final class DistCpOptions {
+ "mutually exclusive");
}
if (!syncFolder && skipCRC) {
throw new IllegalArgumentException(
"Skip CRC is valid only with update options");
}
if (!syncFolder && append) {
throw new IllegalArgumentException(
"Append is valid only with update options");

View File

@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand {
throws IOException {
if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
targetFS, target)) {
StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ")
.append(source).append(" and ").append(target).append(".");
if (sourceFS.getFileStatus(source).getBlockSize() !=
StringBuilder errorMessage =
new StringBuilder("Checksum mismatch between ")
.append(source).append(" and ").append(target).append(".");
boolean addSkipHint = false;
String srcScheme = sourceFS.getScheme();
String targetScheme = targetFS.getScheme();
if (!srcScheme.equals(targetScheme)
&& !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) {
// the filesystems are different and they aren't both hdfs connectors
errorMessage.append("Source and destination filesystems are of"
+ " different types\n")
.append("Their checksum algorithms may be incompatible");
addSkipHint = true;
} else if (sourceFS.getFileStatus(source).getBlockSize() !=
targetFS.getFileStatus(target).getBlockSize()) {
errorMessage.append(" Source and target differ in block-size.")
.append(" Use -pb to preserve block-sizes during copy.")
.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.")
errorMessage.append(" Source and target differ in block-size.\n")
.append(" Use -pb to preserve block-sizes during copy.");
addSkipHint = true;
}
if (addSkipHint) {
errorMessage.append(" You can skip checksum-checks altogether "
+ " with -skipcrccheck.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of " +
"masking data-corruption during file-transfer.)");
"masking data-corruption during file-transfer.)\n");
}
throw new IOException(errorMessage.toString());
}

View File

@ -44,6 +44,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpOptionSwitch;
@ -915,7 +916,7 @@ public class TestCopyMapper {
}
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
public void testCopyFailOnBlockSizeDifference() throws Exception {
try {
deleteState();
createSourceDataWithDifferentBlockSize();
@ -942,12 +943,11 @@ public class TestCopyMapper {
Assert.fail("Copy should have failed because of block-size difference.");
}
catch (Exception exception) {
// Check that the exception suggests the use of -pb/-skipCrc.
Assert.assertTrue("Failure exception should have suggested the use of -pb.",
exception.getCause().getCause().getMessage().contains("pb"));
Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.",
exception.getCause().getCause().getMessage().contains("skipCrc"));
catch (IOException exception) {
// Check that the exception suggests the use of -pb/-skipcrccheck.
Throwable cause = exception.getCause().getCause();
GenericTestUtils.assertExceptionContains("-pb", cause);
GenericTestUtils.assertExceptionContains("-skipcrccheck", cause);
}
}