HADOOP-15273. distcp can't handle remote stores with different checksum algorithms.

Contributed by Steve Loughran.

(cherry picked from commit 7ef4d942dd)
This commit is contained in:
Steve Loughran 2018-03-08 11:24:06 +00:00
parent f879504fe1
commit 1771af2320
3 changed files with 29 additions and 19 deletions

View File

@ -534,11 +534,6 @@ public final class DistCpOptions {
+ "mutually exclusive"); + "mutually exclusive");
} }
if (!syncFolder && skipCRC) {
throw new IllegalArgumentException(
"Skip CRC is valid only with update options");
}
if (!syncFolder && append) { if (!syncFolder && append) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"Append is valid only with update options"); "Append is valid only with update options");

View File

@ -210,15 +210,30 @@ public class RetriableFileCopyCommand extends RetriableCommand {
throws IOException { throws IOException {
if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum, if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
targetFS, target)) { targetFS, target)) {
StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ") StringBuilder errorMessage =
.append(source).append(" and ").append(target).append("."); new StringBuilder("Checksum mismatch between ")
if (sourceFS.getFileStatus(source).getBlockSize() != .append(source).append(" and ").append(target).append(".");
boolean addSkipHint = false;
String srcScheme = sourceFS.getScheme();
String targetScheme = targetFS.getScheme();
if (!srcScheme.equals(targetScheme)
&& !(srcScheme.contains("hdfs") && targetScheme.contains("hdfs"))) {
// the filesystems are different and they aren't both hdfs connectors
errorMessage.append("Source and destination filesystems are of"
+ " different types\n")
.append("Their checksum algorithms may be incompatible");
addSkipHint = true;
} else if (sourceFS.getFileStatus(source).getBlockSize() !=
targetFS.getFileStatus(target).getBlockSize()) { targetFS.getFileStatus(target).getBlockSize()) {
errorMessage.append(" Source and target differ in block-size.") errorMessage.append(" Source and target differ in block-size.\n")
.append(" Use -pb to preserve block-sizes during copy.") .append(" Use -pb to preserve block-sizes during copy.");
.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.") addSkipHint = true;
}
if (addSkipHint) {
errorMessage.append(" You can skip checksum-checks altogether "
+ " with -skipcrccheck.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of " + .append(" (NOTE: By skipping checksums, one runs the risk of " +
"masking data-corruption during file-transfer.)"); "masking data-corruption during file-transfer.)\n");
} }
throw new IOException(errorMessage.toString()); throw new IOException(errorMessage.toString());
} }

View File

@ -44,6 +44,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.tools.CopyListingFileStatus; import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants; import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpOptionSwitch; import org.apache.hadoop.tools.DistCpOptionSwitch;
@ -915,7 +916,7 @@ public class TestCopyMapper {
} }
@Test(timeout=40000) @Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() { public void testCopyFailOnBlockSizeDifference() throws Exception {
try { try {
deleteState(); deleteState();
createSourceDataWithDifferentBlockSize(); createSourceDataWithDifferentBlockSize();
@ -942,12 +943,11 @@ public class TestCopyMapper {
Assert.fail("Copy should have failed because of block-size difference."); Assert.fail("Copy should have failed because of block-size difference.");
} }
catch (Exception exception) { catch (IOException exception) {
// Check that the exception suggests the use of -pb/-skipCrc. // Check that the exception suggests the use of -pb/-skipcrccheck.
Assert.assertTrue("Failure exception should have suggested the use of -pb.", Throwable cause = exception.getCause().getCause();
exception.getCause().getCause().getMessage().contains("pb")); GenericTestUtils.assertExceptionContains("-pb", cause);
Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", GenericTestUtils.assertExceptionContains("-skipcrccheck", cause);
exception.getCause().getCause().getMessage().contains("skipCrc"));
} }
} }