HADOOP-8042. When copying a file out of HDFS, modifying it, and uploading it back into HDFS, the put fails due to a CRC mismatch (Daryn Sharp via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1242389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a1574e1715
commit
7f7b05226e
|
@ -162,6 +162,10 @@ Release 0.23.2 - UNRELEASED
|
|||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-8042 When copying a file out of HDFS, modifying it, and uploading
|
||||
it back into HDFS, the put fails due to a CRC mismatch
|
||||
(Daryn Sharp via bobby)
|
||||
|
||||
Release 0.23.1 - 2012-02-08
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -43,6 +43,7 @@ public abstract class ChecksumFileSystem extends FilterFileSystem {
|
|||
private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
|
||||
private int bytesPerChecksum = 512;
|
||||
private boolean verifyChecksum = true;
|
||||
private boolean writeChecksum = true;
|
||||
|
||||
public static double getApproxChkSumLength(long size) {
|
||||
return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
|
||||
|
@ -67,6 +68,11 @@ public abstract class ChecksumFileSystem extends FilterFileSystem {
|
|||
this.verifyChecksum = verifyChecksum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setWriteChecksum(boolean writeChecksum) {
|
||||
this.writeChecksum = writeChecksum;
|
||||
}
|
||||
|
||||
/** get the raw file system */
|
||||
public FileSystem getRawFileSystem() {
|
||||
return fs;
|
||||
|
@ -428,9 +434,20 @@ public abstract class ChecksumFileSystem extends FilterFileSystem {
|
|||
throw new IOException("Mkdirs failed to create " + parent);
|
||||
}
|
||||
}
|
||||
final FSDataOutputStream out = new FSDataOutputStream(
|
||||
new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
|
||||
blockSize, progress), null);
|
||||
final FSDataOutputStream out;
|
||||
if (writeChecksum) {
|
||||
out = new FSDataOutputStream(
|
||||
new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
|
||||
blockSize, progress), null);
|
||||
} else {
|
||||
out = fs.create(f, permission, overwrite, bufferSize, replication,
|
||||
blockSize, progress);
|
||||
// remove the checksum file since we aren't writing one
|
||||
Path checkFile = getChecksumFile(f);
|
||||
if (fs.exists(checkFile)) {
|
||||
fs.delete(checkFile, true);
|
||||
}
|
||||
}
|
||||
if (permission != null) {
|
||||
setPermission(f, permission);
|
||||
}
|
||||
|
|
|
@ -1936,6 +1936,15 @@ public abstract class FileSystem extends Configured implements Closeable {
|
|||
//doesn't do anything
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the write checksum flag. This is only applicable if the
|
||||
* corresponding FileSystem supports checksum. By default doesn't do anything.
|
||||
* @param writeChecksum
|
||||
*/
|
||||
public void setWriteChecksum(boolean writeChecksum) {
|
||||
//doesn't do anything
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a list of file status objects that corresponds to the list of paths
|
||||
* excluding those non-existent paths.
|
||||
|
|
|
@ -360,6 +360,11 @@ public class FilterFileSystem extends FileSystem {
|
|||
public void setVerifyChecksum(boolean verifyChecksum) {
|
||||
fs.setVerifyChecksum(verifyChecksum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setWriteChecksum(boolean writeChecksum) {
|
||||
fs.setVerifyChecksum(writeChecksum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Configuration getConf() {
|
||||
|
|
|
@ -41,7 +41,9 @@ import org.apache.hadoop.io.IOUtils;
|
|||
*/
|
||||
abstract class CommandWithDestination extends FsCommand {
|
||||
protected PathData dst;
|
||||
protected boolean overwrite = false;
|
||||
private boolean overwrite = false;
|
||||
private boolean verifyChecksum = true;
|
||||
private boolean writeChecksum = true;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -53,6 +55,14 @@ abstract class CommandWithDestination extends FsCommand {
|
|||
overwrite = flag;
|
||||
}
|
||||
|
||||
protected void setVerifyChecksum(boolean flag) {
|
||||
verifyChecksum = flag;
|
||||
}
|
||||
|
||||
protected void setWriteChecksum(boolean flag) {
|
||||
writeChecksum = flag;
|
||||
}
|
||||
|
||||
/**
|
||||
* The last arg is expected to be a local path, if only one argument is
|
||||
* given then the destination will be the current directory
|
||||
|
@ -201,6 +211,7 @@ abstract class CommandWithDestination extends FsCommand {
|
|||
* @throws IOException if copy fails
|
||||
*/
|
||||
protected void copyFileToTarget(PathData src, PathData target) throws IOException {
|
||||
src.fs.setVerifyChecksum(verifyChecksum);
|
||||
copyStreamToTarget(src.fs.open(src.path), target);
|
||||
}
|
||||
|
||||
|
@ -217,6 +228,7 @@ abstract class CommandWithDestination extends FsCommand {
|
|||
if (target.exists && (target.stat.isDirectory() || !overwrite)) {
|
||||
throw new PathExistsException(target.toString());
|
||||
}
|
||||
target.fs.setWriteChecksum(writeChecksum);
|
||||
PathData tempFile = null;
|
||||
try {
|
||||
tempFile = target.createTempFile(target+"._COPYING_");
|
||||
|
|
|
@ -25,7 +25,6 @@ import java.util.List;
|
|||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.fs.ChecksumFileSystem;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
|
||||
/** Various commands for copy files */
|
||||
|
@ -103,43 +102,17 @@ class CopyCommands {
|
|||
"to the local name. <src> is kept. When copying multiple,\n" +
|
||||
"files, the destination must be a directory.";
|
||||
|
||||
/**
|
||||
* The prefix for the tmp file used in copyToLocal.
|
||||
* It must be at least three characters long, required by
|
||||
* {@link java.io.File#createTempFile(String, String, File)}.
|
||||
*/
|
||||
private boolean copyCrc;
|
||||
private boolean verifyChecksum;
|
||||
|
||||
@Override
|
||||
protected void processOptions(LinkedList<String> args)
|
||||
throws IOException {
|
||||
CommandFormat cf = new CommandFormat(
|
||||
1, Integer.MAX_VALUE, "crc", "ignoreCrc");
|
||||
cf.parse(args);
|
||||
copyCrc = cf.getOpt("crc");
|
||||
verifyChecksum = !cf.getOpt("ignoreCrc");
|
||||
|
||||
setWriteChecksum(cf.getOpt("crc"));
|
||||
setVerifyChecksum(!cf.getOpt("ignoreCrc"));
|
||||
setRecursive(true);
|
||||
getLocalDestination(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void copyFileToTarget(PathData src, PathData target)
|
||||
throws IOException {
|
||||
src.fs.setVerifyChecksum(verifyChecksum);
|
||||
|
||||
if (copyCrc && !(src.fs instanceof ChecksumFileSystem)) {
|
||||
displayWarning(src.fs + ": Does not support checksums");
|
||||
copyCrc = false;
|
||||
}
|
||||
|
||||
super.copyFileToTarget(src, target);
|
||||
if (copyCrc) {
|
||||
// should we delete real file if crc copy fails?
|
||||
super.copyFileToTarget(src.getChecksumFile(), target.getChecksumFile());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -27,7 +27,6 @@ import java.net.URISyntaxException;
|
|||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.ChecksumFileSystem;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
|
@ -169,19 +168,6 @@ public class PathData {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the corresponding crc data for a file. Avoids exposing the fs
|
||||
* contortions to the caller.
|
||||
* @return PathData of the crc file
|
||||
* @throws IOException is anything goes wrong
|
||||
*/
|
||||
public PathData getChecksumFile() throws IOException {
|
||||
checkIfExists(FileTypeRequirement.SHOULD_NOT_BE_DIRECTORY);
|
||||
ChecksumFileSystem srcFs = (ChecksumFileSystem)fs;
|
||||
Path srcPath = srcFs.getChecksumFile(path);
|
||||
return new PathData(srcFs.getRawFileSystem(), srcPath.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a temporary file for this PathData with the given extension.
|
||||
* The file will be deleted on exit.
|
||||
|
|
|
@ -470,6 +470,15 @@ public class ViewFileSystem extends FileSystem {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setWriteChecksum(final boolean writeChecksum) {
|
||||
List<InodeTree.MountPoint<FileSystem>> mountPoints =
|
||||
fsState.getMountPoints();
|
||||
for (InodeTree.MountPoint<FileSystem> mount : mountPoints) {
|
||||
mount.target.targetFileSystem.setWriteChecksum(writeChecksum);
|
||||
}
|
||||
}
|
||||
|
||||
public MountPoint[] getMountPoints() {
|
||||
List<InodeTree.MountPoint<FileSystem>> mountPoints =
|
||||
fsState.getMountPoints();
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestFsShellCopy {
|
||||
static Configuration conf;
|
||||
static FsShell shell;
|
||||
static LocalFileSystem lfs;
|
||||
static Path testRootDir, srcPath, dstPath;
|
||||
|
||||
@BeforeClass
|
||||
public static void setup() throws Exception {
|
||||
conf = new Configuration();
|
||||
shell = new FsShell(conf);
|
||||
lfs = FileSystem.getLocal(conf);
|
||||
testRootDir = new Path(
|
||||
System.getProperty("test.build.data","test/build/data"), "testShellCopy");
|
||||
lfs.mkdirs(testRootDir);
|
||||
srcPath = new Path(testRootDir, "srcFile");
|
||||
dstPath = new Path(testRootDir, "dstFile");
|
||||
}
|
||||
|
||||
@Before
|
||||
public void prepFiles() throws Exception {
|
||||
lfs.setVerifyChecksum(true);
|
||||
lfs.setWriteChecksum(true);
|
||||
|
||||
lfs.delete(srcPath, true);
|
||||
lfs.delete(dstPath, true);
|
||||
FSDataOutputStream out = lfs.create(srcPath);
|
||||
out.writeChars("hi");
|
||||
out.close();
|
||||
assertTrue(lfs.exists(lfs.getChecksumFile(srcPath)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCopyNoCrc() throws Exception {
|
||||
shellRun(0, "-get", srcPath.toString(), dstPath.toString());
|
||||
checkPath(dstPath, false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCopyCrc() throws Exception {
|
||||
shellRun(0, "-get", "-crc", srcPath.toString(), dstPath.toString());
|
||||
checkPath(dstPath, true);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testCorruptedCopyCrc() throws Exception {
|
||||
FSDataOutputStream out = lfs.getRawFileSystem().create(srcPath);
|
||||
out.writeChars("bang");
|
||||
out.close();
|
||||
shellRun(1, "-get", srcPath.toString(), dstPath.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCorruptedCopyIgnoreCrc() throws Exception {
|
||||
shellRun(0, "-get", "-ignoreCrc", srcPath.toString(), dstPath.toString());
|
||||
checkPath(dstPath, false);
|
||||
}
|
||||
|
||||
private void checkPath(Path p, boolean expectChecksum) throws IOException {
|
||||
assertTrue(lfs.exists(p));
|
||||
boolean hasChecksum = lfs.exists(lfs.getChecksumFile(p));
|
||||
assertEquals(expectChecksum, hasChecksum);
|
||||
}
|
||||
|
||||
private void shellRun(int n, String ... args) throws Exception {
|
||||
assertEquals(n, shell.run(args));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue