Revert "HADOOP-13145 In DistCp, prevent unnecessary getFileStatus call when not preserving metadata. Contributed by Chris Nauroth."
This reverts commit edb912c571.
This commit is contained in:
parent
f32b37ec89
commit
ce17c9fd51
|
@ -349,12 +349,6 @@
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
<artifactId>hadoop-distcp</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<type>test-jar</type>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-datajoin</artifactId>
|
<artifactId>hadoop-datajoin</artifactId>
|
||||||
|
|
|
@ -235,16 +235,5 @@
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
<type>jar</type>
|
<type>jar</type>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
<type>test-jar</type>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -741,15 +741,6 @@ or in batch runs.
|
||||||
Smaller values should result in faster test runs, especially when the object
|
Smaller values should result in faster test runs, especially when the object
|
||||||
store is a long way away.
|
store is a long way away.
|
||||||
|
|
||||||
DistCp tests targeting S3A support a configurable file size. The default is
|
|
||||||
10 MB, but the configuration value is expressed in KB so that it can be tuned
|
|
||||||
smaller to achieve faster test runs.
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>scale.test.distcp.file.size.kb</name>
|
|
||||||
<value>10240</value>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
### Running the Tests
|
### Running the Tests
|
||||||
|
|
||||||
After completing the configuration, execute the test run through Maven.
|
After completing the configuration, execute the test run through Maven.
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.fs.contract.s3a;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD;
|
|
||||||
import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Contract test suite covering S3A integration with DistCp.
|
|
||||||
*/
|
|
||||||
public class TestS3AContractDistCp extends AbstractContractDistCpTest {
|
|
||||||
|
|
||||||
private static final long MULTIPART_SETTING = 8 * 1024 * 1024; // 8 MB
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Configuration createConfiguration() {
|
|
||||||
Configuration newConf = super.createConfiguration();
|
|
||||||
newConf.setLong(MIN_MULTIPART_THRESHOLD, MULTIPART_SETTING);
|
|
||||||
newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING);
|
|
||||||
return newConf;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected S3AContract createContract(Configuration conf) {
|
|
||||||
return new S3AContract(conf);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -175,25 +175,6 @@
|
||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
<type>test-jar</type>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>mockito-all</artifactId>
|
||||||
|
|
|
@ -1,33 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.fs.azure.contract;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Contract test suite covering WASB integration with DistCp.
|
|
||||||
*/
|
|
||||||
public class TestAzureNativeContractDistCp extends AbstractContractDistCpTest {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected NativeAzureFileSystemContract createContract(Configuration conf) {
|
|
||||||
return new NativeAzureFileSystemContract(conf);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -175,22 +175,6 @@
|
||||||
</manifest>
|
</manifest>
|
||||||
</archive>
|
</archive>
|
||||||
</configuration>
|
</configuration>
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<id>prepare-jar</id>
|
|
||||||
<phase>prepare-package</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>jar</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
<execution>
|
|
||||||
<id>prepare-test-jar</id>
|
|
||||||
<phase>prepare-package</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>test-jar</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
|
|
@ -195,13 +195,9 @@ public class DistCpUtils {
|
||||||
EnumSet<FileAttribute> attributes,
|
EnumSet<FileAttribute> attributes,
|
||||||
boolean preserveRawXattrs) throws IOException {
|
boolean preserveRawXattrs) throws IOException {
|
||||||
|
|
||||||
// If not preserving anything from FileStatus, don't bother fetching it.
|
FileStatus targetFileStatus = targetFS.getFileStatus(path);
|
||||||
FileStatus targetFileStatus = attributes.isEmpty() ? null :
|
String group = targetFileStatus.getGroup();
|
||||||
targetFS.getFileStatus(path);
|
String user = targetFileStatus.getOwner();
|
||||||
String group = targetFileStatus == null ? null :
|
|
||||||
targetFileStatus.getGroup();
|
|
||||||
String user = targetFileStatus == null ? null :
|
|
||||||
targetFileStatus.getOwner();
|
|
||||||
boolean chown = false;
|
boolean chown = false;
|
||||||
|
|
||||||
if (attributes.contains(FileAttribute.ACL)) {
|
if (attributes.contains(FileAttribute.ACL)) {
|
||||||
|
|
|
@ -1,204 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.tools.contract;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
|
|
||||||
import static org.junit.Assert.*;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
|
|
||||||
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
|
||||||
import org.apache.hadoop.mapreduce.Job;
|
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
|
||||||
import org.apache.hadoop.tools.DistCp;
|
|
||||||
import org.apache.hadoop.tools.DistCpOptions;
|
|
||||||
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Rule;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.rules.TestName;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Contract test suite covering a file system's integration with DistCp. The
|
|
||||||
* tests coordinate two file system instances: one "local", which is the local
|
|
||||||
* file system, and the other "remote", which is the file system implementation
|
|
||||||
* under test. The tests in the suite cover both copying from local to remote
|
|
||||||
* (e.g. a backup use case) and copying from remote to local (e.g. a restore use
|
|
||||||
* case).
|
|
||||||
*/
|
|
||||||
public abstract class AbstractContractDistCpTest
|
|
||||||
extends AbstractFSContractTestBase {
|
|
||||||
|
|
||||||
@Rule
|
|
||||||
public TestName testName = new TestName();
|
|
||||||
|
|
||||||
private Configuration conf;
|
|
||||||
private FileSystem localFS, remoteFS;
|
|
||||||
private Path localDir, remoteDir;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Configuration createConfiguration() {
|
|
||||||
Configuration newConf = new Configuration();
|
|
||||||
newConf.set("mapred.job.tracker", "local");
|
|
||||||
return newConf;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Before
|
|
||||||
@Override
|
|
||||||
public void setup() throws Exception {
|
|
||||||
super.setup();
|
|
||||||
conf = getContract().getConf();
|
|
||||||
localFS = FileSystem.getLocal(conf);
|
|
||||||
remoteFS = getFileSystem();
|
|
||||||
// Test paths are isolated by concrete subclass name and test method name.
|
|
||||||
// All paths are fully qualified including scheme (not taking advantage of
|
|
||||||
// default file system), so if something fails, the messages will make it
|
|
||||||
// clear which paths are local and which paths are remote.
|
|
||||||
Path testSubDir = new Path(getClass().getSimpleName(),
|
|
||||||
testName.getMethodName());
|
|
||||||
localDir = localFS.makeQualified(new Path(new Path(
|
|
||||||
GenericTestUtils.getTestDir().toURI()), testSubDir));
|
|
||||||
mkdirs(localFS, localDir);
|
|
||||||
remoteDir = remoteFS.makeQualified(
|
|
||||||
new Path(getContract().getTestPath(), testSubDir));
|
|
||||||
mkdirs(remoteFS, remoteDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void deepDirectoryStructureToRemote() throws Exception {
|
|
||||||
describe("copy a deep directory structure from local to remote");
|
|
||||||
deepDirectoryStructure(localFS, localDir, remoteFS, remoteDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void largeFilesToRemote() throws Exception {
|
|
||||||
describe("copy multiple large files from local to remote");
|
|
||||||
largeFiles(localFS, localDir, remoteFS, remoteDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void deepDirectoryStructureFromRemote() throws Exception {
|
|
||||||
describe("copy a deep directory structure from remote to local");
|
|
||||||
deepDirectoryStructure(remoteFS, remoteDir, localFS, localDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void largeFilesFromRemote() throws Exception {
|
|
||||||
describe("copy multiple large files from remote to local");
|
|
||||||
largeFiles(remoteFS, remoteDir, localFS, localDir);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes a test using a file system sub-tree with multiple nesting levels.
|
|
||||||
*
|
|
||||||
* @param srcFS source FileSystem
|
|
||||||
* @param srcDir source directory
|
|
||||||
* @param dstFS destination FileSystem
|
|
||||||
* @param dstDir destination directory
|
|
||||||
* @throws Exception if there is a failure
|
|
||||||
*/
|
|
||||||
private void deepDirectoryStructure(FileSystem srcFS, Path srcDir,
|
|
||||||
FileSystem dstFS, Path dstDir) throws Exception {
|
|
||||||
Path inputDir = new Path(srcDir, "inputDir");
|
|
||||||
Path inputSubDir1 = new Path(inputDir, "subDir1");
|
|
||||||
Path inputSubDir2 = new Path(inputDir, "subDir2/subDir3");
|
|
||||||
Path inputFile1 = new Path(inputDir, "file1");
|
|
||||||
Path inputFile2 = new Path(inputSubDir1, "file2");
|
|
||||||
Path inputFile3 = new Path(inputSubDir2, "file3");
|
|
||||||
mkdirs(srcFS, inputSubDir1);
|
|
||||||
mkdirs(srcFS, inputSubDir2);
|
|
||||||
byte[] data1 = dataset(100, 33, 43);
|
|
||||||
createFile(srcFS, inputFile1, true, data1);
|
|
||||||
byte[] data2 = dataset(200, 43, 53);
|
|
||||||
createFile(srcFS, inputFile2, true, data2);
|
|
||||||
byte[] data3 = dataset(300, 53, 63);
|
|
||||||
createFile(srcFS, inputFile3, true, data3);
|
|
||||||
Path target = new Path(dstDir, "outputDir");
|
|
||||||
runDistCp(inputDir, target);
|
|
||||||
ContractTestUtils.assertIsDirectory(dstFS, target);
|
|
||||||
verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1);
|
|
||||||
verifyFileContents(dstFS,
|
|
||||||
new Path(target, "inputDir/subDir1/file2"), data2);
|
|
||||||
verifyFileContents(dstFS,
|
|
||||||
new Path(target, "inputDir/subDir2/subDir3/file3"), data3);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes a test using multiple large files.
|
|
||||||
*
|
|
||||||
* @param srcFS source FileSystem
|
|
||||||
* @param srcDir source directory
|
|
||||||
* @param dstFS destination FileSystem
|
|
||||||
* @param dstDir destination directory
|
|
||||||
* @throws Exception if there is a failure
|
|
||||||
*/
|
|
||||||
private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
|
|
||||||
Path dstDir) throws Exception {
|
|
||||||
Path inputDir = new Path(srcDir, "inputDir");
|
|
||||||
Path inputFile1 = new Path(inputDir, "file1");
|
|
||||||
Path inputFile2 = new Path(inputDir, "file2");
|
|
||||||
Path inputFile3 = new Path(inputDir, "file3");
|
|
||||||
mkdirs(srcFS, inputDir);
|
|
||||||
int fileSizeKb = conf.getInt("scale.test.distcp.file.size.kb", 10 * 1024);
|
|
||||||
int fileSizeMb = fileSizeKb * 1024;
|
|
||||||
getLog().info("{} with file size {}", testName.getMethodName(), fileSizeMb);
|
|
||||||
byte[] data1 = dataset((fileSizeMb + 1) * 1024 * 1024, 33, 43);
|
|
||||||
createFile(srcFS, inputFile1, true, data1);
|
|
||||||
byte[] data2 = dataset((fileSizeMb + 2) * 1024 * 1024, 43, 53);
|
|
||||||
createFile(srcFS, inputFile2, true, data2);
|
|
||||||
byte[] data3 = dataset((fileSizeMb + 3) * 1024 * 1024, 53, 63);
|
|
||||||
createFile(srcFS, inputFile3, true, data3);
|
|
||||||
Path target = new Path(dstDir, "outputDir");
|
|
||||||
runDistCp(inputDir, target);
|
|
||||||
ContractTestUtils.assertIsDirectory(dstFS, target);
|
|
||||||
verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1);
|
|
||||||
verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2);
|
|
||||||
verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes DistCp and asserts that the job finished successfully.
|
|
||||||
*
|
|
||||||
* @param src source path
|
|
||||||
* @param dst destination path
|
|
||||||
* @throws Exception if there is a failure
|
|
||||||
*/
|
|
||||||
private void runDistCp(Path src, Path dst) throws Exception {
|
|
||||||
DistCpOptions options = new DistCpOptions(Arrays.asList(src), dst);
|
|
||||||
Job job = new DistCp(conf, options).execute();
|
|
||||||
assertNotNull("Unexpected null job returned from DistCp execution.", job);
|
|
||||||
assertTrue("DistCp job did not complete.", job.isComplete());
|
|
||||||
assertTrue("DistCp job did not complete successfully.", job.isSuccessful());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a directory and any ancestor directories required.
|
|
||||||
*
|
|
||||||
* @param fs FileSystem in which to create directories
|
|
||||||
* @param dir path of directory to create
|
|
||||||
* @throws Exception if there is a failure
|
|
||||||
*/
|
|
||||||
private static void mkdirs(FileSystem fs, Path dir) throws Exception {
|
|
||||||
assertTrue("Failed to mkdir " + dir, fs.mkdirs(dir));
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue