HDFS-9640. Remove hsftp from DistCp in trunk. Contributed by Wei-Chiu Chuang.

This commit is contained in:
Akira Ajisaka 2016-03-28 15:32:38 +09:00
parent 55ae143923
commit 18c7e58283
8 changed files with 1 additions and 254 deletions

View File

@ -19,7 +19,6 @@
package org.apache.hadoop.tools;
import java.io.IOException;
import java.net.URL;
import java.util.Random;
import org.apache.commons.logging.Log;
@ -254,88 +253,10 @@ public class DistCp extends Configured implements Tool {
job.getConfiguration().set(JobContext.NUM_MAPS,
String.valueOf(inputOptions.getMaxMaps()));
if (inputOptions.getSslConfigurationFile() != null) {
setupSSLConfig(job);
}
inputOptions.appendToConf(job.getConfiguration());
return job;
}
/**
* Setup ssl configuration on the job configuration to enable hsftp access
* from map job. Also copy the ssl configuration file to Distributed cache
*
* @param job - Reference to job's handle
* @throws java.io.IOException - Exception if unable to locate ssl config file
*/
private void setupSSLConfig(Job job) throws IOException {
Configuration configuration = job.getConfiguration();
URL sslFileUrl = configuration.getResource(inputOptions
.getSslConfigurationFile());
if (sslFileUrl == null) {
throw new IOException(
"Given ssl configuration file doesn't exist in class path : "
+ inputOptions.getSslConfigurationFile());
}
Path sslConfigPath = new Path(sslFileUrl.toString());
addSSLFilesToDistCache(job, sslConfigPath);
configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
}
/**
* Add SSL files to distributed cache. Trust store, key store and ssl config xml
*
* @param job - Job handle
* @param sslConfigPath - ssl Configuration file specified through options
* @throws IOException - If any
*/
private void addSSLFilesToDistCache(Job job,
Path sslConfigPath) throws IOException {
Configuration configuration = job.getConfiguration();
FileSystem localFS = FileSystem.getLocal(configuration);
Configuration sslConf = new Configuration(false);
sslConf.addResource(sslConfigPath);
Path localStorePath = getLocalStorePath(sslConf,
DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
localStorePath.getName());
localStorePath = getLocalStorePath(sslConf,
DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
localStorePath.getName());
job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
}
/**
* Get Local Trust store/key store path
*
* @param sslConf - Config from SSL Client xml
* @param storeKey - Key for either trust store or key store
* @return - Path where the store is present
* @throws IOException -If any
*/
private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException {
if (sslConf.get(storeKey) != null) {
return new Path(sslConf.get(storeKey));
} else {
throw new IOException("Store for " + storeKey + " is not set in " +
inputOptions.getSslConfigurationFile());
}
}
/**
* Setup output format appropriately
*

View File

@ -49,7 +49,6 @@ public class DistCpConstants {
"distcp.preserve.rawxattrs";
public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders";
public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source";
public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource";
public static final String CONF_LABEL_LISTSTATUS_THREADS = "distcp.liststatus.threads";
public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps";
public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing";
@ -76,9 +75,6 @@ public class DistCpConstants {
/* Total number of paths to copy, includes directories. Unfiltered count */
public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records";
/* SSL keystore resource */
public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource";
/* If input is based -f <<source listing>>, file containing the src paths */
public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path";
@ -106,18 +102,6 @@ public class DistCpConstants {
/* DistCp CopyListing class override param */
public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class";
/**
* Conf label for SSL Trust-store location.
*/
public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION
= "ssl.client.truststore.location";
/**
* Conf label for SSL Key-store location.
*/
public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION
= "ssl.client.keystore.location";
/**
* Constants for DistCp return code to shell / consumer of ToolRunner's run
*/

View File

@ -74,15 +74,6 @@ public enum DistCpOptionSwitch {
DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
new Option("delete", false, "Delete from target, " +
"files missing in source")),
/**
* Configuration file to use with hftps:// for securely copying
* files across clusters. Typically the configuration file contains
* truststore/keystore information such as location, password and type
*/
SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
new Option("mapredSslConf", true, "Configuration for ssl config file" +
", to use with hftps://. Must be in the classpath.")),
/**
* Number of threads for building source file listing (before map-reduce
* phase, max one listStatus per thread at a time).

View File

@ -49,8 +49,6 @@ public class DistCpOptions {
private int maxMaps = DistCpConstants.DEFAULT_MAPS;
private float mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB;
private String sslConfigurationFile;
private String copyStrategy = DistCpConstants.UNIFORMSIZE;
private EnumSet<FileAttribute> preserveStatus = EnumSet.noneOf(FileAttribute.class);
@ -134,7 +132,6 @@ public class DistCpOptions {
this.numListstatusThreads = that.numListstatusThreads;
this.maxMaps = that.maxMaps;
this.mapBandwidth = that.mapBandwidth;
this.sslConfigurationFile = that.getSslConfigurationFile();
this.copyStrategy = that.copyStrategy;
this.preserveStatus = that.preserveStatus;
this.preserveRawXattrs = that.preserveRawXattrs;
@ -380,24 +377,6 @@ public class DistCpOptions {
this.mapBandwidth = mapBandwidth;
}
/**
* Get path where the ssl configuration file is present to use for hftps://
*
* @return Path on local file system
*/
public String getSslConfigurationFile() {
return sslConfigurationFile;
}
/**
* Set the SSL configuration file path to use with hftps:// (local path)
*
* @param sslConfigurationFile - Local ssl config file path
*/
public void setSslConfigurationFile(String sslConfigurationFile) {
this.sslConfigurationFile = sslConfigurationFile;
}
/**
* Returns an iterator with the list of file attributes to preserve
*
@ -670,7 +649,6 @@ public class DistCpOptions {
", numListstatusThreads=" + numListstatusThreads +
", maxMaps=" + maxMaps +
", mapBandwidth=" + mapBandwidth +
", sslConfigurationFile='" + sslConfigurationFile + '\'' +
", copyStrategy='" + copyStrategy + '\'' +
", preserveStatus=" + preserveStatus +
", preserveRawXattrs=" + preserveRawXattrs +

View File

@ -137,11 +137,6 @@ public class OptionsParser {
parseBandwidth(command, option);
if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) {
option.setSslConfigurationFile(command.
getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch()));
}
parseNumListStatusThreads(command, option);
parseMaxMaps(command, option);

View File

@ -19,10 +19,7 @@
package org.apache.hadoop.tools.mapred;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.EnumSet;
import org.apache.commons.logging.Log;
@ -33,7 +30,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants;
@ -119,73 +115,9 @@ public class CopyMapper extends Mapper<Text, CopyListingFileStatus, Text, Text>
overWrite = true; // When target is an existing file, overwrite it.
}
if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
initializeSSLConf(context);
}
startEpoch = System.currentTimeMillis();
}
/**
* Initialize SSL Config if same is set in conf
*
* @throws IOException - If any
*/
private void initializeSSLConf(Context context) throws IOException {
LOG.info("Initializing SSL configuration");
String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
Path[] cacheFiles = context.getLocalCacheFiles();
Configuration sslConfig = new Configuration(false);
String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF);
Path sslClient = findCacheFile(cacheFiles, sslConfFileName);
if (sslClient == null) {
LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName +
" in " + Arrays.toString(cacheFiles));
return;
}
sslConfig.addResource(sslClient);
String trustStoreFile = conf.get("ssl.client.truststore.location");
Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile);
sslConfig.set("ssl.client.truststore.location", trustStorePath.toString());
String keyStoreFile = conf.get("ssl.client.keystore.location");
Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile);
sslConfig.set("ssl.client.keystore.location", keyStorePath.toString());
try {
OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName);
try {
sslConfig.writeXml(out);
} finally {
out.close();
}
conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName);
} catch (IOException e) {
LOG.warn("Unable to write out the ssl configuration. " +
"Will fall back to default ssl-client.xml in class path, if there is one", e);
}
}
/**
* Find entry from distributed cache
*
* @param cacheFiles - All localized cache files
* @param fileName - fileName to search
* @return Path of the filename if found, else null
*/
private Path findCacheFile(Path[] cacheFiles, String fileName) {
if (cacheFiles != null && cacheFiles.length > 0) {
for (Path file : cacheFiles) {
if (file.getName().equals(fileName)) {
return file;
}
}
}
return null;
}
/**
* Implementation of the Mapper::map(). Does the copy.
* @param relPath The target path.

View File

@ -32,7 +32,6 @@ DistCp Guide
- [Map sizing](#Map_sizing)
- [Copying Between Versions of HDFS](#Copying_Between_Versions_of_HDFS)
- [MapReduce and other side-effects](#MapReduce_and_other_side-effects)
- [SSL Configurations for HSFTP sources](#SSL_Configurations_for_HSFTP_sources)
- [Frequently Asked Questions](#Frequently_Asked_Questions)
---
@ -232,7 +231,6 @@ Flag | Description | Notes
`-strategy {dynamic|uniformsize}` | Choose the copy-strategy to be used in DistCp. | By default, uniformsize is used. (i.e. Maps are balanced on the total size of files copied by each map. Similar to legacy.) If "dynamic" is specified, `DynamicInputFormat` is used instead. (This is described in the Architecture section, under InputFormats.)
`-bandwidth` | Specify bandwidth per map, in MB/second. | Each map will be restricted to consume only the specified bandwidth. This is not always exact. The map throttles back its bandwidth consumption during a copy, such that the **net** bandwidth used tends towards the specified value.
`-atomic {-tmp <tmp_dir>}` | Specify atomic commit, with optional tmp directory. | `-atomic` instructs DistCp to copy the source data to a temporary target location, and then move the temporary target to the final-location atomically. Data will either be available at final target in a complete and consistent form, or not at all. Optionally, `-tmp` may be used to specify the location of the tmp-target. If not specified, a default is chosen. **Note:** tmp_dir must be on the final target cluster.
`-mapredSslConf <ssl_conf_file>` | Specify SSL Config file, to be used with HSFTP source | When using the hsftp protocol with a source, the security- related properties may be specified in a config-file and passed to DistCp. \<ssl_conf_file\> needs to be in the classpath.
`-async` | Run DistCp asynchronously. Quits as soon as the Hadoop Job is launched. | The Hadoop Job-id is logged, for tracking.
`-diff` | Use snapshot diff report to identify the difference between source and target. |
`-numListstatusThreads` | Number of threads to use for building file listing | At most 40 threads.
@ -432,43 +430,6 @@ $H3 MapReduce and other side-effects
* If `mapreduce.map.speculative` is set set final and true, the result of the
copy is undefined.
$H3 SSL Configurations for HSFTP sources
To use an HSFTP source (i.e. using the hsftp protocol), a SSL configuration
file needs to be specified (via the `-mapredSslConf` option). This must
specify 3 parameters:
* `ssl.client.truststore.location`: The local-filesystem location of the
trust-store file, containing the certificate for the NameNode.
* `ssl.client.truststore.type`: (Optional) The format of the trust-store
file.
* `ssl.client.truststore.password`: (Optional) Password for the trust-store
file.
The following is an example SSL configuration file:
<configuration>
<property>
<name>ssl.client.truststore.location</name>
<value>/work/keystore.jks</value>
<description>Truststore to be used by clients like distcp. Must be specified.</description>
</property>
<property>
<name>ssl.client.truststore.password</name>
<value>changeme</value>
<description>Optional. Default value is "".</description>
</property>
<property>
<name>ssl.client.truststore.type</name>
<value>jks</value>
<description>Optional. Default value is "jks".</description>
</property>
</configuration>
The SSL configuration file must be in the class-path of the DistCp program.
Frequently Asked Questions
--------------------------

View File

@ -251,21 +251,6 @@ public class TestOptionsParser {
} catch (IllegalArgumentException ignore) { }
}
@Test
public void testParseSSLConf() {
DistCpOptions options = OptionsParser.parse(new String[] {
"hdfs://localhost:8020/source/first",
"hdfs://localhost:8020/target/"});
Assert.assertNull(options.getSslConfigurationFile());
options = OptionsParser.parse(new String[] {
"-mapredSslConf",
"/tmp/ssl-client.xml",
"hdfs://localhost:8020/source/first",
"hdfs://localhost:8020/target/"});
Assert.assertEquals(options.getSslConfigurationFile(), "/tmp/ssl-client.xml");
}
@Test
public void testParseMaps() {
DistCpOptions options = OptionsParser.parse(new String[] {
@ -402,7 +387,7 @@ public class TestOptionsParser {
String val = "DistCpOptions{atomicCommit=false, syncFolder=false, "
+ "deleteMissing=false, ignoreFailures=false, overwrite=false, "
+ "skipCRC=false, blocking=true, numListstatusThreads=0, maxMaps=20, "
+ "mapBandwidth=100.0, sslConfigurationFile='null', "
+ "mapBandwidth=100.0, "
+ "copyStrategy='uniformsize', preserveStatus=[], "
+ "preserveRawXattrs=false, atomicWorkPath=null, logPath=null, "
+ "sourceFileListing=abc, sourcePaths=null, targetPath=xyz, "