diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java index 9db2b1b1e0f..2342fa984ae 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java @@ -19,7 +19,6 @@ package org.apache.hadoop.tools; import java.io.IOException; -import java.net.URL; import java.util.Random; import org.apache.commons.logging.Log; @@ -254,88 +253,10 @@ public class DistCp extends Configured implements Tool { job.getConfiguration().set(JobContext.NUM_MAPS, String.valueOf(inputOptions.getMaxMaps())); - if (inputOptions.getSslConfigurationFile() != null) { - setupSSLConfig(job); - } - inputOptions.appendToConf(job.getConfiguration()); return job; } - /** - * Setup ssl configuration on the job configuration to enable hsftp access - * from map job. Also copy the ssl configuration file to Distributed cache - * - * @param job - Reference to job's handle - * @throws java.io.IOException - Exception if unable to locate ssl config file - */ - private void setupSSLConfig(Job job) throws IOException { - Configuration configuration = job.getConfiguration(); - URL sslFileUrl = configuration.getResource(inputOptions - .getSslConfigurationFile()); - if (sslFileUrl == null) { - throw new IOException( - "Given ssl configuration file doesn't exist in class path : " - + inputOptions.getSslConfigurationFile()); - } - Path sslConfigPath = new Path(sslFileUrl.toString()); - - addSSLFilesToDistCache(job, sslConfigPath); - configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName()); - configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName()); - } - - /** - * Add SSL files to distributed cache. Trust store, key store and ssl config xml - * - * @param job - Job handle - * @param sslConfigPath - ssl Configuration file specified through options - * @throws IOException - If any - */ - private void addSSLFilesToDistCache(Job job, - Path sslConfigPath) throws IOException { - Configuration configuration = job.getConfiguration(); - FileSystem localFS = FileSystem.getLocal(configuration); - - Configuration sslConf = new Configuration(false); - sslConf.addResource(sslConfigPath); - - Path localStorePath = getLocalStorePath(sslConf, - DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION); - job.addCacheFile(localStorePath.makeQualified(localFS.getUri(), - localFS.getWorkingDirectory()).toUri()); - configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION, - localStorePath.getName()); - - localStorePath = getLocalStorePath(sslConf, - DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION); - job.addCacheFile(localStorePath.makeQualified(localFS.getUri(), - localFS.getWorkingDirectory()).toUri()); - configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION, - localStorePath.getName()); - - job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(), - localFS.getWorkingDirectory()).toUri()); - - } - - /** - * Get Local Trust store/key store path - * - * @param sslConf - Config from SSL Client xml - * @param storeKey - Key for either trust store or key store - * @return - Path where the store is present - * @throws IOException -If any - */ - private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException { - if (sslConf.get(storeKey) != null) { - return new Path(sslConf.get(storeKey)); - } else { - throw new IOException("Store for " + storeKey + " is not set in " + - inputOptions.getSslConfigurationFile()); - } - } - /** * Setup output format appropriately * diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 93d6a621cf2..95d26df3ae3 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -49,7 +49,6 @@ public class DistCpConstants { "distcp.preserve.rawxattrs"; public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders"; public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source"; - public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource"; public static final String CONF_LABEL_LISTSTATUS_THREADS = "distcp.liststatus.threads"; public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps"; public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing"; @@ -76,9 +75,6 @@ public class DistCpConstants { /* Total number of paths to copy, includes directories. Unfiltered count */ public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records"; - /* SSL keystore resource */ - public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource"; - /* If input is based -f <>, file containing the src paths */ public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path"; @@ -106,18 +102,6 @@ public class DistCpConstants { /* DistCp CopyListing class override param */ public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class"; - /** - * Conf label for SSL Trust-store location. - */ - public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION - = "ssl.client.truststore.location"; - - /** - * Conf label for SSL Key-store location. - */ - public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION - = "ssl.client.keystore.location"; - /** * Constants for DistCp return code to shell / consumer of ToolRunner's run */ diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java index 94855842b06..c1048966e90 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java @@ -74,15 +74,6 @@ public enum DistCpOptionSwitch { DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING, new Option("delete", false, "Delete from target, " + "files missing in source")), - - /** - * Configuration file to use with hftps:// for securely copying - * files across clusters. Typically the configuration file contains - * truststore/keystore information such as location, password and type - */ - SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF, - new Option("mapredSslConf", true, "Configuration for ssl config file" + - ", to use with hftps://. Must be in the classpath.")), /** * Number of threads for building source file listing (before map-reduce * phase, max one listStatus per thread at a time). diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index cbd4ac077cb..0b1234b962b 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -49,8 +49,6 @@ public class DistCpOptions { private int maxMaps = DistCpConstants.DEFAULT_MAPS; private float mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB; - private String sslConfigurationFile; - private String copyStrategy = DistCpConstants.UNIFORMSIZE; private EnumSet preserveStatus = EnumSet.noneOf(FileAttribute.class); @@ -134,7 +132,6 @@ public class DistCpOptions { this.numListstatusThreads = that.numListstatusThreads; this.maxMaps = that.maxMaps; this.mapBandwidth = that.mapBandwidth; - this.sslConfigurationFile = that.getSslConfigurationFile(); this.copyStrategy = that.copyStrategy; this.preserveStatus = that.preserveStatus; this.preserveRawXattrs = that.preserveRawXattrs; @@ -380,24 +377,6 @@ public class DistCpOptions { this.mapBandwidth = mapBandwidth; } - /** - * Get path where the ssl configuration file is present to use for hftps:// - * - * @return Path on local file system - */ - public String getSslConfigurationFile() { - return sslConfigurationFile; - } - - /** - * Set the SSL configuration file path to use with hftps:// (local path) - * - * @param sslConfigurationFile - Local ssl config file path - */ - public void setSslConfigurationFile(String sslConfigurationFile) { - this.sslConfigurationFile = sslConfigurationFile; - } - /** * Returns an iterator with the list of file attributes to preserve * @@ -670,7 +649,6 @@ public class DistCpOptions { ", numListstatusThreads=" + numListstatusThreads + ", maxMaps=" + maxMaps + ", mapBandwidth=" + mapBandwidth + - ", sslConfigurationFile='" + sslConfigurationFile + '\'' + ", copyStrategy='" + copyStrategy + '\'' + ", preserveStatus=" + preserveStatus + ", preserveRawXattrs=" + preserveRawXattrs + diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java index b41451346df..0f0ef2e39c5 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java @@ -137,11 +137,6 @@ public class OptionsParser { parseBandwidth(command, option); - if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) { - option.setSslConfigurationFile(command. - getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch())); - } - parseNumListStatusThreads(command, option); parseMaxMaps(command, option); diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java index f75fe76178c..09bceadbc59 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java @@ -19,10 +19,7 @@ package org.apache.hadoop.tools.mapred; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; import java.util.EnumSet; import org.apache.commons.logging.Log; @@ -33,7 +30,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.tools.CopyListingFileStatus; import org.apache.hadoop.tools.DistCpConstants; @@ -119,73 +115,9 @@ public class CopyMapper extends Mapper overWrite = true; // When target is an existing file, overwrite it. } - if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) { - initializeSSLConf(context); - } startEpoch = System.currentTimeMillis(); } - /** - * Initialize SSL Config if same is set in conf - * - * @throws IOException - If any - */ - private void initializeSSLConf(Context context) throws IOException { - LOG.info("Initializing SSL configuration"); - - String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work"; - Path[] cacheFiles = context.getLocalCacheFiles(); - - Configuration sslConfig = new Configuration(false); - String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF); - Path sslClient = findCacheFile(cacheFiles, sslConfFileName); - if (sslClient == null) { - LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName + - " in " + Arrays.toString(cacheFiles)); - return; - } - sslConfig.addResource(sslClient); - - String trustStoreFile = conf.get("ssl.client.truststore.location"); - Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile); - sslConfig.set("ssl.client.truststore.location", trustStorePath.toString()); - - String keyStoreFile = conf.get("ssl.client.keystore.location"); - Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile); - sslConfig.set("ssl.client.keystore.location", keyStorePath.toString()); - - try { - OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName); - try { - sslConfig.writeXml(out); - } finally { - out.close(); - } - conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName); - } catch (IOException e) { - LOG.warn("Unable to write out the ssl configuration. " + - "Will fall back to default ssl-client.xml in class path, if there is one", e); - } - } - - /** - * Find entry from distributed cache - * - * @param cacheFiles - All localized cache files - * @param fileName - fileName to search - * @return Path of the filename if found, else null - */ - private Path findCacheFile(Path[] cacheFiles, String fileName) { - if (cacheFiles != null && cacheFiles.length > 0) { - for (Path file : cacheFiles) { - if (file.getName().equals(fileName)) { - return file; - } - } - } - return null; - } - /** * Implementation of the Mapper::map(). Does the copy. * @param relPath The target path. diff --git a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm index bac5eccafd1..4dd15832dd8 100644 --- a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm +++ b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm @@ -32,7 +32,6 @@ DistCp Guide - [Map sizing](#Map_sizing) - [Copying Between Versions of HDFS](#Copying_Between_Versions_of_HDFS) - [MapReduce and other side-effects](#MapReduce_and_other_side-effects) - - [SSL Configurations for HSFTP sources](#SSL_Configurations_for_HSFTP_sources) - [Frequently Asked Questions](#Frequently_Asked_Questions) --- @@ -232,7 +231,6 @@ Flag | Description | Notes `-strategy {dynamic|uniformsize}` | Choose the copy-strategy to be used in DistCp. | By default, uniformsize is used. (i.e. Maps are balanced on the total size of files copied by each map. Similar to legacy.) If "dynamic" is specified, `DynamicInputFormat` is used instead. (This is described in the Architecture section, under InputFormats.) `-bandwidth` | Specify bandwidth per map, in MB/second. | Each map will be restricted to consume only the specified bandwidth. This is not always exact. The map throttles back its bandwidth consumption during a copy, such that the **net** bandwidth used tends towards the specified value. `-atomic {-tmp }` | Specify atomic commit, with optional tmp directory. | `-atomic` instructs DistCp to copy the source data to a temporary target location, and then move the temporary target to the final-location atomically. Data will either be available at final target in a complete and consistent form, or not at all. Optionally, `-tmp` may be used to specify the location of the tmp-target. If not specified, a default is chosen. **Note:** tmp_dir must be on the final target cluster. -`-mapredSslConf ` | Specify SSL Config file, to be used with HSFTP source | When using the hsftp protocol with a source, the security- related properties may be specified in a config-file and passed to DistCp. \ needs to be in the classpath. `-async` | Run DistCp asynchronously. Quits as soon as the Hadoop Job is launched. | The Hadoop Job-id is logged, for tracking. `-diff` | Use snapshot diff report to identify the difference between source and target. | `-numListstatusThreads` | Number of threads to use for building file listing | At most 40 threads. @@ -432,43 +430,6 @@ $H3 MapReduce and other side-effects * If `mapreduce.map.speculative` is set set final and true, the result of the copy is undefined. -$H3 SSL Configurations for HSFTP sources - - To use an HSFTP source (i.e. using the hsftp protocol), a SSL configuration - file needs to be specified (via the `-mapredSslConf` option). This must - specify 3 parameters: - - * `ssl.client.truststore.location`: The local-filesystem location of the - trust-store file, containing the certificate for the NameNode. - * `ssl.client.truststore.type`: (Optional) The format of the trust-store - file. - * `ssl.client.truststore.password`: (Optional) Password for the trust-store - file. - - The following is an example SSL configuration file: - - - - ssl.client.truststore.location - /work/keystore.jks - Truststore to be used by clients like distcp. Must be specified. - - - - ssl.client.truststore.password - changeme - Optional. Default value is "". - - - - ssl.client.truststore.type - jks - Optional. Default value is "jks". - - - - The SSL configuration file must be in the class-path of the DistCp program. - Frequently Asked Questions -------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java index 4c136a89ab2..6186f30b801 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java @@ -251,21 +251,6 @@ public class TestOptionsParser { } catch (IllegalArgumentException ignore) { } } - @Test - public void testParseSSLConf() { - DistCpOptions options = OptionsParser.parse(new String[] { - "hdfs://localhost:8020/source/first", - "hdfs://localhost:8020/target/"}); - Assert.assertNull(options.getSslConfigurationFile()); - - options = OptionsParser.parse(new String[] { - "-mapredSslConf", - "/tmp/ssl-client.xml", - "hdfs://localhost:8020/source/first", - "hdfs://localhost:8020/target/"}); - Assert.assertEquals(options.getSslConfigurationFile(), "/tmp/ssl-client.xml"); - } - @Test public void testParseMaps() { DistCpOptions options = OptionsParser.parse(new String[] { @@ -402,7 +387,7 @@ public class TestOptionsParser { String val = "DistCpOptions{atomicCommit=false, syncFolder=false, " + "deleteMissing=false, ignoreFailures=false, overwrite=false, " + "skipCRC=false, blocking=true, numListstatusThreads=0, maxMaps=20, " - + "mapBandwidth=100.0, sslConfigurationFile='null', " + + "mapBandwidth=100.0, " + "copyStrategy='uniformsize', preserveStatus=[], " + "preserveRawXattrs=false, atomicWorkPath=null, logPath=null, " + "sourceFileListing=abc, sourcePaths=null, targetPath=xyz, "