MAPREDUCE-6741. Add MR support to redact job conf properties. Contributed by Haibo Chen
This commit is contained in:
parent
522ddbde79
commit
4aba858750
|
@ -52,6 +52,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
|
|||
import org.apache.hadoop.mapreduce.TaskType;
|
||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||
import org.apache.hadoop.mapreduce.util.JobHistoryEventUtils;
|
||||
import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
|
@ -507,16 +508,16 @@ public class JobHistoryEventHandler extends AbstractService
|
|||
if (conf != null) {
|
||||
// TODO Ideally this should be written out to the job dir
|
||||
// (.staging/jobid/files - RecoveryService will need to be patched)
|
||||
FSDataOutputStream jobFileOut = null;
|
||||
try {
|
||||
if (logDirConfPath != null) {
|
||||
jobFileOut = stagingDirFS.create(logDirConfPath, true);
|
||||
conf.writeXml(jobFileOut);
|
||||
jobFileOut.close();
|
||||
if (logDirConfPath != null) {
|
||||
Configuration redactedConf = new Configuration(conf);
|
||||
MRJobConfUtil.redact(redactedConf);
|
||||
try (FSDataOutputStream jobFileOut = stagingDirFS
|
||||
.create(logDirConfPath, true)) {
|
||||
redactedConf.writeXml(jobFileOut);
|
||||
} catch (IOException e) {
|
||||
LOG.info("Failed to write the job configuration file", e);
|
||||
throw e;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.info("Failed to write the job configuration file", e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,8 +26,7 @@ import javax.xml.bind.annotation.XmlAccessorType;
|
|||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.Job;
|
||||
|
||||
@XmlRootElement(name = "conf")
|
||||
|
@ -45,6 +44,7 @@ public class ConfInfo {
|
|||
this.property = new ArrayList<ConfEntryInfo>();
|
||||
Configuration jobConf = job.loadConfFile();
|
||||
this.path = job.getConfFile().toString();
|
||||
MRJobConfUtil.redact(jobConf);
|
||||
for (Map.Entry<String, String> entry : jobConf) {
|
||||
this.property.add(new ConfEntryInfo(entry.getKey(), entry.getValue(),
|
||||
jobConf.getPropertySources(entry.getKey())));
|
||||
|
|
|
@ -29,6 +29,7 @@ import static org.mockito.Mockito.when;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
|
||||
|
@ -52,6 +53,7 @@ import org.apache.hadoop.mapreduce.TaskID;
|
|||
import org.apache.hadoop.mapreduce.TaskType;
|
||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||
import org.apache.hadoop.mapreduce.util.JobHistoryEventUtils;
|
||||
import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
|
||||
import org.apache.hadoop.mapreduce.v2.app.AppContext;
|
||||
import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext;
|
||||
|
@ -372,6 +374,74 @@ public class TestJobHistoryEventHandler {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPropertyRedactionForJHS() throws Exception {
|
||||
final Configuration conf = new Configuration();
|
||||
|
||||
String sensitivePropertyName = "aws.fake.credentials.name";
|
||||
String sensitivePropertyValue = "aws.fake.credentials.val";
|
||||
conf.set(sensitivePropertyName, sensitivePropertyValue);
|
||||
conf.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES,
|
||||
sensitivePropertyName);
|
||||
conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY,
|
||||
dfsCluster.getURI().toString());
|
||||
final TestParams params = new TestParams();
|
||||
conf.set(MRJobConfig.MR_AM_STAGING_DIR, params.dfsWorkDir);
|
||||
|
||||
final JHEvenHandlerForTest jheh =
|
||||
new JHEvenHandlerForTest(params.mockAppContext, 0, false);
|
||||
|
||||
try {
|
||||
jheh.init(conf);
|
||||
jheh.start();
|
||||
handleEvent(jheh, new JobHistoryEvent(params.jobId,
|
||||
new AMStartedEvent(params.appAttemptId, 200, params.containerId,
|
||||
"nmhost", 3000, 4000, -1)));
|
||||
handleEvent(jheh, new JobHistoryEvent(params.jobId,
|
||||
new JobUnsuccessfulCompletionEvent(TypeConverter.fromYarn(
|
||||
params.jobId), 0, 0, 0, JobStateInternal.FAILED.toString())));
|
||||
|
||||
// verify the value of the sensitive property in job.xml is restored.
|
||||
Assert.assertEquals(sensitivePropertyName + " is modified.",
|
||||
conf.get(sensitivePropertyName), sensitivePropertyValue);
|
||||
|
||||
// load the job_conf.xml in JHS directory and verify property redaction.
|
||||
Path jhsJobConfFile = getJobConfInIntermediateDoneDir(conf, params.jobId);
|
||||
Assert.assertTrue("The job_conf.xml file is not in the JHS directory",
|
||||
FileContext.getFileContext(conf).util().exists(jhsJobConfFile));
|
||||
Configuration jhsJobConf = new Configuration();
|
||||
|
||||
try (InputStream input = FileSystem.get(conf).open(jhsJobConfFile)) {
|
||||
jhsJobConf.addResource(input);
|
||||
Assert.assertEquals(
|
||||
sensitivePropertyName + " is not redacted in HDFS.",
|
||||
MRJobConfUtil.REDACTION_REPLACEMENT_VAL,
|
||||
jhsJobConf.get(sensitivePropertyName));
|
||||
}
|
||||
} finally {
|
||||
jheh.stop();
|
||||
purgeHdfsHistoryIntermediateDoneDirectory(conf);
|
||||
}
|
||||
}
|
||||
|
||||
private static Path getJobConfInIntermediateDoneDir(Configuration conf,
|
||||
JobId jobId) throws IOException {
|
||||
Path userDoneDir = new Path(
|
||||
JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf));
|
||||
Path doneDirPrefix =
|
||||
FileContext.getFileContext(conf).makeQualified(userDoneDir);
|
||||
return new Path(
|
||||
doneDirPrefix, JobHistoryUtils.getIntermediateConfFileName(jobId));
|
||||
}
|
||||
|
||||
private void purgeHdfsHistoryIntermediateDoneDirectory(Configuration conf)
|
||||
throws IOException {
|
||||
FileSystem fs = FileSystem.get(dfsCluster.getConfiguration(0));
|
||||
String intermDoneDirPrefix =
|
||||
JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
|
||||
fs.delete(new Path(intermDoneDirPrefix), true);
|
||||
}
|
||||
|
||||
@Test (timeout=50000)
|
||||
public void testDefaultFsIsUsedForHistory() throws Exception {
|
||||
// Create default configuration pointing to the minicluster
|
||||
|
@ -413,6 +483,7 @@ public class TestJobHistoryEventHandler {
|
|||
localFileSystem.exists(new Path(t.dfsWorkDir)));
|
||||
} finally {
|
||||
jheh.stop();
|
||||
purgeHdfsHistoryIntermediateDoneDirectory(conf);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ import java.io.PrintWriter;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.mapreduce.MRJobConfig;
|
||||
import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
|
||||
import org.apache.hadoop.yarn.webapp.View;
|
||||
import org.junit.Test;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -65,6 +67,9 @@ public class TestBlocks {
|
|||
Path path = new Path("conf");
|
||||
Configuration configuration = new Configuration();
|
||||
configuration.set("Key for test", "Value for test");
|
||||
final String redactedProp = "Key for redaction";
|
||||
configuration.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES,
|
||||
redactedProp);
|
||||
when(job.getConfFile()).thenReturn(path);
|
||||
when(job.loadConfFile()).thenReturn(configuration);
|
||||
|
||||
|
@ -85,9 +90,10 @@ public class TestBlocks {
|
|||
configurationBlock.render(html);
|
||||
pWriter.flush();
|
||||
assertTrue(data.toString().contains("Key for test"));
|
||||
|
||||
assertTrue(data.toString().contains("Value for test"));
|
||||
|
||||
assertTrue(data.toString().contains(redactedProp));
|
||||
assertTrue(data.toString().contains(
|
||||
MRJobConfUtil.REDACTION_REPLACEMENT_VAL));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -977,4 +977,9 @@ public interface MRJobConfig {
|
|||
public static final String MR_NUM_OPPORTUNISTIC_MAPS_PER_100 =
|
||||
"mapreduce.job.num-opportunistic-maps-per-100";
|
||||
public static final int DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PER_100 = 0;
|
||||
|
||||
/**
|
||||
* A comma-separated list of properties whose value will be redacted.
|
||||
*/
|
||||
String MR_JOB_REDACTED_PROPERTIES = "mapreduce.job.redacted-properties";
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.mapreduce.util;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.mapreduce.MRJobConfig;
|
||||
|
||||
/**
|
||||
* A class that contains utility methods for MR Job configuration.
|
||||
*/
|
||||
public final class MRJobConfUtil {
|
||||
public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)";
|
||||
|
||||
/**
|
||||
* Redact job configuration properties.
|
||||
* @param conf the job configuration to redact
|
||||
*/
|
||||
public static void redact(final Configuration conf) {
|
||||
for (String prop : conf.getTrimmedStringCollection(
|
||||
MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) {
|
||||
conf.set(prop, REDACTION_REPLACEMENT_VAL);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* There is no reason to instantiate this utility class.
|
||||
*/
|
||||
private MRJobConfUtil() {
|
||||
}
|
||||
}
|
|
@ -1932,4 +1932,12 @@
|
|||
<name>mapreduce.jobhistory.loadedjob.tasks.max</name>
|
||||
<value>-1</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>
|
||||
The list of job configuration properties whose value will be redacted.
|
||||
</description>
|
||||
<name>mapreduce.job.redacted-properties</name>
|
||||
<value></value>
|
||||
</property>
|
||||
</configuration>
|
||||
|
|
Loading…
Reference in New Issue