MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file names also.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1182293 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d6546fc0a4
commit
efb3cd64a2
|
@ -24,6 +24,9 @@ Trunk (unreleased changes)
|
|||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file
|
||||
names also. (Ravi Gummadi)
|
||||
|
||||
MAPREDUCE-2950. [Gridmix] TestUserResolve fails in trunk.
|
||||
(Ravi Gummadi via amarrk)
|
||||
|
||||
|
|
|
@ -246,8 +246,57 @@ public class TestRumenJobTraces {
|
|||
}
|
||||
|
||||
/**
|
||||
* Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
|
||||
* filenames. The testcase checks if {@link TraceBuilder}
|
||||
* Validate the parsing of given history file name. Also validate the history
|
||||
* file name suffixed with old/stale file suffix.
|
||||
* @param jhFileName job history file path
|
||||
* @param jid JobID
|
||||
*/
|
||||
private void validateHistoryFileNameParsing(Path jhFileName,
|
||||
org.apache.hadoop.mapred.JobID jid) {
|
||||
JobID extractedJID =
|
||||
JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
|
||||
assertEquals("TraceBuilder failed to parse the current JH filename"
|
||||
+ jhFileName, jid, extractedJID);
|
||||
// test jobhistory filename with old/stale file suffix
|
||||
jhFileName = jhFileName.suffix(JobHistory.getOldFileSuffix("123"));
|
||||
extractedJID =
|
||||
JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
|
||||
assertEquals("TraceBuilder failed to parse the current JH filename"
|
||||
+ "(old-suffix):" + jhFileName,
|
||||
jid, extractedJID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate the parsing of given history conf file name. Also validate the
|
||||
* history conf file name suffixed with old/stale file suffix.
|
||||
* @param jhConfFileName job history conf file path
|
||||
* @param jid JobID
|
||||
*/
|
||||
private void validateJHConfFileNameParsing(Path jhConfFileName,
|
||||
org.apache.hadoop.mapred.JobID jid) {
|
||||
assertTrue("TraceBuilder failed to parse the JH conf filename:"
|
||||
+ jhConfFileName,
|
||||
JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
|
||||
JobID extractedJID =
|
||||
JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
|
||||
assertEquals("TraceBuilder failed to parse the current JH conf filename:"
|
||||
+ jhConfFileName, jid, extractedJID);
|
||||
// Test jobhistory conf filename with old/stale file suffix
|
||||
jhConfFileName = jhConfFileName.suffix(JobHistory.getOldFileSuffix("123"));
|
||||
assertTrue("TraceBuilder failed to parse the current JH conf filename"
|
||||
+ " (old suffix):" + jhConfFileName,
|
||||
JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
|
||||
extractedJID =
|
||||
JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
|
||||
assertEquals("TraceBuilder failed to parse the JH conf filename"
|
||||
+ "(old-suffix):" + jhConfFileName,
|
||||
jid, extractedJID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if {@link TraceBuilder} can correctly identify and parse different
|
||||
* versions of jobhistory filenames. The testcase checks if
|
||||
* {@link TraceBuilder}
|
||||
* - correctly identifies a jobhistory filename without suffix
|
||||
* - correctly parses a jobhistory filename without suffix to extract out
|
||||
* the jobid
|
||||
|
@ -261,36 +310,36 @@ public class TestRumenJobTraces {
|
|||
public void testJobHistoryFilenameParsing() throws IOException {
|
||||
final Configuration conf = new Configuration();
|
||||
final FileSystem lfs = FileSystem.getLocal(conf);
|
||||
String user = "test";
|
||||
String user = "testUser";
|
||||
org.apache.hadoop.mapred.JobID jid =
|
||||
new org.apache.hadoop.mapred.JobID("12345", 1);
|
||||
final Path rootInputDir =
|
||||
new Path(System.getProperty("test.tools.input.dir", ""))
|
||||
.makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
|
||||
|
||||
// Check if jobhistory filename are detected properly
|
||||
// Check if current jobhistory filenames are detected properly
|
||||
Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user);
|
||||
JobID extractedJID =
|
||||
JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
|
||||
assertEquals("TraceBuilder failed to parse the current JH filename",
|
||||
jid, extractedJID);
|
||||
// test jobhistory filename with old/stale file suffix
|
||||
jhFilename = jhFilename.suffix(JobHistory.getOldFileSuffix("123"));
|
||||
extractedJID =
|
||||
JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
|
||||
assertEquals("TraceBuilder failed to parse the current JH filename"
|
||||
+ "(old-suffix)",
|
||||
jid, extractedJID);
|
||||
|
||||
// Check if the conf filename in jobhistory are detected properly
|
||||
validateHistoryFileNameParsing(jhFilename, jid);
|
||||
|
||||
// Check if Pre21 V1 jobhistory file names are detected properly
|
||||
jhFilename = new Path("jt-identifier_" + jid + "_user-name_job-name");
|
||||
validateHistoryFileNameParsing(jhFilename, jid);
|
||||
|
||||
// Check if Pre21 V2 jobhistory file names are detected properly
|
||||
jhFilename = new Path(jid + "_user-name_job-name");
|
||||
validateHistoryFileNameParsing(jhFilename, jid);
|
||||
|
||||
// Check if the current jobhistory conf filenames are detected properly
|
||||
Path jhConfFilename = JobHistory.getConfFile(rootInputDir, jid);
|
||||
assertTrue("TraceBuilder failed to parse the current JH conf filename",
|
||||
TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
|
||||
// test jobhistory conf filename with old/stale file suffix
|
||||
jhConfFilename = jhConfFilename.suffix(JobHistory.getOldFileSuffix("123"));
|
||||
assertTrue("TraceBuilder failed to parse the current JH conf filename"
|
||||
+ " (old suffix)",
|
||||
TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
|
||||
validateJHConfFileNameParsing(jhConfFilename, jid);
|
||||
|
||||
// Check if Pre21 V1 jobhistory conf file names are detected properly
|
||||
jhConfFilename = new Path("jt-identifier_" + jid + "_conf.xml");
|
||||
validateJHConfFileNameParsing(jhConfFilename, jid);
|
||||
|
||||
// Check if Pre21 V2 jobhistory conf file names are detected properly
|
||||
jhConfFilename = new Path(jid + "_conf.xml");
|
||||
validateJHConfFileNameParsing(jhConfFilename, jid);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.tools.rumen;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.mapreduce.JobID;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
|
||||
|
||||
/**
|
||||
* Job History related utils for handling multiple formats of history logs of
|
||||
* different hadoop versions like Pre21 history logs, current history logs.
|
||||
*/
|
||||
public class JobHistoryUtils {
|
||||
|
||||
private static String applyParser(String fileName, Pattern pattern) {
|
||||
Matcher matcher = pattern.matcher(fileName);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts jobID string from the given job history log file name or
|
||||
* job history configuration file name.
|
||||
* @param fileName name of job history file or job history configuration file
|
||||
* @return a valid jobID String, parsed out of the file name. Otherwise,
|
||||
* [especially for .crc files] returns null.
|
||||
*/
|
||||
static String extractJobID(String fileName) {
|
||||
// Get jobID if fileName is a config file name.
|
||||
String jobId = extractJobIDFromConfFileName(fileName);
|
||||
if (jobId == null) {
|
||||
// Get JobID if fileName is a job history file name
|
||||
jobId = extractJobIDFromHistoryFileName(fileName);
|
||||
}
|
||||
return jobId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts jobID string from the given job history file name.
|
||||
* @param fileName name of the job history file
|
||||
* @return JobID if the given <code>fileName</code> is a valid job history
|
||||
* file name, <code>null</code> otherwise.
|
||||
*/
|
||||
private static String extractJobIDFromHistoryFileName(String fileName) {
|
||||
// History file name could be in one of the following formats
|
||||
// (1) old pre21 job history file name format
|
||||
// (2) new pre21 job history file name format
|
||||
// (3) current job history file name format i.e. 0.22
|
||||
String pre21JobID = applyParser(fileName,
|
||||
Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1);
|
||||
if (pre21JobID == null) {
|
||||
pre21JobID = applyParser(fileName,
|
||||
Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2);
|
||||
}
|
||||
if (pre21JobID != null) {
|
||||
return pre21JobID;
|
||||
}
|
||||
return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts jobID string from the given job conf xml file name.
|
||||
* @param fileName name of the job conf xml file
|
||||
* @return job id if the given <code>fileName</code> is a valid job conf xml
|
||||
* file name, <code>null</code> otherwise.
|
||||
*/
|
||||
private static String extractJobIDFromConfFileName(String fileName) {
|
||||
// History conf file name could be in one of the following formats
|
||||
// (1) old pre21 job history conf file name format
|
||||
// (2) new pre21 job history conf file name format
|
||||
// (3) current job history conf file name format i.e. 0.22
|
||||
String pre21JobID = applyParser(fileName,
|
||||
Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V1);
|
||||
if (pre21JobID == null) {
|
||||
pre21JobID = applyParser(fileName,
|
||||
Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V2);
|
||||
}
|
||||
if (pre21JobID != null) {
|
||||
return pre21JobID;
|
||||
}
|
||||
return applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the given <code>fileName</code> is a valid job conf xml file name
|
||||
* @param fileName name of the file to be validated
|
||||
* @return <code>true</code> if the given <code>fileName</code> is a valid
|
||||
* job conf xml file name.
|
||||
*/
|
||||
static boolean isJobConfXml(String fileName) {
|
||||
String jobId = extractJobIDFromConfFileName(fileName);
|
||||
return jobId != null;
|
||||
}
|
||||
}
|
|
@ -20,10 +20,10 @@ package org.apache.hadoop.tools.rumen;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.mapreduce.JobID;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* Job History related constants for Hadoop releases prior to 0.21
|
||||
*/
|
||||
public class Pre21JobHistoryConstants {
|
||||
|
||||
|
@ -51,18 +51,34 @@ public class Pre21JobHistoryConstants {
|
|||
}
|
||||
|
||||
/**
|
||||
* Pre21 regex for jobhistory filename
|
||||
* Regex for Pre21 V1(old) jobhistory filename
|
||||
* i.e jt-identifier_job-id_user-name_job-name
|
||||
*/
|
||||
static final Pattern JOBHISTORY_FILENAME_REGEX =
|
||||
static final Pattern JOBHISTORY_FILENAME_REGEX_V1 =
|
||||
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_.+");
|
||||
/**
|
||||
* Regex for Pre21 V2(new) jobhistory filename
|
||||
* i.e job-id_user-name_job-name
|
||||
*/
|
||||
static final Pattern JOBHISTORY_FILENAME_REGEX_V2 =
|
||||
Pattern.compile("(" + JobID.JOBID_REGEX + ")_.+");
|
||||
|
||||
static final String OLD_FULL_SUFFIX_REGEX_STRING =
|
||||
"(?:\\.[0-9]+" + Pattern.quote(JobHistory.OLD_SUFFIX) + ")";
|
||||
|
||||
/**
|
||||
* Pre21 regex for jobhistory conf filename
|
||||
* Regex for Pre21 V1(old) jobhistory conf filename
|
||||
* i.e jt-identifier_job-id_conf.xml
|
||||
*/
|
||||
static final Pattern CONF_FILENAME_REGEX =
|
||||
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX
|
||||
+ ")_conf.xml(?:\\.[0-9a-zA-Z]+)?");
|
||||
static final Pattern CONF_FILENAME_REGEX_V1 =
|
||||
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_conf.xml"
|
||||
+ OLD_FULL_SUFFIX_REGEX_STRING + "?");
|
||||
/**
|
||||
* Regex for Pre21 V2(new) jobhistory conf filename
|
||||
* i.e job-id_conf.xml
|
||||
*/
|
||||
static final Pattern CONF_FILENAME_REGEX_V2 =
|
||||
Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf.xml"
|
||||
+ OLD_FULL_SUFFIX_REGEX_STRING + "?");
|
||||
|
||||
}
|
||||
|
|
|
@ -198,42 +198,6 @@ public class TraceBuilder extends Configured implements Tool {
|
|||
}
|
||||
}
|
||||
|
||||
private static String applyParser(String fileName, Pattern pattern) {
|
||||
Matcher matcher = pattern.matcher(fileName);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param fileName
|
||||
* @return the jobID String, parsed out of the file name. We return a valid
|
||||
* String for either a history log file or a config file. Otherwise,
|
||||
* [especially for .crc files] we return null.
|
||||
*/
|
||||
static String extractJobID(String fileName) {
|
||||
String jobId = applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
|
||||
if (jobId == null) {
|
||||
// check if its a pre21 jobhistory file
|
||||
jobId = applyParser(fileName,
|
||||
Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX);
|
||||
}
|
||||
return jobId;
|
||||
}
|
||||
|
||||
static boolean isJobConfXml(String fileName, InputStream input) {
|
||||
String jobId = applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
|
||||
if (jobId == null) {
|
||||
// check if its a pre21 jobhistory conf file
|
||||
jobId = applyParser(fileName,
|
||||
Pre21JobHistoryConstants.CONF_FILENAME_REGEX);
|
||||
}
|
||||
return jobId != null;
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
|
@ -268,7 +232,7 @@ public class TraceBuilder extends Configured implements Tool {
|
|||
JobHistoryParser parser = null;
|
||||
|
||||
try {
|
||||
String jobID = extractJobID(filePair.first());
|
||||
String jobID = JobHistoryUtils.extractJobID(filePair.first());
|
||||
if (jobID == null) {
|
||||
LOG.warn("File skipped: Invalid file name: "
|
||||
+ filePair.first());
|
||||
|
@ -282,8 +246,9 @@ public class TraceBuilder extends Configured implements Tool {
|
|||
jobBuilder = new JobBuilder(jobID);
|
||||
}
|
||||
|
||||
if (isJobConfXml(filePair.first(), ris)) {
|
||||
processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder);
|
||||
if (JobHistoryUtils.isJobConfXml(filePair.first())) {
|
||||
processJobConf(JobConfigurationParser.parse(ris.rewind()),
|
||||
jobBuilder);
|
||||
} else {
|
||||
parser = JobHistoryParserFactory.getParser(ris);
|
||||
if (parser == null) {
|
||||
|
|
Loading…
Reference in New Issue