MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file names also.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1182293 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ravi Gummadi 2011-10-12 09:58:14 +00:00
parent d6546fc0a4
commit efb3cd64a2
5 changed files with 219 additions and 71 deletions

View File

@ -24,6 +24,9 @@ Trunk (unreleased changes)
BUG FIXES
MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file
names also. (Ravi Gummadi)
MAPREDUCE-2950. [Gridmix] TestUserResolve fails in trunk.
(Ravi Gummadi via amarrk)

View File

@ -246,8 +246,57 @@ public void testHadoop20JHParser() throws Exception {
}
/**
* Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
* filenames. The testcase checks if {@link TraceBuilder}
* Validate the parsing of given history file name. Also validate the history
* file name suffixed with old/stale file suffix.
* @param jhFileName job history file path
* @param jid JobID
*/
private void validateHistoryFileNameParsing(Path jhFileName,
    org.apache.hadoop.mapred.JobID jid) {
  // Purpose: check that JobHistoryUtils.extractJobID() recovers the expected
  // job id from the given history file name, both as-is and with the
  // old/stale file suffix appended.
  //   jhFileName - job history file path whose last component is parsed
  //   jid        - job id the file name is expected to yield

  // Parse the file name exactly as supplied.
  JobID extractedJID =
      JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
  // The ':' keeps the path readable in the failure message; without it the
  // path is glued directly onto the word "filename".
  assertEquals("TraceBuilder failed to parse the current JH filename:"
      + jhFileName, jid, extractedJID);

  // Parse the same file name again with the old/stale file suffix appended.
  Path oldSuffixedFileName =
      jhFileName.suffix(JobHistory.getOldFileSuffix("123"));
  extractedJID = JobID.forName(
      JobHistoryUtils.extractJobID(oldSuffixedFileName.getName()));
  assertEquals("TraceBuilder failed to parse the current JH filename"
      + "(old-suffix):" + oldSuffixedFileName,
      jid, extractedJID);
}
/**
 * Checks that the given job history conf file name is recognised as a job
 * conf xml file name and that the expected job id can be extracted from it.
 * The same two checks are then repeated on the file name with the old/stale
 * file suffix appended.
 * @param jhConfFileName job history conf file path
 * @param jid JobID the file name is expected to yield
 */
private void validateJHConfFileNameParsing(Path jhConfFileName,
    org.apache.hadoop.mapred.JobID jid) {
  // Pass 1: the conf file name exactly as supplied.
  final String plainName = jhConfFileName.getName();
  assertTrue("TraceBuilder failed to parse the JH conf filename:"
      + jhConfFileName,
      JobHistoryUtils.isJobConfXml(plainName));
  final JobID plainJobId =
      JobID.forName(JobHistoryUtils.extractJobID(plainName));
  assertEquals("TraceBuilder failed to parse the current JH conf filename:"
      + jhConfFileName, jid, plainJobId);

  // Pass 2: the conf file name carrying the old/stale file suffix.
  final Path staleConfFile =
      jhConfFileName.suffix(JobHistory.getOldFileSuffix("123"));
  final String staleName = staleConfFile.getName();
  assertTrue("TraceBuilder failed to parse the current JH conf filename"
      + " (old suffix):" + staleConfFile,
      JobHistoryUtils.isJobConfXml(staleName));
  final JobID staleJobId =
      JobID.forName(JobHistoryUtils.extractJobID(staleName));
  assertEquals("TraceBuilder failed to parse the JH conf filename"
      + "(old-suffix):" + staleConfFile,
      jid, staleJobId);
}
/**
* Tests if {@link TraceBuilder} can correctly identify and parse different
* versions of jobhistory filenames. The testcase checks if
* {@link TraceBuilder}
* - correctly identifies a jobhistory filename without suffix
* - correctly parses a jobhistory filename without suffix to extract out
* the jobid
@ -261,36 +310,36 @@ public void testHadoop20JHParser() throws Exception {
public void testJobHistoryFilenameParsing() throws IOException {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
String user = "test";
String user = "testUser";
org.apache.hadoop.mapred.JobID jid =
new org.apache.hadoop.mapred.JobID("12345", 1);
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", ""))
.makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
// Check if jobhistory filename are detected properly
// Check if current jobhistory filenames are detected properly
Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user);
JobID extractedJID =
JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
assertEquals("TraceBuilder failed to parse the current JH filename",
jid, extractedJID);
// test jobhistory filename with old/stale file suffix
jhFilename = jhFilename.suffix(JobHistory.getOldFileSuffix("123"));
extractedJID =
JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
assertEquals("TraceBuilder failed to parse the current JH filename"
+ "(old-suffix)",
jid, extractedJID);
validateHistoryFileNameParsing(jhFilename, jid);
// Check if the conf filename in jobhistory are detected properly
// Check if Pre21 V1 jobhistory file names are detected properly
jhFilename = new Path("jt-identifier_" + jid + "_user-name_job-name");
validateHistoryFileNameParsing(jhFilename, jid);
// Check if Pre21 V2 jobhistory file names are detected properly
jhFilename = new Path(jid + "_user-name_job-name");
validateHistoryFileNameParsing(jhFilename, jid);
// Check if the current jobhistory conf filenames are detected properly
Path jhConfFilename = JobHistory.getConfFile(rootInputDir, jid);
assertTrue("TraceBuilder failed to parse the current JH conf filename",
TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
// test jobhistory conf filename with old/stale file suffix
jhConfFilename = jhConfFilename.suffix(JobHistory.getOldFileSuffix("123"));
assertTrue("TraceBuilder failed to parse the current JH conf filename"
+ " (old suffix)",
TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
validateJHConfFileNameParsing(jhConfFilename, jid);
// Check if Pre21 V1 jobhistory conf file names are detected properly
jhConfFilename = new Path("jt-identifier_" + jid + "_conf.xml");
validateJHConfFileNameParsing(jhConfFilename, jid);
// Check if Pre21 V2 jobhistory conf file names are detected properly
jhConfFilename = new Path(jid + "_conf.xml");
validateJHConfFileNameParsing(jhConfFilename, jid);
}
/**

View File

@ -0,0 +1,115 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.rumen;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
/**
 * Job History related utils for handling multiple formats of history logs of
 * different hadoop versions like Pre21 history logs, current history logs.
 * <p>
 * All methods are static and work purely on file names; no file is opened.
 */
public class JobHistoryUtils {

  /**
   * Matches the whole file name against a single pattern.
   * @param fileName file name to match, may be <code>null</code>
   * @param pattern pattern whose first capturing group holds the job id
   * @return the text of capturing group 1 if the entire
   *         <code>fileName</code> matches, <code>null</code> otherwise
   *         (including when <code>fileName</code> is <code>null</code>)
   */
  private static String applyParser(String fileName, Pattern pattern) {
    // Pattern.matcher() cannot take null; treat a missing name as "no match"
    // so callers get the documented null instead of a NullPointerException.
    if (fileName == null) {
      return null;
    }
    Matcher matcher = pattern.matcher(fileName);
    if (!matcher.matches()) {
      return null;
    }
    return matcher.group(1);
  }

  /**
   * Tries the given patterns in order and returns the first job id found.
   * @param fileName file name to match, may be <code>null</code>
   * @param patterns candidate patterns, tried in the given order
   * @return job id captured by the first pattern that yields one,
   *         <code>null</code> if none does
   */
  private static String applyParsers(String fileName, Pattern... patterns) {
    for (Pattern pattern : patterns) {
      String jobId = applyParser(fileName, pattern);
      if (jobId != null) {
        return jobId;
      }
    }
    return null;
  }

  /**
   * Extracts jobID string from the given job history log file name or
   * job history configuration file name.
   * @param fileName name of job history file or job history configuration file
   * @return a valid jobID String, parsed out of the file name. Otherwise,
   *         [especially for .crc files] returns null.
   */
  static String extractJobID(String fileName) {
    // Get jobID if fileName is a config file name.
    String jobId = extractJobIDFromConfFileName(fileName);
    if (jobId == null) {
      // Get JobID if fileName is a job history file name
      jobId = extractJobIDFromHistoryFileName(fileName);
    }
    return jobId;
  }

  /**
   * Extracts jobID string from the given job history file name.
   * @param fileName name of the job history file
   * @return JobID if the given <code>fileName</code> is a valid job history
   *         file name, <code>null</code> otherwise.
   */
  private static String extractJobIDFromHistoryFileName(String fileName) {
    // History file name could be in one of the following formats,
    // tried in this order:
    // (1) old pre21 job history file name format
    // (2) new pre21 job history file name format
    // (3) current job history file name format i.e. 0.22
    return applyParsers(fileName,
        Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1,
        Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2,
        JobHistory.JOBHISTORY_FILENAME_REGEX);
  }

  /**
   * Extracts jobID string from the given job conf xml file name.
   * @param fileName name of the job conf xml file
   * @return job id if the given <code>fileName</code> is a valid job conf xml
   *         file name, <code>null</code> otherwise.
   */
  private static String extractJobIDFromConfFileName(String fileName) {
    // History conf file name could be in one of the following formats,
    // tried in this order:
    // (1) old pre21 job history conf file name format
    // (2) new pre21 job history conf file name format
    // (3) current job history conf file name format i.e. 0.22
    return applyParsers(fileName,
        Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V1,
        Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V2,
        JobHistory.CONF_FILENAME_REGEX);
  }

  /**
   * Checks if the given <code>fileName</code> is a valid job conf xml file name
   * @param fileName name of the file to be validated
   * @return <code>true</code> if the given <code>fileName</code> is a valid
   *         job conf xml file name.
   */
  static boolean isJobConfXml(String fileName) {
    return extractJobIDFromConfFileName(fileName) != null;
  }
}

View File

@ -20,10 +20,10 @@
import java.util.regex.Pattern;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
/**
*
*
* Job History related constants for Hadoop releases prior to 0.21
*/
public class Pre21JobHistoryConstants {
@ -51,18 +51,34 @@ public static enum Values {
}
/**
* Pre21 regex for jobhistory filename
* Regex for Pre21 V1(old) jobhistory filename
* i.e jt-identifier_job-id_user-name_job-name
*/
static final Pattern JOBHISTORY_FILENAME_REGEX =
static final Pattern JOBHISTORY_FILENAME_REGEX_V1 =
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_.+");
/**
* Regex for Pre21 V2(new) jobhistory filename
* i.e job-id_user-name_job-name
*/
static final Pattern JOBHISTORY_FILENAME_REGEX_V2 =
Pattern.compile("(" + JobID.JOBID_REGEX + ")_.+");
static final String OLD_FULL_SUFFIX_REGEX_STRING =
"(?:\\.[0-9]+" + Pattern.quote(JobHistory.OLD_SUFFIX) + ")";
/**
* Pre21 regex for jobhistory conf filename
* Regex for Pre21 V1(old) jobhistory conf filename
* i.e jt-identifier_job-id_conf.xml
*/
static final Pattern CONF_FILENAME_REGEX =
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX
+ ")_conf.xml(?:\\.[0-9a-zA-Z]+)?");
static final Pattern CONF_FILENAME_REGEX_V1 =
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_conf.xml"
+ OLD_FULL_SUFFIX_REGEX_STRING + "?");
/**
* Regex for Pre21 V2(new) jobhistory conf filename
* i.e job-id_conf.xml
*/
static final Pattern CONF_FILENAME_REGEX_V2 =
Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf.xml"
+ OLD_FULL_SUFFIX_REGEX_STRING + "?");
}

View File

@ -198,42 +198,6 @@ public static void main(String[] args) {
}
}
private static String applyParser(String fileName, Pattern pattern) {
Matcher matcher = pattern.matcher(fileName);
if (!matcher.matches()) {
return null;
}
return matcher.group(1);
}
/**
* @param fileName
* @return the jobID String, parsed out of the file name. We return a valid
* String for either a history log file or a config file. Otherwise,
* [especially for .crc files] we return null.
*/
static String extractJobID(String fileName) {
String jobId = applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
if (jobId == null) {
// check if its a pre21 jobhistory file
jobId = applyParser(fileName,
Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX);
}
return jobId;
}
static boolean isJobConfXml(String fileName, InputStream input) {
String jobId = applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
if (jobId == null) {
// check if its a pre21 jobhistory conf file
jobId = applyParser(fileName,
Pre21JobHistoryConstants.CONF_FILENAME_REGEX);
}
return jobId != null;
}
@SuppressWarnings("unchecked")
@Override
@ -268,7 +232,7 @@ public int run(String[] args) throws Exception {
JobHistoryParser parser = null;
try {
String jobID = extractJobID(filePair.first());
String jobID = JobHistoryUtils.extractJobID(filePair.first());
if (jobID == null) {
LOG.warn("File skipped: Invalid file name: "
+ filePair.first());
@ -282,8 +246,9 @@ public int run(String[] args) throws Exception {
jobBuilder = new JobBuilder(jobID);
}
if (isJobConfXml(filePair.first(), ris)) {
processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder);
if (JobHistoryUtils.isJobConfXml(filePair.first())) {
processJobConf(JobConfigurationParser.parse(ris.rewind()),
jobBuilder);
} else {
parser = JobHistoryParserFactory.getParser(ris);
if (parser == null) {