diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0cad461627a..d5d9196f2bd 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -24,6 +24,9 @@ Trunk (unreleased changes) BUG FIXES + MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file + names also. (Ravi Gummadi) + MAPREDUCE-2950. [Gridmix] TestUserResolve fails in trunk. (Ravi Gummadi via amarrk) diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java index 32bce5c3ce8..4246b4e2b1b 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java @@ -246,8 +246,57 @@ public class TestRumenJobTraces { } /** - * Tests if {@link TraceBuilder} can correctly identify and parse jobhistory - * filenames. The testcase checks if {@link TraceBuilder} + * Validate the parsing of given history file name. Also validate the history + * file name suffixed with old/stale file suffix. 
+ * @param jhFileName job history file path + * @param jid JobID + */ + private void validateHistoryFileNameParsing(Path jhFileName, + org.apache.hadoop.mapred.JobID jid) { + JobID extractedJID = + JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName())); + assertEquals("TraceBuilder failed to parse the current JH filename:" + + jhFileName, jid, extractedJID); + // test jobhistory filename with old/stale file suffix + jhFileName = jhFileName.suffix(JobHistory.getOldFileSuffix("123")); + extractedJID = + JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName())); + assertEquals("TraceBuilder failed to parse the current JH filename" + + "(old-suffix):" + jhFileName, + jid, extractedJID); + } + + /** + * Validate the parsing of given history conf file name. Also validate the + * history conf file name suffixed with old/stale file suffix. + * @param jhConfFileName job history conf file path + * @param jid JobID + */ + private void validateJHConfFileNameParsing(Path jhConfFileName, + org.apache.hadoop.mapred.JobID jid) { + assertTrue("TraceBuilder failed to parse the JH conf filename:" + + jhConfFileName, + JobHistoryUtils.isJobConfXml(jhConfFileName.getName())); + JobID extractedJID = + JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName())); + assertEquals("TraceBuilder failed to parse the current JH conf filename:" + + jhConfFileName, jid, extractedJID); + // Test jobhistory conf filename with old/stale file suffix + jhConfFileName = jhConfFileName.suffix(JobHistory.getOldFileSuffix("123")); + assertTrue("TraceBuilder failed to parse the current JH conf filename" + + " (old suffix):" + jhConfFileName, + JobHistoryUtils.isJobConfXml(jhConfFileName.getName())); + extractedJID = + JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName())); + assertEquals("TraceBuilder failed to parse the JH conf filename" + + "(old-suffix):" + jhConfFileName, + jid, extractedJID); + } + + /** + * Tests if {@link TraceBuilder} can correctly 
identify and parse different + * versions of jobhistory filenames. The testcase checks if + * {@link TraceBuilder} * - correctly identifies a jobhistory filename without suffix * - correctly parses a jobhistory filename without suffix to extract out * the jobid @@ -261,36 +310,36 @@ public class TestRumenJobTraces { public void testJobHistoryFilenameParsing() throws IOException { final Configuration conf = new Configuration(); final FileSystem lfs = FileSystem.getLocal(conf); - String user = "test"; + String user = "testUser"; org.apache.hadoop.mapred.JobID jid = new org.apache.hadoop.mapred.JobID("12345", 1); final Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", "")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); - // Check if jobhistory filename are detected properly + // Check if current jobhistory filenames are detected properly Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user); - JobID extractedJID = - JobID.forName(TraceBuilder.extractJobID(jhFilename.getName())); - assertEquals("TraceBuilder failed to parse the current JH filename", - jid, extractedJID); - // test jobhistory filename with old/stale file suffix - jhFilename = jhFilename.suffix(JobHistory.getOldFileSuffix("123")); - extractedJID = - JobID.forName(TraceBuilder.extractJobID(jhFilename.getName())); - assertEquals("TraceBuilder failed to parse the current JH filename" - + "(old-suffix)", - jid, extractedJID); - - // Check if the conf filename in jobhistory are detected properly + validateHistoryFileNameParsing(jhFilename, jid); + + // Check if Pre21 V1 jobhistory file names are detected properly + jhFilename = new Path("jt-identifier_" + jid + "_user-name_job-name"); + validateHistoryFileNameParsing(jhFilename, jid); + + // Check if Pre21 V2 jobhistory file names are detected properly + jhFilename = new Path(jid + "_user-name_job-name"); + validateHistoryFileNameParsing(jhFilename, jid); + + // Check if the current jobhistory conf filenames 
are detected properly Path jhConfFilename = JobHistory.getConfFile(rootInputDir, jid); - assertTrue("TraceBuilder failed to parse the current JH conf filename", - TraceBuilder.isJobConfXml(jhConfFilename.getName(), null)); - // test jobhistory conf filename with old/stale file suffix - jhConfFilename = jhConfFilename.suffix(JobHistory.getOldFileSuffix("123")); - assertTrue("TraceBuilder failed to parse the current JH conf filename" - + " (old suffix)", - TraceBuilder.isJobConfXml(jhConfFilename.getName(), null)); + validateJHConfFileNameParsing(jhConfFilename, jid); + + // Check if Pre21 V1 jobhistory conf file names are detected properly + jhConfFilename = new Path("jt-identifier_" + jid + "_conf.xml"); + validateJHConfFileNameParsing(jhConfFilename, jid); + + // Check if Pre21 V2 jobhistory conf file names are detected properly + jhConfFilename = new Path(jid + "_conf.xml"); + validateJHConfFileNameParsing(jhConfFilename, jid); } /** diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java new file mode 100644 index 00000000000..a6e8919ed90 --- /dev/null +++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tools.rumen; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.jobhistory.JobHistory; + +/** + * Job History related utils for handling multiple formats of history logs of + * different hadoop versions like Pre21 history logs, current history logs. + */ +public class JobHistoryUtils { + + private static String applyParser(String fileName, Pattern pattern) { + Matcher matcher = pattern.matcher(fileName); + + if (!matcher.matches()) { + return null; + } + + return matcher.group(1); + } + + /** + * Extracts jobID string from the given job history log file name or + * job history configuration file name. + * @param fileName name of job history file or job history configuration file + * @return a valid jobID String, parsed out of the file name. Otherwise, + * [especially for .crc files] returns null. + */ + static String extractJobID(String fileName) { + // Try the conf file name patterns first: a Pre21 conf file name also matches the looser Pre21 history file name regexes. + String jobId = extractJobIDFromConfFileName(fileName); + if (jobId == null) { + // Get JobID if fileName is a job history file name + jobId = extractJobIDFromHistoryFileName(fileName); + } + return jobId; + } + + /** + * Extracts jobID string from the given job history file name. + * @param fileName name of the job history file + * @return JobID if the given fileName is a valid job history + * file name, null otherwise. 
+ */ + private static String extractJobIDFromHistoryFileName(String fileName) { + // History file name could be in one of the following formats + // (1) old pre21 (V1) job history file name format + // (2) new pre21 (V2) job history file name format + // (3) current job history file name format i.e. 0.22 + String pre21JobID = applyParser(fileName, + Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1); + if (pre21JobID == null) { + pre21JobID = applyParser(fileName, + Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2); + } + if (pre21JobID != null) { + return pre21JobID; + } + return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX); + } + + /** + * Extracts jobID string from the given job conf xml file name. + * @param fileName name of the job conf xml file + * @return job id if the given fileName is a valid job conf xml + * file name, null otherwise. + */ + private static String extractJobIDFromConfFileName(String fileName) { + // History conf file name could be in one of the following formats + // (1) old pre21 (V1) job history conf file name format + // (2) new pre21 (V2) job history conf file name format + // (3) current job history conf file name format i.e. 0.22 + String pre21JobID = applyParser(fileName, + Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V1); + if (pre21JobID == null) { + pre21JobID = applyParser(fileName, + Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V2); + } + if (pre21JobID != null) { + return pre21JobID; + } + return applyParser(fileName, JobHistory.CONF_FILENAME_REGEX); + } + + /** + * Checks if the given fileName is a valid job conf xml file name + * @param fileName name of the file to be validated + * @return true if the given fileName is a valid + * job conf xml file name. 
+ */ + static boolean isJobConfXml(String fileName) { + String jobId = extractJobIDFromConfFileName(fileName); + return jobId != null; + } +} diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java index 6a972219f82..184db8ff046 100644 --- a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java +++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java @@ -20,10 +20,10 @@ package org.apache.hadoop.tools.rumen; import java.util.regex.Pattern; import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.jobhistory.JobHistory; /** - * - * + * Job History related constants for Hadoop releases prior to 0.21 */ public class Pre21JobHistoryConstants { @@ -51,18 +51,34 @@ public class Pre21JobHistoryConstants { } /** - * Pre21 regex for jobhistory filename + * Regex for Pre21 V1(old) jobhistory filename * i.e jt-identifier_job-id_user-name_job-name */ - static final Pattern JOBHISTORY_FILENAME_REGEX = + static final Pattern JOBHISTORY_FILENAME_REGEX_V1 = Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_.+"); + /** + * Regex for Pre21 V2(new) jobhistory filename + * i.e job-id_user-name_job-name + */ + static final Pattern JOBHISTORY_FILENAME_REGEX_V2 = + Pattern.compile("(" + JobID.JOBID_REGEX + ")_.+"); + + static final String OLD_FULL_SUFFIX_REGEX_STRING = + "(?:\\.[0-9]+" + Pattern.quote(JobHistory.OLD_SUFFIX) + ")"; /** - * Pre21 regex for jobhistory conf filename + * Regex for Pre21 V1(old) jobhistory conf filename * i.e jt-identifier_job-id_conf.xml */ - static final Pattern CONF_FILENAME_REGEX = - Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX - + ")_conf.xml(?:\\.[0-9a-zA-Z]+)?"); + static final Pattern CONF_FILENAME_REGEX_V1 = + Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_conf\\.xml" + 
OLD_FULL_SUFFIX_REGEX_STRING + "?"); + /** + * Regex for Pre21 V2(new) jobhistory conf filename + * i.e job-id_conf.xml + */ + static final Pattern CONF_FILENAME_REGEX_V2 = + Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf\\.xml" + + OLD_FULL_SUFFIX_REGEX_STRING + "?"); } diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java index 7330c712a84..c03030971c7 100644 --- a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java +++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java @@ -198,42 +198,6 @@ public class TraceBuilder extends Configured implements Tool { } } - private static String applyParser(String fileName, Pattern pattern) { - Matcher matcher = pattern.matcher(fileName); - - if (!matcher.matches()) { - return null; - } - - return matcher.group(1); - } - - /** - * @param fileName - * @return the jobID String, parsed out of the file name. We return a valid - * String for either a history log file or a config file. Otherwise, - * [especially for .crc files] we return null. 
- */ - static String extractJobID(String fileName) { - String jobId = applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX); - if (jobId == null) { - // check if its a pre21 jobhistory file - jobId = applyParser(fileName, - Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX); - } - return jobId; - } - - static boolean isJobConfXml(String fileName, InputStream input) { - String jobId = applyParser(fileName, JobHistory.CONF_FILENAME_REGEX); - if (jobId == null) { - // check if its a pre21 jobhistory conf file - jobId = applyParser(fileName, - Pre21JobHistoryConstants.CONF_FILENAME_REGEX); - } - return jobId != null; - } - @SuppressWarnings("unchecked") @Override @@ -268,7 +232,7 @@ public class TraceBuilder extends Configured implements Tool { JobHistoryParser parser = null; try { - String jobID = extractJobID(filePair.first()); + String jobID = JobHistoryUtils.extractJobID(filePair.first()); if (jobID == null) { LOG.warn("File skipped: Invalid file name: " + filePair.first()); @@ -282,8 +246,9 @@ public class TraceBuilder extends Configured implements Tool { jobBuilder = new JobBuilder(jobID); } - if (isJobConfXml(filePair.first(), ris)) { - processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder); + if (JobHistoryUtils.isJobConfXml(filePair.first())) { + processJobConf(JobConfigurationParser.parse(ris.rewind()), + jobBuilder); } else { parser = JobHistoryParserFactory.getParser(ris); if (parser == null) {