From b4c8567e1b975ccf32dfc1f63aadb78ac29f2c69 Mon Sep 17 00:00:00 2001 From: Tsz-wo Sze Date: Sat, 18 Feb 2012 01:12:18 +0000 Subject: [PATCH] MAPREDUCE-3583. Change pid to String and stime to BigInteger in order to handle integers larger than Long.MAX_VALUE. Contributed by Zhihong Yu git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1245828 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../util/ProcfsBasedProcessTree.java | 124 ++++++++++-------- .../yarn/util/ProcfsBasedProcessTree.java | 114 ++++++++-------- .../yarn/util/TestProcfsBasedProcessTree.java | 6 +- 4 files changed, 134 insertions(+), 113 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index e0d5652b0ad..c516d7e4c1a 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -123,6 +123,9 @@ Release 0.23.2 - UNRELEASED MAPREDUCE-3856. Instances of RunningJob class givs incorrect job tracking urls when mutiple jobs are submitted from same client jvm. (Eric Payne via sseth) + + MAPREDUCE-3583. Change pid to String and stime to BigInteger in order to + avoid NumberFormatException caused by overflow. (Zhihong Yu via szetszwo) Release 0.23.1 - 2012-02-08 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java index 58516c42197..1d02ca55594 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ProcfsBasedProcessTree.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -91,12 +92,14 @@ public class ProcfsBasedProcessTree extends ProcessTree { // to a test directory. private String procfsDir; - private Integer pid = -1; + static private String deadPid = "-1"; + private String pid = deadPid; + static private Pattern numberPattern = Pattern.compile("[1-9][0-9]*"); private Long cpuTime = 0L; private boolean setsidUsed = false; private long sleeptimeBeforeSigkill = DEFAULT_SLEEPTIME_BEFORE_SIGKILL; - private Map processTree = new HashMap(); + private Map processTree = new HashMap(); public ProcfsBasedProcessTree(String pid) { this(pid, false, DEFAULT_SLEEPTIME_BEFORE_SIGKILL); @@ -166,19 +169,19 @@ public class ProcfsBasedProcessTree extends ProcessTree { * @return the process-tree with latest state. */ public ProcfsBasedProcessTree getProcessTree() { - if (pid != -1) { + if (!pid.equals(deadPid)) { // Get the list of processes - List processList = getProcessList(); + List processList = getProcessList(); - Map allProcessInfo = new HashMap(); + Map allProcessInfo = new HashMap(); // cache the processTree to get the age for processes - Map oldProcs = - new HashMap(processTree); + Map oldProcs = + new HashMap(processTree); processTree.clear(); ProcessInfo me = null; - for (Integer proc : processList) { + for (String proc : processList) { // Get information for each process ProcessInfo pInfo = new ProcessInfo(proc); if (constructProcessInfo(pInfo, procfsDir) != null) { @@ -195,9 +198,9 @@ public class ProcfsBasedProcessTree extends ProcessTree { } // Add each process to its parent. - for (Map.Entry entry : allProcessInfo.entrySet()) { - Integer pID = entry.getKey(); - if (pID != 1) { + for (Map.Entry entry : allProcessInfo.entrySet()) { + String pID = entry.getKey(); + if (!pID.equals("1")) { ProcessInfo pInfo = entry.getValue(); ProcessInfo parentPInfo = allProcessInfo.get(pInfo.getPpid()); if (parentPInfo != null) { @@ -218,7 +221,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { } // update age values and compute the number of jiffies since last update - for (Map.Entry procs : processTree.entrySet()) { + for (Map.Entry procs : processTree.entrySet()) { ProcessInfo oldInfo = oldProcs.get(procs.getKey()); if (procs.getValue() != null) { procs.getValue().updateJiffy(oldInfo); @@ -242,10 +245,10 @@ public class ProcfsBasedProcessTree extends ProcessTree { * @return true if the root-process is alive, false otherwise. */ public boolean isAlive() { - if (pid == -1) { + if (pid.equals(deadPid)) { return false; } else { - return isAlive(pid.toString()); + return isAlive(pid); } } @@ -256,8 +259,8 @@ public class ProcfsBasedProcessTree extends ProcessTree { * alive, false otherwise. */ public boolean isAnyProcessInTreeAlive() { - for (Integer pId : processTree.keySet()) { - if (isAlive(pId.toString())) { + for (String pId : processTree.keySet()) { + if (isAlive(pId)) { return true; } } @@ -269,9 +272,8 @@ public class ProcfsBasedProcessTree extends ProcessTree { * @param procfsDir Procfs root dir */ static boolean checkPidPgrpidForMatch(String pidStr, String procfsDir) { - Integer pId = Integer.parseInt(pidStr); // Get information for this process - ProcessInfo pInfo = new ProcessInfo(pId); + ProcessInfo pInfo = new ProcessInfo(pidStr); pInfo = constructProcessInfo(pInfo, procfsDir); if (pInfo == null) { // process group leader may have finished execution, but we still need to @@ -279,14 +281,15 @@ public class ProcfsBasedProcessTree extends ProcessTree { return true; } + String pgrpId = pInfo.getPgrpId().toString(); //make sure that pId and its pgrpId match - if (!pInfo.getPgrpId().equals(pId)) { - LOG.warn("Unexpected: Process with PID " + pId + - " is not a process group leader."); + if (!pgrpId.equals(pidStr)) { + LOG.warn("Unexpected: Process with PID " + pidStr + + " is not a process group leader. pgrpId is: " + pInfo.getPgrpId()); return false; } if (LOG.isDebugEnabled()) { - LOG.debug(pId + " is a process group leader, as expected."); + LOG.debug(pidStr + " is a process group leader, as expected."); } return true; } @@ -324,7 +327,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { */ public void destroy(boolean inBackground) { LOG.debug("Killing ProcfsBasedProcessTree of " + pid); - if (pid == -1) { + if (pid.equals(deadPid)) { return; } if (isAlive(pid.toString())) { @@ -347,7 +350,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { } private static final String PROCESSTREE_DUMP_FORMAT = - "\t|- %d %d %d %d %s %d %d %d %d %s\n"; + "\t|- %s %s %d %d %s %d %d %d %d %s\n"; /** * Get a dump of the process-tree. @@ -458,34 +461,27 @@ public class ProcfsBasedProcessTree extends ProcessTree { return cpuTime; } - private static Integer getValidPID(String pid) { - Integer retPid = -1; - try { - retPid = Integer.parseInt(pid); - if (retPid <= 0) { - retPid = -1; - } - } catch (NumberFormatException nfe) { - retPid = -1; - } - return retPid; + private static String getValidPID(String pid) { + if (pid == null) return deadPid; + Matcher m = numberPattern.matcher(pid); + if (m.matches()) return pid; + return deadPid; } /** * Get the list of all processes in the system. */ - private List getProcessList() { + private List getProcessList() { String[] processDirs = (new File(procfsDir)).list(); - List processList = new ArrayList(); + List processList = new ArrayList(); for (String dir : processDirs) { + Matcher m = numberPattern.matcher(dir); + if (!m.matches()) continue; try { - int pd = Integer.parseInt(dir); if ((new File(procfsDir, dir)).isDirectory()) { - processList.add(Integer.valueOf(pd)); + processList.add(dir); } - } catch (NumberFormatException n) { - // skip this directory } catch (SecurityException s) { // skip this process } @@ -511,7 +507,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { BufferedReader in = null; FileReader fReader = null; try { - File pidDir = new File(procfsDir, String.valueOf(pinfo.getPid())); + File pidDir = new File(procfsDir, pinfo.getPid()); fReader = new FileReader(new File(pidDir, PROCFS_STAT_FILE)); in = new BufferedReader(fReader); } catch (FileNotFoundException f) { @@ -528,9 +524,9 @@ public class ProcfsBasedProcessTree extends ProcessTree { boolean mat = m.find(); if (mat) { // Set (name) (ppid) (pgrpId) (session) (utime) (stime) (vsize) (rss) - pinfo.updateProcessInfo(m.group(2), Integer.parseInt(m.group(3)), + pinfo.updateProcessInfo(m.group(2), m.group(3), Integer.parseInt(m.group(4)), Integer.parseInt(m.group(5)), - Long.parseLong(m.group(7)), Long.parseLong(m.group(8)), + Long.parseLong(m.group(7)), new BigInteger(m.group(8)), Long.parseLong(m.group(10)), Long.parseLong(m.group(11))); } else { LOG.warn("Unexpected: procfs stat file is not in the expected format" @@ -562,7 +558,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { */ public String toString() { StringBuffer pTree = new StringBuffer("[ "); - for (Integer p : processTree.keySet()) { + for (String p : processTree.keySet()) { pTree.append(p); pTree.append(" "); } @@ -575,15 +571,16 @@ public class ProcfsBasedProcessTree extends ProcessTree { * */ private static class ProcessInfo { - private Integer pid; // process-id + private String pid; // process-id private String name; // command name private Integer pgrpId; // process group-id - private Integer ppid; // parent process-id + private String ppid; // parent process-id private Integer sessionId; // session-id private Long vmem; // virtual memory usage private Long rssmemPage; // rss memory usage in # of pages private Long utime = 0L; // # of jiffies in user mode - private Long stime = 0L; // # of jiffies in kernel mode + private final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE); + private BigInteger stime = new BigInteger("0"); // # of jiffies in kernel mode // how many times has this process been seen alive private int age; @@ -595,13 +592,13 @@ public class ProcfsBasedProcessTree extends ProcessTree { private List children = new ArrayList(); // list of children - public ProcessInfo(int pid) { - this.pid = Integer.valueOf(pid); + public ProcessInfo(String pid) { + this.pid = pid; // seeing this the first time. this.age = 1; } - public Integer getPid() { + public String getPid() { return pid; } @@ -613,7 +610,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { return pgrpId; } - public Integer getPpid() { + public String getPpid() { return ppid; } @@ -629,7 +626,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { return utime; } - public Long getStime() { + public BigInteger getStime() { return stime; } @@ -652,8 +649,8 @@ public class ProcfsBasedProcessTree extends ProcessTree { return false; } - public void updateProcessInfo(String name, Integer ppid, Integer pgrpId, - Integer sessionId, Long utime, Long stime, Long vmem, Long rssmem) { + public void updateProcessInfo(String name, String ppid, Integer pgrpId, + Integer sessionId, Long utime, BigInteger stime, Long vmem, Long rssmem) { this.name = name; this.ppid = ppid; this.pgrpId = pgrpId; @@ -665,8 +662,19 @@ public class ProcfsBasedProcessTree extends ProcessTree { } public void updateJiffy(ProcessInfo oldInfo) { - this.dtime = (oldInfo == null ? this.utime + this.stime - : (this.utime + this.stime) - (oldInfo.utime + oldInfo.stime)); + if (oldInfo == null) { + BigInteger sum = this.stime.add(BigInteger.valueOf(this.utime)); + if (sum.compareTo(MAX_LONG) > 0) { + this.dtime = 0L; + LOG.warn("Sum of stime (" + this.stime + ") and utime (" + this.utime + + ") is greater than " + Long.MAX_VALUE); + } else { + this.dtime = sum.longValue(); + } + return; + } + this.dtime = (this.utime - oldInfo.utime + + this.stime.subtract(oldInfo.stime).longValue()); } public void updateAge(ProcessInfo oldInfo) { @@ -690,7 +698,7 @@ public class ProcfsBasedProcessTree extends ProcessTree { FileReader fReader = null; try { fReader = - new FileReader(new File(new File(procfsDir, pid.toString()), + new FileReader(new File(new File(procfsDir, pid), PROCFS_CMDLINE_FILE)); } catch (FileNotFoundException f) { // The process vanished in the interim! diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java index db5f532987f..db65ad20cb8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; +import java.math.BigInteger; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; @@ -91,12 +92,14 @@ public class ProcfsBasedProcessTree { // to a test directory. private String procfsDir; - protected final Integer pid; + static private String deadPid = "-1"; + private String pid = deadPid; + static private Pattern numberPattern = Pattern.compile("[1-9][0-9]*"); private Long cpuTime = 0L; private boolean setsidUsed = false; - protected Map processTree = - new HashMap(); + protected Map processTree = + new HashMap(); public ProcfsBasedProcessTree(String pid) { this(pid, false); @@ -150,19 +153,19 @@ public class ProcfsBasedProcessTree { * @return the process-tree with latest state. */ public ProcfsBasedProcessTree getProcessTree() { - if (pid != -1) { + if (!pid.equals(deadPid)) { // Get the list of processes - List processList = getProcessList(); + List processList = getProcessList(); - Map allProcessInfo = new HashMap(); + Map allProcessInfo = new HashMap(); // cache the processTree to get the age for processes - Map oldProcs = - new HashMap(processTree); + Map oldProcs = + new HashMap(processTree); processTree.clear(); ProcessInfo me = null; - for (Integer proc : processList) { + for (String proc : processList) { // Get information for each process ProcessInfo pInfo = new ProcessInfo(proc); if (constructProcessInfo(pInfo, procfsDir) != null) { @@ -179,9 +182,9 @@ public class ProcfsBasedProcessTree { } // Add each process to its parent. - for (Map.Entry entry : allProcessInfo.entrySet()) { - Integer pID = entry.getKey(); - if (pID != 1) { + for (Map.Entry entry : allProcessInfo.entrySet()) { + String pID = entry.getKey(); + if (!pID.equals("1")) { ProcessInfo pInfo = entry.getValue(); ProcessInfo parentPInfo = allProcessInfo.get(pInfo.getPpid()); if (parentPInfo != null) { @@ -202,7 +205,7 @@ public class ProcfsBasedProcessTree { } // update age values and compute the number of jiffies since last update - for (Map.Entry procs : processTree.entrySet()) { + for (Map.Entry procs : processTree.entrySet()) { ProcessInfo oldInfo = oldProcs.get(procs.getKey()); if (procs.getValue() != null) { procs.getValue().updateJiffy(oldInfo); @@ -227,20 +230,22 @@ public class ProcfsBasedProcessTree { return checkPidPgrpidForMatch(pid, PROCFS); } - public static boolean checkPidPgrpidForMatch(int _pid, String procfs) { + public static boolean checkPidPgrpidForMatch(String _pid, String procfs) { // Get information for this process ProcessInfo pInfo = new ProcessInfo(_pid); pInfo = constructProcessInfo(pInfo, procfs); // null if process group leader finished execution; issue no warning // make sure that pid and its pgrpId match - return pInfo == null || pInfo.getPgrpId().equals(_pid); + if (pInfo == null) return true; + String pgrpId = pInfo.getPgrpId().toString(); + return pgrpId.equals(_pid); } private static final String PROCESSTREE_DUMP_FORMAT = - "\t|- %d %d %d %d %s %d %d %d %d %s\n"; + "\t|- %s %s %d %d %s %d %d %d %d %s\n"; - public List getCurrentProcessIDs() { - List currentPIDs = new ArrayList(); + public List getCurrentProcessIDs() { + List currentPIDs = new ArrayList(); currentPIDs.addAll(processTree.keySet()); return currentPIDs; } @@ -354,34 +359,27 @@ public class ProcfsBasedProcessTree { return cpuTime; } - private static Integer getValidPID(String pid) { - Integer retPid = -1; - try { - retPid = Integer.parseInt(pid); - if (retPid <= 0) { - retPid = -1; - } - } catch (NumberFormatException nfe) { - retPid = -1; - } - return retPid; + private static String getValidPID(String pid) { + if (pid == null) return deadPid; + Matcher m = numberPattern.matcher(pid); + if (m.matches()) return pid; + return deadPid; } /** * Get the list of all processes in the system. */ - private List getProcessList() { + private List getProcessList() { String[] processDirs = (new File(procfsDir)).list(); - List processList = new ArrayList(); + List processList = new ArrayList(); for (String dir : processDirs) { + Matcher m = numberPattern.matcher(dir); + if (!m.matches()) continue; try { - int pd = Integer.parseInt(dir); if ((new File(procfsDir, dir)).isDirectory()) { - processList.add(Integer.valueOf(pd)); + processList.add(dir); } - } catch (NumberFormatException n) { - // skip this directory } catch (SecurityException s) { // skip this process } @@ -407,7 +405,7 @@ public class ProcfsBasedProcessTree { BufferedReader in = null; FileReader fReader = null; try { - File pidDir = new File(procfsDir, String.valueOf(pinfo.getPid())); + File pidDir = new File(procfsDir, pinfo.getPid()); fReader = new FileReader(new File(pidDir, PROCFS_STAT_FILE)); in = new BufferedReader(fReader); } catch (FileNotFoundException f) { @@ -424,9 +422,9 @@ public class ProcfsBasedProcessTree { boolean mat = m.find(); if (mat) { // Set (name) (ppid) (pgrpId) (session) (utime) (stime) (vsize) (rss) - pinfo.updateProcessInfo(m.group(2), Integer.parseInt(m.group(3)), + pinfo.updateProcessInfo(m.group(2), m.group(3), Integer.parseInt(m.group(4)), Integer.parseInt(m.group(5)), - Long.parseLong(m.group(7)), Long.parseLong(m.group(8)), + Long.parseLong(m.group(7)), new BigInteger(m.group(8)), Long.parseLong(m.group(10)), Long.parseLong(m.group(11))); } else { LOG.warn("Unexpected: procfs stat file is not in the expected format" @@ -458,7 +456,7 @@ public class ProcfsBasedProcessTree { */ public String toString() { StringBuffer pTree = new StringBuffer("[ "); - for (Integer p : processTree.keySet()) { + for (String p : processTree.keySet()) { pTree.append(p); pTree.append(" "); } @@ -471,15 +469,16 @@ public class ProcfsBasedProcessTree { * */ private static class ProcessInfo { - private Integer pid; // process-id + private String pid; // process-id private String name; // command name private Integer pgrpId; // process group-id - private Integer ppid; // parent process-id + private String ppid; // parent process-id private Integer sessionId; // session-id private Long vmem; // virtual memory usage private Long rssmemPage; // rss memory usage in # of pages private Long utime = 0L; // # of jiffies in user mode - private Long stime = 0L; // # of jiffies in kernel mode + private final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE); + private BigInteger stime = new BigInteger("0"); // # of jiffies in kernel mode // how many times has this process been seen alive private int age; @@ -491,13 +490,13 @@ public class ProcfsBasedProcessTree { private List children = new ArrayList(); // list of children - public ProcessInfo(int pid) { - this.pid = Integer.valueOf(pid); + public ProcessInfo(String pid) { + this.pid = pid; // seeing this the first time. this.age = 1; } - public Integer getPid() { + public String getPid() { return pid; } @@ -509,7 +508,7 @@ public class ProcfsBasedProcessTree { return pgrpId; } - public Integer getPpid() { + public String getPpid() { return ppid; } @@ -525,7 +524,7 @@ public class ProcfsBasedProcessTree { return utime; } - public Long getStime() { + public BigInteger getStime() { return stime; } @@ -548,8 +547,8 @@ public class ProcfsBasedProcessTree { return false; } - public void updateProcessInfo(String name, Integer ppid, Integer pgrpId, - Integer sessionId, Long utime, Long stime, Long vmem, Long rssmem) { + public void updateProcessInfo(String name, String ppid, Integer pgrpId, + Integer sessionId, Long utime, BigInteger stime, Long vmem, Long rssmem) { this.name = name; this.ppid = ppid; this.pgrpId = pgrpId; @@ -559,10 +558,21 @@ public class ProcfsBasedProcessTree { this.vmem = vmem; this.rssmemPage = rssmem; } - + public void updateJiffy(ProcessInfo oldInfo) { - this.dtime = (oldInfo == null ? this.utime + this.stime - : (this.utime + this.stime) - (oldInfo.utime + oldInfo.stime)); + if (oldInfo == null) { + BigInteger sum = this.stime.add(BigInteger.valueOf(this.utime)); + if (sum.compareTo(MAX_LONG) > 0) { + this.dtime = 0L; + LOG.warn("Sum of stime (" + this.stime + ") and utime (" + this.utime + + ") is greater than " + Long.MAX_VALUE); + } else { + this.dtime = sum.longValue(); + } + return; + } + this.dtime = (this.utime - oldInfo.utime + + this.stime.subtract(oldInfo.stime).longValue()); } public void updateAge(ProcessInfo oldInfo) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java index 454ef2c2038..644089bbda3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java @@ -527,7 +527,7 @@ public class TestProcfsBasedProcessTree { // Let us not create stat file for pid 100. Assert.assertTrue(ProcfsBasedProcessTree.checkPidPgrpidForMatch( - Integer.valueOf(pid), procfsRootDir.getAbsolutePath())); + pid, procfsRootDir.getAbsolutePath())); } finally { FileUtil.fullyDelete(procfsRootDir); } @@ -662,8 +662,8 @@ public class TestProcfsBasedProcessTree { */ private static boolean isAnyProcessInTreeAlive( ProcfsBasedProcessTree processTree) { - for (Integer pId : processTree.getCurrentProcessIDs()) { - if (isAlive(pId.toString())) { + for (String pId : processTree.getCurrentProcessIDs()) { + if (isAlive(pId)) { return true; } }