YARN-4612. Fix rumen and scheduler load simulator to handle killed tasks

properly. Contributed by Ming Ma.

(cherry picked from commit 4efdf3a979)
This commit is contained in:
Xuan 2016-01-26 18:17:12 -08:00 committed by Konstantin V Shvachko
parent 71d91d53bf
commit cef5ef2377
4 changed files with 625 additions and 4 deletions

View File

@ -473,9 +473,12 @@ public class JobBuilder {
task.setTaskStatus(getPre21Value(event.getTaskStatus()));
TaskFailed t = (TaskFailed)(event.getDatum());
task.putDiagnosticInfo(t.error.toString());
task.putFailedDueToAttemptId(t.failedDueToAttempt.toString());
// killed task wouldn't have failed attempt.
if (t.getFailedDueToAttempt() != null) {
task.putFailedDueToAttemptId(t.getFailedDueToAttempt().toString());
}
org.apache.hadoop.mapreduce.jobhistory.JhCounters counters =
((TaskFailed) event.getDatum()).counters;
((TaskFailed) event.getDatum()).getCounters();
task.incorporateCounters(
counters == null ? EMPTY_COUNTERS : counters);
}
@ -500,7 +503,7 @@ public class JobBuilder {
attempt.setFinishTime(event.getFinishTime());
org.apache.hadoop.mapreduce.jobhistory.JhCounters counters =
((TaskAttemptUnsuccessfulCompletion) event.getDatum()).counters;
((TaskAttemptUnsuccessfulCompletion) event.getDatum()).getCounters();
attempt.incorporateCounters(
counters == null ? EMPTY_COUNTERS : counters);
attempt.arraySetClockSplits(event.getClockSplits());
@ -509,7 +512,7 @@ public class JobBuilder {
attempt.arraySetPhysMemKbytes(event.getPhysMemKbytes());
TaskAttemptUnsuccessfulCompletion t =
(TaskAttemptUnsuccessfulCompletion) (event.getDatum());
attempt.putDiagnosticInfo(t.error.toString());
attempt.putDiagnosticInfo(t.getError().toString());
}
private void processTaskAttemptStartedEvent(TaskAttemptStartedEvent event) {

View File

@ -10212,4 +10212,610 @@
"clusterReduceMB" : -1,
"jobMapMB" : 200,
"jobReduceMB" : 200
} {
"priority" : "NORMAL",
"jobID" : "job_1369942127770_1207",
"user" : "jenkins",
"jobName" : "TeraGen",
"submitTime" : 1371223054499,
"finishTime" : 1371223153874,
"queue" : "sls_queue_1",
"mapTasks" : [ {
"startTime" : 1371223059053,
"taskID" : "task_1369942127770_1207_m_000000",
"taskType" : "MAP",
"finishTime" : 1371223078206,
"attempts" : [ ],
"preferredLocations" : [ ],
"taskStatus" : "KILLED",
"inputBytes" : -1,
"inputRecords" : -1,
"outputBytes" : -1,
"outputRecords" : -1
} ],
"reduceTasks" : [ ],
"launchTime" : 1371223058937,
"totalMaps" : 1,
"totalReduces" : 0,
"otherTasks" : [ ],
"jobProperties" : {
"mapreduce.job.ubertask.enable" : "false",
"yarn.resourcemanager.max-completed-applications" : "10000",
"yarn.resourcemanager.delayed.delegation-token.removal-interval-ms" : "30000",
"mapreduce.client.submit.file.replication" : "2",
"yarn.nodemanager.container-manager.thread-count" : "20",
"mapred.queue.default.acl-administer-jobs" : "*",
"dfs.image.transfer.bandwidthPerSec" : "0",
"mapreduce.tasktracker.healthchecker.interval" : "60000",
"mapreduce.jobtracker.staging.root.dir" : "/user",
"yarn.resourcemanager.recovery.enabled" : "false",
"yarn.resourcemanager.am.max-retries" : "1",
"dfs.block.access.token.lifetime" : "600",
"fs.AbstractFileSystem.file.impl" : "org.apache.hadoop.fs.local.LocalFs",
"mapreduce.client.completion.pollinterval" : "5000",
"mapreduce.job.ubertask.maxreduces" : "1",
"mapreduce.reduce.shuffle.memory.limit.percent" : "0.25",
"dfs.domain.socket.path" : "/var/run/hdfs-sockets/dn",
"hadoop.ssl.keystores.factory.class" : "org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory",
"hadoop.http.authentication.kerberos.keytab" : "${user.home}/hadoop.keytab",
"yarn.nodemanager.keytab" : "/etc/krb5.keytab",
"io.seqfile.sorter.recordlimit" : "1000000",
"s3.blocksize" : "67108864",
"mapreduce.task.io.sort.factor" : "10",
"yarn.nodemanager.disk-health-checker.interval-ms" : "120000",
"mapreduce.job.working.dir" : "hdfs://a2115.smile.com:8020/user/jenkins",
"yarn.admin.acl" : "*",
"mapreduce.job.speculative.speculativecap" : "0.1",
"dfs.namenode.num.checkpoints.retained" : "2",
"dfs.namenode.delegation.token.renew-interval" : "86400000",
"yarn.nodemanager.resource.memory-mb" : "8192",
"io.map.index.interval" : "128",
"s3.client-write-packet-size" : "65536",
"mapreduce.task.files.preserve.failedtasks" : "false",
"dfs.namenode.http-address" : "a2115.smile.com:20101",
"ha.zookeeper.session-timeout.ms" : "5000",
"hadoop.hdfs.configuration.version" : "1",
"s3.replication" : "3",
"dfs.datanode.balance.bandwidthPerSec" : "1048576",
"mapreduce.reduce.shuffle.connect.timeout" : "180000",
"hadoop.ssl.enabled" : "false",
"dfs.journalnode.rpc-address" : "0.0.0.0:8485",
"yarn.nodemanager.aux-services" : "mapreduce.shuffle",
"mapreduce.job.counters.max" : "120",
"dfs.datanode.readahead.bytes" : "4193404",
"ipc.client.connect.max.retries.on.timeouts" : "45",
"mapreduce.job.complete.cancel.delegation.tokens" : "true",
"dfs.client.failover.max.attempts" : "15",
"dfs.namenode.checkpoint.dir" : "file://${hadoop.tmp.dir}/dfs/namesecondary",
"dfs.namenode.replication.work.multiplier.per.iteration" : "2",
"fs.trash.interval" : "1",
"yarn.resourcemanager.admin.address" : "a2115.smile.com:8033",
"ha.health-monitor.check-interval.ms" : "1000",
"mapreduce.job.outputformat.class" : "org.apache.hadoop.examples.terasort.TeraOutputFormat",
"hadoop.jetty.logs.serve.aliases" : "true",
"hadoop.http.authentication.kerberos.principal" : "HTTP/_HOST@LOCALHOST",
"mapreduce.job.reduce.shuffle.consumer.plugin.class" : "org.apache.hadoop.mapreduce.task.reduce.Shuffle",
"s3native.blocksize" : "67108864",
"dfs.namenode.edits.dir" : "${dfs.namenode.name.dir}",
"ha.health-monitor.sleep-after-disconnect.ms" : "1000",
"dfs.encrypt.data.transfer" : "false",
"dfs.datanode.http.address" : "0.0.0.0:50075",
"mapreduce.terasort.num-rows" : "400000000",
"mapreduce.job.map.class" : "org.apache.hadoop.examples.terasort.TeraGen$SortGenMapper",
"mapreduce.jobtracker.jobhistory.task.numberprogresssplits" : "12",
"dfs.namenode.write.stale.datanode.ratio" : "0.5f",
"dfs.client.use.datanode.hostname" : "false",
"yarn.acl.enable" : "true",
"hadoop.security.instrumentation.requires.admin" : "false",
"yarn.nodemanager.localizer.fetch.thread-count" : "4",
"hadoop.security.authorization" : "false",
"user.name" : "jenkins",
"dfs.namenode.fs-limits.min-block-size" : "1048576",
"dfs.client.failover.connection.retries.on.timeouts" : "0",
"hadoop.security.group.mapping.ldap.search.filter.group" : "(objectClass=group)",
"mapreduce.output.fileoutputformat.compress.codec" : "org.apache.hadoop.io.compress.DefaultCodec",
"dfs.namenode.safemode.extension" : "30000",
"mapreduce.shuffle.port" : "8080",
"mapreduce.reduce.log.level" : "INFO",
"yarn.log-aggregation-enable" : "false",
"dfs.datanode.sync.behind.writes" : "false",
"mapreduce.jobtracker.instrumentation" : "org.apache.hadoop.mapred.JobTrackerMetricsInst",
"dfs.https.server.keystore.resource" : "ssl-server.xml",
"hadoop.security.group.mapping.ldap.search.attr.group.name" : "cn",
"dfs.namenode.replication.min" : "1",
"mapreduce.map.java.opts" : " -Xmx825955249",
"yarn.scheduler.fair.allocation.file" : "/etc/yarn/fair-scheduler.xml",
"s3native.bytes-per-checksum" : "512",
"mapreduce.tasktracker.tasks.sleeptimebeforesigkill" : "5000",
"tfile.fs.output.buffer.size" : "262144",
"yarn.nodemanager.local-dirs" : "${hadoop.tmp.dir}/nm-local-dir",
"mapreduce.jobtracker.persist.jobstatus.active" : "false",
"fs.AbstractFileSystem.hdfs.impl" : "org.apache.hadoop.fs.Hdfs",
"mapreduce.job.map.output.collector.class" : "org.apache.hadoop.mapred.MapTask$MapOutputBuffer",
"mapreduce.tasktracker.local.dir.minspacestart" : "0",
"dfs.namenode.safemode.min.datanodes" : "0",
"hadoop.security.uid.cache.secs" : "14400",
"dfs.client.https.need-auth" : "false",
"dfs.client.write.exclude.nodes.cache.expiry.interval.millis" : "600000",
"dfs.client.https.keystore.resource" : "ssl-client.xml",
"dfs.namenode.max.objects" : "0",
"hadoop.ssl.client.conf" : "ssl-client.xml",
"dfs.namenode.safemode.threshold-pct" : "0.999f",
"mapreduce.tasktracker.local.dir.minspacekill" : "0",
"mapreduce.jobtracker.retiredjobs.cache.size" : "1000",
"dfs.blocksize" : "134217728",
"yarn.resourcemanager.scheduler.class" : "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler",
"mapreduce.job.reduce.slowstart.completedmaps" : "0.8",
"mapreduce.job.end-notification.retry.attempts" : "5",
"mapreduce.job.inputformat.class" : "org.apache.hadoop.examples.terasort.TeraGen$RangeInputFormat",
"mapreduce.map.memory.mb" : "1024",
"mapreduce.job.user.name" : "jenkins",
"mapreduce.tasktracker.outofband.heartbeat" : "false",
"io.native.lib.available" : "true",
"mapreduce.jobtracker.persist.jobstatus.hours" : "0",
"dfs.client-write-packet-size" : "65536",
"mapreduce.client.progressmonitor.pollinterval" : "1000",
"dfs.namenode.name.dir" : "file://${hadoop.tmp.dir}/dfs/name",
"dfs.ha.log-roll.period" : "120",
"mapreduce.reduce.input.buffer.percent" : "0.0",
"mapreduce.map.output.compress.codec" : "org.apache.hadoop.io.compress.SnappyCodec",
"dfs.client.failover.sleep.base.millis" : "500",
"dfs.datanode.directoryscan.threads" : "1",
"mapreduce.jobtracker.address" : "neededForHive:999999",
"mapreduce.cluster.local.dir" : "${hadoop.tmp.dir}/mapred/local",
"yarn.scheduler.fair.user-as-default-queue" : "true",
"mapreduce.job.application.attempt.id" : "1",
"dfs.permissions.enabled" : "true",
"mapreduce.tasktracker.taskcontroller" : "org.apache.hadoop.mapred.DefaultTaskController",
"yarn.scheduler.fair.preemption" : "true",
"mapreduce.reduce.shuffle.parallelcopies" : "5",
"dfs.support.append" : "true",
"yarn.nodemanager.env-whitelist" : "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME",
"mapreduce.jobtracker.heartbeats.in.second" : "100",
"mapreduce.job.maxtaskfailures.per.tracker" : "3",
"ipc.client.connection.maxidletime" : "10000",
"mapreduce.shuffle.ssl.enabled" : "false",
"dfs.namenode.invalidate.work.pct.per.iteration" : "0.32f",
"dfs.blockreport.intervalMsec" : "21600000",
"fs.s3.sleepTimeSeconds" : "10",
"dfs.namenode.replication.considerLoad" : "true",
"dfs.client.block.write.retries" : "3",
"hadoop.ssl.server.conf" : "ssl-server.xml",
"dfs.namenode.name.dir.restore" : "false",
"rpc.engine.org.apache.hadoop.mapreduce.v2.api.MRClientProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
"dfs.datanode.hdfs-blocks-metadata.enabled" : "true",
"ha.zookeeper.parent-znode" : "/hadoop-ha",
"io.seqfile.lazydecompress" : "true",
"mapreduce.reduce.merge.inmem.threshold" : "1000",
"mapreduce.input.fileinputformat.split.minsize" : "0",
"dfs.replication" : "3",
"ipc.client.tcpnodelay" : "false",
"dfs.namenode.accesstime.precision" : "3600000",
"s3.stream-buffer-size" : "4096",
"mapreduce.jobtracker.tasktracker.maxblacklists" : "4",
"dfs.client.read.shortcircuit.skip.checksum" : "false",
"mapreduce.job.jvm.numtasks" : "1",
"mapreduce.task.io.sort.mb" : "100",
"io.file.buffer.size" : "65536",
"dfs.namenode.audit.loggers" : "default",
"dfs.namenode.checkpoint.txns" : "1000000",
"yarn.nodemanager.admin-env" : "MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX",
"mapreduce.job.jar" : "/user/jenkins/.staging/job_1369942127770_1207/job.jar",
"mapreduce.job.split.metainfo.maxsize" : "10000000",
"kfs.replication" : "3",
"rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
"yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms" : "1000",
"mapreduce.reduce.maxattempts" : "4",
"kfs.stream-buffer-size" : "4096",
"dfs.ha.tail-edits.period" : "60",
"hadoop.security.authentication" : "simple",
"fs.s3.buffer.dir" : "${hadoop.tmp.dir}/s3",
"rpc.engine.org.apache.hadoop.yarn.api.AMRMProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
"mapreduce.jobtracker.taskscheduler" : "org.apache.hadoop.mapred.JobQueueTaskScheduler",
"yarn.app.mapreduce.am.job.task.listener.thread-count" : "30",
"dfs.namenode.avoid.read.stale.datanode" : "false",
"mapreduce.job.reduces" : "0",
"mapreduce.map.sort.spill.percent" : "0.8",
"dfs.client.file-block-storage-locations.timeout" : "60",
"dfs.datanode.drop.cache.behind.writes" : "false",
"mapreduce.job.end-notification.retry.interval" : "1",
"mapreduce.job.maps" : "96",
"mapreduce.job.speculative.slownodethreshold" : "1.0",
"tfile.fs.input.buffer.size" : "262144",
"mapreduce.map.speculative" : "false",
"dfs.block.access.token.enable" : "false",
"dfs.journalnode.http-address" : "0.0.0.0:8480",
"mapreduce.job.acl-view-job" : " ",
"mapreduce.reduce.shuffle.retry-delay.max.ms" : "60000",
"mapreduce.job.end-notification.max.retry.interval" : "5",
"ftp.blocksize" : "67108864",
"mapreduce.tasktracker.http.threads" : "80",
"mapreduce.reduce.java.opts" : " -Xmx825955249",
"dfs.datanode.data.dir" : "file://${hadoop.tmp.dir}/dfs/data",
"ha.failover-controller.cli-check.rpc-timeout.ms" : "20000",
"dfs.namenode.max.extra.edits.segments.retained" : "10000",
"dfs.https.port" : "20102",
"dfs.namenode.replication.interval" : "3",
"mapreduce.task.skip.start.attempts" : "2",
"dfs.namenode.https-address" : "a2115.smile.com:20102",
"mapreduce.jobtracker.persist.jobstatus.dir" : "/jobtracker/jobsInfo",
"ipc.client.kill.max" : "10",
"dfs.ha.automatic-failover.enabled" : "false",
"mapreduce.jobhistory.keytab" : "/etc/security/keytab/jhs.service.keytab",
"dfs.image.transfer.timeout" : "600000",
"dfs.client.failover.sleep.max.millis" : "15000",
"mapreduce.job.end-notification.max.attempts" : "5",
"mapreduce.task.tmp.dir" : "./tmp",
"dfs.default.chunk.view.size" : "32768",
"kfs.bytes-per-checksum" : "512",
"mapreduce.reduce.memory.mb" : "1024",
"hadoop.http.filter.initializers" : "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer",
"dfs.datanode.failed.volumes.tolerated" : "0",
"hadoop.http.authentication.type" : "simple",
"dfs.datanode.data.dir.perm" : "700",
"yarn.resourcemanager.client.thread-count" : "50",
"ipc.server.listen.queue.size" : "128",
"mapreduce.reduce.skip.maxgroups" : "0",
"file.stream-buffer-size" : "4096",
"dfs.namenode.fs-limits.max-directory-items" : "0",
"io.mapfile.bloom.size" : "1048576",
"yarn.nodemanager.container-executor.class" : "org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor",
"mapreduce.map.maxattempts" : "4",
"mapreduce.jobtracker.jobhistory.block.size" : "3145728",
"yarn.log-aggregation.retain-seconds" : "-1",
"yarn.app.mapreduce.am.job.committer.cancel-timeout" : "60000",
"ftp.replication" : "3",
"mapreduce.jobtracker.http.address" : "0.0.0.0:50030",
"yarn.nodemanager.health-checker.script.timeout-ms" : "1200000",
"mapreduce.jobhistory.address" : "a2115.smile.com:10020",
"mapreduce.jobtracker.taskcache.levels" : "2",
"dfs.datanode.dns.nameserver" : "default",
"mapreduce.application.classpath" : "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*",
"yarn.nodemanager.log.retain-seconds" : "10800",
"mapred.child.java.opts" : "-Xmx200m",
"dfs.replication.max" : "512",
"map.sort.class" : "org.apache.hadoop.util.QuickSort",
"dfs.stream-buffer-size" : "4096",
"dfs.namenode.backup.address" : "0.0.0.0:50100",
"hadoop.util.hash.type" : "murmur",
"dfs.block.access.key.update.interval" : "600",
"dfs.datanode.dns.interface" : "default",
"dfs.datanode.use.datanode.hostname" : "false",
"mapreduce.job.output.key.class" : "org.apache.hadoop.io.Text",
"dfs.client.read.shortcircuit" : "false",
"dfs.namenode.backup.http-address" : "0.0.0.0:50105",
"yarn.nodemanager.container-monitor.interval-ms" : "3000",
"yarn.nodemanager.disk-health-checker.min-healthy-disks" : "0.25",
"kfs.client-write-packet-size" : "65536",
"ha.zookeeper.acl" : "world:anyone:rwcda",
"yarn.nodemanager.sleep-delay-before-sigkill.ms" : "250",
"mapreduce.job.dir" : "/user/jenkins/.staging/job_1369942127770_1207",
"io.map.index.skip" : "0",
"net.topology.node.switch.mapping.impl" : "org.apache.hadoop.net.ScriptBasedMapping",
"fs.s3.maxRetries" : "4",
"ha.failover-controller.new-active.rpc-timeout.ms" : "60000",
"s3native.client-write-packet-size" : "65536",
"yarn.resourcemanager.amliveliness-monitor.interval-ms" : "1000",
"hadoop.http.staticuser.user" : "dr.who",
"mapreduce.reduce.speculative" : "false",
"mapreduce.client.output.filter" : "FAILED",
"mapreduce.ifile.readahead.bytes" : "4194304",
"mapreduce.tasktracker.report.address" : "127.0.0.1:0",
"mapreduce.task.userlog.limit.kb" : "0",
"mapreduce.tasktracker.map.tasks.maximum" : "2",
"hadoop.http.authentication.simple.anonymous.allowed" : "true",
"hadoop.fuse.timer.period" : "5",
"dfs.namenode.num.extra.edits.retained" : "1000000",
"hadoop.rpc.socket.factory.class.default" : "org.apache.hadoop.net.StandardSocketFactory",
"mapreduce.job.submithostname" : "a2115.smile.com",
"dfs.namenode.handler.count" : "10",
"fs.automatic.close" : "false",
"mapreduce.job.submithostaddress" : "10.20.206.115",
"mapreduce.tasktracker.healthchecker.script.timeout" : "600000",
"dfs.datanode.directoryscan.interval" : "21600",
"yarn.resourcemanager.address" : "a2115.smile.com:8032",
"yarn.nodemanager.health-checker.interval-ms" : "600000",
"dfs.client.file-block-storage-locations.num-threads" : "10",
"yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs" : "86400",
"mapreduce.reduce.markreset.buffer.percent" : "0.0",
"hadoop.security.group.mapping.ldap.directory.search.timeout" : "10000",
"mapreduce.map.log.level" : "INFO",
"dfs.bytes-per-checksum" : "512",
"yarn.nodemanager.localizer.address" : "0.0.0.0:8040",
"dfs.namenode.checkpoint.max-retries" : "3",
"ha.health-monitor.rpc-timeout.ms" : "45000",
"yarn.resourcemanager.keytab" : "/etc/krb5.keytab",
"ftp.stream-buffer-size" : "4096",
"dfs.namenode.avoid.write.stale.datanode" : "false",
"hadoop.security.group.mapping.ldap.search.attr.member" : "member",
"mapreduce.output.fileoutputformat.outputdir" : "hdfs://a2115.smile.com:8020/user/jenkins/tera-gen-1",
"dfs.blockreport.initialDelay" : "0",
"yarn.nm.liveness-monitor.expiry-interval-ms" : "600000",
"hadoop.http.authentication.token.validity" : "36000",
"dfs.namenode.delegation.token.max-lifetime" : "604800000",
"mapreduce.job.hdfs-servers" : "${fs.defaultFS}",
"s3native.replication" : "3",
"yarn.nodemanager.localizer.client.thread-count" : "5",
"dfs.heartbeat.interval" : "3",
"rpc.engine.org.apache.hadoop.ipc.ProtocolMetaInfoPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
"dfs.ha.fencing.ssh.connect-timeout" : "30000",
"yarn.resourcemanager.container.liveness-monitor.interval-ms" : "600000",
"yarn.am.liveness-monitor.expiry-interval-ms" : "600000",
"mapreduce.task.profile" : "false",
"mapreduce.tasktracker.http.address" : "0.0.0.0:50060",
"mapreduce.tasktracker.instrumentation" : "org.apache.hadoop.mapred.TaskTrackerMetricsInst",
"mapreduce.jobhistory.webapp.address" : "a2115.smile.com:19888",
"ha.failover-controller.graceful-fence.rpc-timeout.ms" : "5000",
"yarn.ipc.rpc.class" : "org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC",
"mapreduce.job.name" : "TeraGen",
"kfs.blocksize" : "67108864",
"yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs" : "86400",
"mapreduce.job.ubertask.maxmaps" : "9",
"yarn.scheduler.maximum-allocation-mb" : "8192",
"yarn.nodemanager.heartbeat.interval-ms" : "1000",
"mapreduce.job.userlog.retain.hours" : "24",
"dfs.namenode.secondary.http-address" : "0.0.0.0:50090",
"mapreduce.task.timeout" : "600000",
"mapreduce.framework.name" : "yarn",
"ipc.client.idlethreshold" : "4000",
"ftp.bytes-per-checksum" : "512",
"ipc.server.tcpnodelay" : "false",
"dfs.namenode.stale.datanode.interval" : "30000",
"s3.bytes-per-checksum" : "512",
"mapreduce.job.speculative.slowtaskthreshold" : "1.0",
"yarn.nodemanager.localizer.cache.target-size-mb" : "10240",
"yarn.nodemanager.remote-app-log-dir" : "/tmp/logs",
"fs.s3.block.size" : "67108864",
"mapreduce.job.queuename" : "sls_queue_1",
"dfs.client.failover.connection.retries" : "0",
"hadoop.rpc.protection" : "authentication",
"yarn.scheduler.minimum-allocation-mb" : "1024",
"yarn.app.mapreduce.client-am.ipc.max-retries" : "1",
"hadoop.security.auth_to_local" : "DEFAULT",
"dfs.secondary.namenode.kerberos.internal.spnego.principal" : "${dfs.web.authentication.kerberos.principal}",
"ftp.client-write-packet-size" : "65536",
"fs.defaultFS" : "hdfs://a2115.smile.com:8020",
"yarn.nodemanager.address" : "0.0.0.0:0",
"yarn.scheduler.fair.assignmultiple" : "true",
"yarn.resourcemanager.scheduler.client.thread-count" : "50",
"mapreduce.task.merge.progress.records" : "10000",
"file.client-write-packet-size" : "65536",
"yarn.nodemanager.delete.thread-count" : "4",
"yarn.resourcemanager.scheduler.address" : "a2115.smile.com:8030",
"fs.trash.checkpoint.interval" : "0",
"hadoop.http.authentication.signature.secret.file" : "${user.home}/hadoop-http-auth-signature-secret",
"s3native.stream-buffer-size" : "4096",
"mapreduce.reduce.shuffle.read.timeout" : "180000",
"mapreduce.admin.user.env" : "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native",
"yarn.app.mapreduce.am.command-opts" : " -Xmx1238932873",
"dfs.namenode.checkpoint.edits.dir" : "${dfs.namenode.checkpoint.dir}",
"fs.permissions.umask-mode" : "022",
"dfs.client.domain.socket.data.traffic" : "false",
"hadoop.common.configuration.version" : "0.23.0",
"mapreduce.tasktracker.dns.interface" : "default",
"mapreduce.output.fileoutputformat.compress.type" : "BLOCK",
"mapreduce.ifile.readahead" : "true",
"hadoop.security.group.mapping.ldap.ssl" : "false",
"io.serializations" : "org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization",
"yarn.nodemanager.aux-services.mapreduce.shuffle.class" : "org.apache.hadoop.mapred.ShuffleHandler",
"fs.df.interval" : "60000",
"mapreduce.reduce.shuffle.input.buffer.percent" : "0.70",
"io.seqfile.compress.blocksize" : "1000000",
"hadoop.security.groups.cache.secs" : "300",
"ipc.client.connect.max.retries" : "10",
"dfs.namenode.delegation.key.update-interval" : "86400000",
"yarn.nodemanager.process-kill-wait.ms" : "2000",
"yarn.application.classpath" : "$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$YARN_HOME/*,$YARN_HOME/lib/*",
"yarn.app.mapreduce.client.max-retries" : "3",
"dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction" : "0.75f",
"yarn.nodemanager.log-aggregation.compression-type" : "none",
"hadoop.security.group.mapping.ldap.search.filter.user" : "(&(objectClass=user)(sAMAccountName={0}))",
"yarn.nodemanager.localizer.cache.cleanup.interval-ms" : "600000",
"dfs.image.compress" : "false",
"mapred.mapper.new-api" : "true",
"yarn.nodemanager.log-dirs" : "${yarn.log.dir}/userlogs",
"dfs.namenode.kerberos.internal.spnego.principal" : "${dfs.web.authentication.kerberos.principal}",
"fs.s3n.block.size" : "67108864",
"fs.ftp.host" : "0.0.0.0",
"hadoop.security.group.mapping" : "org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback",
"dfs.datanode.address" : "0.0.0.0:50010",
"mapreduce.map.skip.maxrecords" : "0",
"dfs.datanode.https.address" : "0.0.0.0:50475",
"file.replication" : "1",
"yarn.resourcemanager.resource-tracker.address" : "a2115.smile.com:8031",
"dfs.datanode.drop.cache.behind.reads" : "false",
"hadoop.fuse.connection.timeout" : "300",
"hadoop.work.around.non.threadsafe.getpwuid" : "false",
"mapreduce.jobtracker.restart.recover" : "false",
"hadoop.tmp.dir" : "/tmp/hadoop-${user.name}",
"mapreduce.output.fileoutputformat.compress" : "false",
"mapreduce.tasktracker.indexcache.mb" : "10",
"mapreduce.client.genericoptionsparser.used" : "true",
"dfs.client.block.write.replace-datanode-on-failure.policy" : "DEFAULT",
"mapreduce.job.committer.setup.cleanup.needed" : "true",
"hadoop.kerberos.kinit.command" : "kinit",
"dfs.datanode.du.reserved" : "0",
"dfs.namenode.fs-limits.max-blocks-per-file" : "1048576",
"file.bytes-per-checksum" : "512",
"mapreduce.task.profile.reduces" : "0-2",
"mapreduce.jobtracker.handler.count" : "10",
"dfs.client.block.write.replace-datanode-on-failure.enable" : "true",
"mapreduce.job.output.value.class" : "org.apache.hadoop.io.Text",
"yarn.dispatcher.exit-on-error" : "true",
"net.topology.script.number.args" : "100",
"mapreduce.task.profile.maps" : "0-2",
"dfs.namenode.decommission.interval" : "30",
"dfs.image.compression.codec" : "org.apache.hadoop.io.compress.DefaultCodec",
"yarn.resourcemanager.webapp.address" : "a2115.smile.com:8088",
"mapreduce.jobtracker.system.dir" : "${hadoop.tmp.dir}/mapred/system",
"hadoop.ssl.hostname.verifier" : "DEFAULT",
"yarn.nodemanager.vmem-pmem-ratio" : "2.1",
"dfs.namenode.support.allow.format" : "true",
"mapreduce.jobhistory.principal" : "jhs/_HOST@REALM.TLD",
"io.mapfile.bloom.error.rate" : "0.005",
"mapreduce.shuffle.ssl.file.buffer.size" : "65536",
"dfs.permissions.superusergroup" : "supergroup",
"dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold" : "10737418240",
"mapreduce.jobtracker.expire.trackers.interval" : "600000",
"mapreduce.cluster.acls.enabled" : "false",
"yarn.nodemanager.remote-app-log-dir-suffix" : "logs",
"ha.failover-controller.graceful-fence.connection.retries" : "1",
"ha.health-monitor.connect-retry-interval.ms" : "1000",
"mapreduce.reduce.shuffle.merge.percent" : "0.66",
"yarn.app.mapreduce.am.resource.mb" : "1536",
"io.seqfile.local.dir" : "${hadoop.tmp.dir}/io/local",
"dfs.namenode.checkpoint.check.period" : "60",
"yarn.resourcemanager.nm.liveness-monitor.interval-ms" : "1000",
"mapreduce.jobtracker.maxtasks.perjob" : "-1",
"mapreduce.jobtracker.jobhistory.lru.cache.size" : "5",
"file.blocksize" : "67108864",
"tfile.io.chunk.size" : "1048576",
"mapreduce.job.acl-modify-job" : " ",
"yarn.nodemanager.webapp.address" : "0.0.0.0:8042",
"mapreduce.tasktracker.reduce.tasks.maximum" : "2",
"io.skip.checksum.errors" : "false",
"mapreduce.cluster.temp.dir" : "${hadoop.tmp.dir}/mapred/temp",
"yarn.app.mapreduce.am.staging-dir" : "/user",
"dfs.namenode.edits.journal-plugin.qjournal" : "org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager",
"dfs.datanode.handler.count" : "10",
"fs.ftp.host.port" : "21",
"dfs.namenode.decommission.nodes.per.interval" : "5",
"yarn.resourcemanager.admin.client.thread-count" : "1",
"dfs.namenode.fs-limits.max-component-length" : "0",
"dfs.namenode.checkpoint.period" : "3600",
"fs.AbstractFileSystem.viewfs.impl" : "org.apache.hadoop.fs.viewfs.ViewFs",
"yarn.resourcemanager.resource-tracker.client.thread-count" : "50",
"mapreduce.tasktracker.dns.nameserver" : "default",
"mapreduce.map.output.compress" : "true",
"dfs.datanode.ipc.address" : "0.0.0.0:50020",
"hadoop.ssl.require.client.cert" : "false",
"yarn.nodemanager.delete.debug-delay-sec" : "0",
"dfs.datanode.max.transfer.threads" : "4096"
},
"computonsPerMapInputByte" : -1,
"computonsPerMapOutputByte" : -1,
"computonsPerReduceInputByte" : -1,
"computonsPerReduceOutputByte" : -1,
"heapMegabytes" : 200,
"outcome" : "SUCCESS",
"jobtype" : "JAVA",
"directDependantJobs" : [ ],
"successfulMapAttemptCDFs" : [ {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
}, {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
}, {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
}, {
"maximum" : 47021,
"minimum" : 11143,
"rankings" : [ {
"datum" : 13354,
"relativeRanking" : 0.05
}, {
"datum" : 14101,
"relativeRanking" : 0.1
}, {
"datum" : 15609,
"relativeRanking" : 0.15
}, {
"datum" : 15919,
"relativeRanking" : 0.2
}, {
"datum" : 17003,
"relativeRanking" : 0.25
}, {
"datum" : 17109,
"relativeRanking" : 0.3
}, {
"datum" : 18342,
"relativeRanking" : 0.35
}, {
"datum" : 18870,
"relativeRanking" : 0.4
}, {
"datum" : 19127,
"relativeRanking" : 0.45
}, {
"datum" : 19221,
"relativeRanking" : 0.5
}, {
"datum" : 19481,
"relativeRanking" : 0.55
}, {
"datum" : 19896,
"relativeRanking" : 0.6
}, {
"datum" : 20585,
"relativeRanking" : 0.65
}, {
"datum" : 20784,
"relativeRanking" : 0.7
}, {
"datum" : 21452,
"relativeRanking" : 0.75
}, {
"datum" : 21853,
"relativeRanking" : 0.8
}, {
"datum" : 22436,
"relativeRanking" : 0.85
}, {
"datum" : 32646,
"relativeRanking" : 0.9
}, {
"datum" : 41553,
"relativeRanking" : 0.95
} ],
"numberValues" : 96
} ],
"failedMapAttemptCDFs" : [ {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
}, {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
}, {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
}, {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
} ],
"successfulReduceAttemptCDF" : {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
},
"failedReduceAttemptCDF" : {
"maximum" : 9223372036854775807,
"minimum" : -9223372036854775808,
"rankings" : [ ],
"numberValues" : 0
},
"mapperTriesToSucceed" : [ 1.0 ],
"failedMapperFraction" : 0.0,
"relativeTime" : 0,
"clusterMapMB" : -1,
"clusterReduceMB" : -1,
"jobMapMB" : 200,
"jobReduceMB" : 200
}

View File

@ -375,6 +375,9 @@ public class SLSRunner {
new ArrayList<ContainerSimulator>();
// map tasks
for(LoggedTask mapTask : job.getMapTasks()) {
if (mapTask.getAttempts().size() == 0) {
continue;
}
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
.get(mapTask.getAttempts().size() - 1);
String hostname = taskAttempt.getHostName().getValue();
@ -386,6 +389,9 @@ public class SLSRunner {
// reduce tasks
for(LoggedTask reduceTask : job.getReduceTasks()) {
if (reduceTask.getAttempts().size() == 0) {
continue;
}
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
.get(reduceTask.getAttempts().size() - 1);
String hostname = taskAttempt.getHostName().getValue();

View File

@ -65,11 +65,17 @@ public class SLSUtils {
while ((job = reader.getNext()) != null) {
for(LoggedTask mapTask : job.getMapTasks()) {
// select the last attempt
if (mapTask.getAttempts().size() == 0) {
continue;
}
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
.get(mapTask.getAttempts().size() - 1);
nodeSet.add(taskAttempt.getHostName().getValue());
}
for(LoggedTask reduceTask : job.getReduceTasks()) {
if (reduceTask.getAttempts().size() == 0) {
continue;
}
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
.get(reduceTask.getAttempts().size() - 1);
nodeSet.add(taskAttempt.getHostName().getValue());