From 2ac87df578accb6e612f70ded76271cb5082ee10 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 1 Jul 2015 16:00:03 +0000 Subject: [PATCH] MAPREDUCE-6376. Add avro binary support for jhist files. Contributed by Ray Chiang --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../jobhistory/JobHistoryEventHandler.java | 19 ++++++++++-- .../mapreduce/jobhistory/TestEvents.java | 3 +- .../v2/jobhistory/JHAdminConfig.java | 7 +++++ .../mapreduce/jobhistory/EventReader.java | 12 ++++---- .../mapreduce/jobhistory/EventWriter.java | 29 +++++++++++++++---- .../src/main/resources/mapred-default.xml | 9 ++++++ 7 files changed, 69 insertions(+), 13 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0baecf8f6ff..409f074752e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -372,6 +372,9 @@ Release 2.8.0 - UNRELEASED OPTIMIZATIONS + MAPREDUCE-6376. Add avro binary support for jhist files (Ray Chiang via + jlowe) + BUG FIXES MAPREDUCE-6314. TestPipeApplication fails on trunk. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java index 35556a698cd..0457cc5e42c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java @@ -105,7 +105,8 @@ public class JobHistoryEventHandler extends AbstractService private int numUnflushedCompletionEvents = 0; private boolean isTimerActive; - + private EventWriter.WriteMode jhistMode = + EventWriter.WriteMode.JSON; protected BlockingQueue eventQueue = new LinkedBlockingQueue(); @@ -260,6 +261,20 @@ public class JobHistoryEventHandler extends AbstractService LOG.info("Emitting job history data to the timeline server is not enabled"); } + // Flag for setting + String jhistFormat = conf.get(JHAdminConfig.MR_HS_JHIST_FORMAT, + JHAdminConfig.DEFAULT_MR_HS_JHIST_FORMAT); + if (jhistFormat.equals("json")) { + jhistMode = EventWriter.WriteMode.JSON; + } else if (jhistFormat.equals("binary")) { + jhistMode = EventWriter.WriteMode.BINARY; + } else { + LOG.warn("Unrecognized value '" + jhistFormat + "' for property " + + JHAdminConfig.MR_HS_JHIST_FORMAT + ". Valid values are " + + "'json' or 'binary'. Falling back to default value '" + + JHAdminConfig.DEFAULT_MR_HS_JHIST_FORMAT + "'."); + } + super.serviceInit(conf); } @@ -418,7 +433,7 @@ public class JobHistoryEventHandler extends AbstractService protected EventWriter createEventWriter(Path historyFilePath) throws IOException { FSDataOutputStream out = stagingDirFS.create(historyFilePath, true); - return new EventWriter(out); + return new EventWriter(out, this.jhistMode); } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java index 597f7a0c034..7612ceb0718 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestEvents.java @@ -190,7 +190,8 @@ public class TestEvents { ByteArrayOutputStream output = new ByteArrayOutputStream(); FSDataOutputStream fsOutput = new FSDataOutputStream(output, new FileSystem.Statistics("scheme")); - EventWriter writer = new EventWriter(fsOutput); + EventWriter writer = new EventWriter(fsOutput, + EventWriter.WriteMode.JSON); writer.write(getJobPriorityChangedEvent()); writer.write(getJobStatusChangedEvent()); writer.write(getTaskUpdatedEvent()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java index a97c2ca9e72..86dfad3f7b8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java @@ -221,4 +221,11 @@ public class JHAdminConfig { + "jobname.limit"; public static final int DEFAULT_MR_HS_JOBNAME_LIMIT = 50; + /** + * Settings for .jhist file format. + */ + public static final String MR_HS_JHIST_FORMAT = + MR_HISTORY_PREFIX + "jhist.format"; + public static final String DEFAULT_MR_HS_JHIST_FORMAT = + "json"; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventReader.java index e08a9294b19..9898c2d58d0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventReader.java @@ -66,16 +66,18 @@ public class EventReader implements Closeable { public EventReader(DataInputStream in) throws IOException { this.in = in; this.version = in.readLine(); - - if (!EventWriter.VERSION.equals(version)) { - throw new IOException("Incompatible event log version: "+version); - } Schema myschema = new SpecificData(Event.class.getClassLoader()).getSchema(Event.class); Schema.Parser parser = new Schema.Parser(); this.schema = parser.parse(in.readLine()); this.reader = new SpecificDatumReader(schema, myschema); - this.decoder = DecoderFactory.get().jsonDecoder(schema, in); + if (EventWriter.VERSION.equals(version)) { + this.decoder = DecoderFactory.get().jsonDecoder(schema, in); + } else if (EventWriter.VERSION_BINARY.equals(version)) { + this.decoder = DecoderFactory.get().binaryDecoder(in, null); + } else { + throw new IOException("Incompatible event log version: " + version); + } } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventWriter.java index a548dfe895c..29489a5c8b5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventWriter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/EventWriter.java @@ -43,20 +43,37 @@ import org.apache.hadoop.mapreduce.Counters; */ class EventWriter { static final String VERSION = "Avro-Json"; + static final String VERSION_BINARY = "Avro-Binary"; private FSDataOutputStream out; private DatumWriter writer = new SpecificDatumWriter(Event.class); private Encoder encoder; private static final Log LOG = LogFactory.getLog(EventWriter.class); - - EventWriter(FSDataOutputStream out) throws IOException { + public enum WriteMode { JSON, BINARY } + private final WriteMode writeMode; + private final boolean jsonOutput; // Cache value while we have 2 modes + + EventWriter(FSDataOutputStream out, WriteMode mode) throws IOException { this.out = out; - out.writeBytes(VERSION); + this.writeMode = mode; + if (this.writeMode==WriteMode.JSON) { + this.jsonOutput = true; + out.writeBytes(VERSION); + } else if (this.writeMode==WriteMode.BINARY) { + this.jsonOutput = false; + out.writeBytes(VERSION_BINARY); + } else { + throw new IOException("Unknown mode: " + mode); + } out.writeBytes("\n"); out.writeBytes(Event.SCHEMA$.toString()); out.writeBytes("\n"); - this.encoder = EncoderFactory.get().jsonEncoder(Event.SCHEMA$, out); + if (!this.jsonOutput) { + this.encoder = EncoderFactory.get().binaryEncoder(out, null); + } else { + this.encoder = EncoderFactory.get().jsonEncoder(Event.SCHEMA$, out); + } } synchronized void write(HistoryEvent event) throws IOException { @@ -65,7 +82,9 @@ class EventWriter { wrapper.setEvent(event.getDatum()); writer.write(wrapper, encoder); encoder.flush(); - out.writeBytes("\n"); + if (this.jsonOutput) { + out.writeBytes("\n"); + } } void flush() throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index ba63c023bb8..ddcd2dfdee0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1713,6 +1713,15 @@ + + + File format the AM will use when generating the .jhist file. Valid + values are "json" for text output and "binary" for faster parsing. + + mapreduce.jobhistory.jhist.format + json + + mapreduce.job.heap.memory-mb.ratio 0.8