MAPREDUCE-6376. Add avro binary support for jhist files. Contributed by Ray Chiang
(cherry picked from commit 2ac87df578)
Conflicts:
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
This commit is contained in:
parent
c343250657
commit
df4e1e4965
|
@ -102,6 +102,9 @@ Release 2.8.0 - UNRELEASED
|
|||
|
||||
OPTIMIZATIONS
|
||||
|
||||
MAPREDUCE-6376. Add avro binary support for jhist files (Ray Chiang via
|
||||
jlowe)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-6314. TestPipeApplication fails on trunk.
|
||||
|
|
|
@ -105,7 +105,8 @@ public class JobHistoryEventHandler extends AbstractService
|
|||
|
||||
private int numUnflushedCompletionEvents = 0;
|
||||
private boolean isTimerActive;
|
||||
|
||||
private EventWriter.WriteMode jhistMode =
|
||||
EventWriter.WriteMode.JSON;
|
||||
|
||||
protected BlockingQueue<JobHistoryEvent> eventQueue =
|
||||
new LinkedBlockingQueue<JobHistoryEvent>();
|
||||
|
@ -260,6 +261,20 @@ public class JobHistoryEventHandler extends AbstractService
|
|||
LOG.info("Emitting job history data to the timeline server is not enabled");
|
||||
}
|
||||
|
||||
// Select the .jhist write mode (JSON or binary) from the configured format
|
||||
String jhistFormat = conf.get(JHAdminConfig.MR_HS_JHIST_FORMAT,
|
||||
JHAdminConfig.DEFAULT_MR_HS_JHIST_FORMAT);
|
||||
if (jhistFormat.equals("json")) {
|
||||
jhistMode = EventWriter.WriteMode.JSON;
|
||||
} else if (jhistFormat.equals("binary")) {
|
||||
jhistMode = EventWriter.WriteMode.BINARY;
|
||||
} else {
|
||||
LOG.warn("Unrecognized value '" + jhistFormat + "' for property " +
|
||||
JHAdminConfig.MR_HS_JHIST_FORMAT + ". Valid values are " +
|
||||
"'json' or 'binary'. Falling back to default value '" +
|
||||
JHAdminConfig.DEFAULT_MR_HS_JHIST_FORMAT + "'.");
|
||||
}
|
||||
|
||||
super.serviceInit(conf);
|
||||
}
|
||||
|
||||
|
@ -418,7 +433,7 @@ public class JobHistoryEventHandler extends AbstractService
|
|||
protected EventWriter createEventWriter(Path historyFilePath)
|
||||
throws IOException {
|
||||
FSDataOutputStream out = stagingDirFS.create(historyFilePath, true);
|
||||
return new EventWriter(out);
|
||||
return new EventWriter(out, this.jhistMode);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -190,7 +190,8 @@ public class TestEvents {
|
|||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
FSDataOutputStream fsOutput = new FSDataOutputStream(output,
|
||||
new FileSystem.Statistics("scheme"));
|
||||
EventWriter writer = new EventWriter(fsOutput);
|
||||
EventWriter writer = new EventWriter(fsOutput,
|
||||
EventWriter.WriteMode.JSON);
|
||||
writer.write(getJobPriorityChangedEvent());
|
||||
writer.write(getJobStatusChangedEvent());
|
||||
writer.write(getTaskUpdatedEvent());
|
||||
|
|
|
@ -221,4 +221,11 @@ public class JHAdminConfig {
|
|||
+ "jobname.limit";
|
||||
public static final int DEFAULT_MR_HS_JOBNAME_LIMIT = 50;
|
||||
|
||||
/**
|
||||
* Settings for .jhist file format.
|
||||
*/
|
||||
public static final String MR_HS_JHIST_FORMAT =
|
||||
MR_HISTORY_PREFIX + "jhist.format";
|
||||
public static final String DEFAULT_MR_HS_JHIST_FORMAT =
|
||||
"json";
|
||||
}
|
||||
|
|
|
@ -66,16 +66,18 @@ public class EventReader implements Closeable {
|
|||
public EventReader(DataInputStream in) throws IOException {
|
||||
this.in = in;
|
||||
this.version = in.readLine();
|
||||
|
||||
if (!EventWriter.VERSION.equals(version)) {
|
||||
throw new IOException("Incompatible event log version: "+version);
|
||||
}
|
||||
|
||||
Schema myschema = new SpecificData(Event.class.getClassLoader()).getSchema(Event.class);
|
||||
Schema.Parser parser = new Schema.Parser();
|
||||
this.schema = parser.parse(in.readLine());
|
||||
this.reader = new SpecificDatumReader(schema, myschema);
|
||||
this.decoder = DecoderFactory.get().jsonDecoder(schema, in);
|
||||
if (EventWriter.VERSION.equals(version)) {
|
||||
this.decoder = DecoderFactory.get().jsonDecoder(schema, in);
|
||||
} else if (EventWriter.VERSION_BINARY.equals(version)) {
|
||||
this.decoder = DecoderFactory.get().binaryDecoder(in, null);
|
||||
} else {
|
||||
throw new IOException("Incompatible event log version: " + version);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -43,20 +43,37 @@ import org.apache.hadoop.mapreduce.Counters;
|
|||
*/
|
||||
class EventWriter {
|
||||
static final String VERSION = "Avro-Json";
|
||||
static final String VERSION_BINARY = "Avro-Binary";
|
||||
|
||||
private FSDataOutputStream out;
|
||||
private DatumWriter<Event> writer =
|
||||
new SpecificDatumWriter<Event>(Event.class);
|
||||
private Encoder encoder;
|
||||
private static final Log LOG = LogFactory.getLog(EventWriter.class);
|
||||
|
||||
EventWriter(FSDataOutputStream out) throws IOException {
|
||||
public enum WriteMode { JSON, BINARY }
|
||||
private final WriteMode writeMode;
|
||||
private final boolean jsonOutput; // Cache value while we have 2 modes
|
||||
|
||||
EventWriter(FSDataOutputStream out, WriteMode mode) throws IOException {
|
||||
this.out = out;
|
||||
out.writeBytes(VERSION);
|
||||
this.writeMode = mode;
|
||||
if (this.writeMode==WriteMode.JSON) {
|
||||
this.jsonOutput = true;
|
||||
out.writeBytes(VERSION);
|
||||
} else if (this.writeMode==WriteMode.BINARY) {
|
||||
this.jsonOutput = false;
|
||||
out.writeBytes(VERSION_BINARY);
|
||||
} else {
|
||||
throw new IOException("Unknown mode: " + mode);
|
||||
}
|
||||
out.writeBytes("\n");
|
||||
out.writeBytes(Event.SCHEMA$.toString());
|
||||
out.writeBytes("\n");
|
||||
this.encoder = EncoderFactory.get().jsonEncoder(Event.SCHEMA$, out);
|
||||
if (!this.jsonOutput) {
|
||||
this.encoder = EncoderFactory.get().binaryEncoder(out, null);
|
||||
} else {
|
||||
this.encoder = EncoderFactory.get().jsonEncoder(Event.SCHEMA$, out);
|
||||
}
|
||||
}
|
||||
|
||||
synchronized void write(HistoryEvent event) throws IOException {
|
||||
|
@ -65,7 +82,9 @@ class EventWriter {
|
|||
wrapper.setEvent(event.getDatum());
|
||||
writer.write(wrapper, encoder);
|
||||
encoder.flush();
|
||||
out.writeBytes("\n");
|
||||
if (this.jsonOutput) {
|
||||
out.writeBytes("\n");
|
||||
}
|
||||
}
|
||||
|
||||
void flush() throws IOException {
|
||||
|
|
|
@ -2167,6 +2167,15 @@
|
|||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>
|
||||
File format the AM will use when generating the .jhist file. Valid
|
||||
values are "json" for text output and "binary" for faster parsing.
|
||||
</description>
|
||||
<name>mapreduce.jobhistory.jhist.format</name>
|
||||
<value>json</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>yarn.app.mapreduce.am.containerlauncher.threadpool-initial-size</name>
|
||||
<value>10</value>
|
||||
|
|
Loading…
Reference in New Issue