YARN-3949. Ensure timely flush of timeline writes. Contributed by Sangjin Lee.
(cherry picked from commit 967bef7e0396d857913caa2574afb103a5f0b81b)
This commit is contained in:
parent
57d8dc2fb7
commit
8603736ef2
|
@ -1967,6 +1967,15 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final String TIMELINE_SERVICE_READER_CLASS =
|
public static final String TIMELINE_SERVICE_READER_CLASS =
|
||||||
TIMELINE_SERVICE_PREFIX + "reader.class";
|
TIMELINE_SERVICE_PREFIX + "reader.class";
|
||||||
|
|
||||||
|
/** The setting that controls how often the timeline collector flushes the
|
||||||
|
* timeline writer.
|
||||||
|
*/
|
||||||
|
public static final String TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS =
|
||||||
|
TIMELINE_SERVICE_PREFIX + "writer.flush-interval-seconds";
|
||||||
|
|
||||||
|
public static final int
|
||||||
|
DEFAULT_TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS = 60;
|
||||||
|
|
||||||
// mark app-history related configs @Private as application history is going
|
// mark app-history related configs @Private as application history is going
|
||||||
// to be integrated into the timeline service
|
// to be integrated into the timeline service
|
||||||
@Private
|
@Private
|
||||||
|
|
|
@ -796,7 +796,15 @@
|
||||||
<name>yarn.system-metrics-publisher.enabled</name>
|
<name>yarn.system-metrics-publisher.enabled</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>The setting that controls whether yarn container metrics is
|
||||||
|
published to the timeline server or not by RM. This configuration setting is
|
||||||
|
for ATS V2.</description>
|
||||||
|
<name>yarn.rm.system-metrics-publisher.emit-container-events</name>
|
||||||
|
<value>false</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>Number of worker threads that send the yarn system metrics
|
<description>Number of worker threads that send the yarn system metrics
|
||||||
|
@ -2197,6 +2205,13 @@
|
||||||
<value>420</value>
|
<value>420</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>The setting that controls how often the timeline collector
|
||||||
|
flushes the timeline writer.</description>
|
||||||
|
<name>yarn.timeline-service.writer.flush-interval-seconds</name>
|
||||||
|
<value>60</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<!-- Shared Cache Configuration -->
|
<!-- Shared Cache Configuration -->
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
|
|
@ -19,6 +19,13 @@
|
||||||
package org.apache.hadoop.yarn.server.timelineservice.collector;
|
package org.apache.hadoop.yarn.server.timelineservice.collector;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
@ -32,9 +39,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.server.timelineservice.storage.FileSystemTimelineWriterImpl;
|
import org.apache.hadoop.yarn.server.timelineservice.storage.FileSystemTimelineWriterImpl;
|
||||||
import org.apache.hadoop.yarn.server.timelineservice.storage.TimelineWriter;
|
import org.apache.hadoop.yarn.server.timelineservice.storage.TimelineWriter;
|
||||||
|
|
||||||
import java.util.Collections;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class that manages adding and removing collectors and their lifecycle. It
|
* Class that manages adding and removing collectors and their lifecycle. It
|
||||||
|
@ -47,7 +52,10 @@ public abstract class TimelineCollectorManager extends AbstractService {
|
||||||
private static final Log LOG =
|
private static final Log LOG =
|
||||||
LogFactory.getLog(TimelineCollectorManager.class);
|
LogFactory.getLog(TimelineCollectorManager.class);
|
||||||
|
|
||||||
protected TimelineWriter writer;
|
private TimelineWriter writer;
|
||||||
|
private ScheduledExecutorService writerFlusher;
|
||||||
|
private int flushInterval;
|
||||||
|
private boolean writerFlusherRunning;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void serviceInit(Configuration conf) throws Exception {
|
public void serviceInit(Configuration conf) throws Exception {
|
||||||
|
@ -56,6 +64,12 @@ public abstract class TimelineCollectorManager extends AbstractService {
|
||||||
FileSystemTimelineWriterImpl.class,
|
FileSystemTimelineWriterImpl.class,
|
||||||
TimelineWriter.class), conf);
|
TimelineWriter.class), conf);
|
||||||
writer.init(conf);
|
writer.init(conf);
|
||||||
|
// create a single dedicated thread for flushing the writer on a periodic
|
||||||
|
// basis
|
||||||
|
writerFlusher = Executors.newSingleThreadScheduledExecutor();
|
||||||
|
flushInterval = conf.getInt(
|
||||||
|
YarnConfiguration.TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS,
|
||||||
|
YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS);
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,6 +79,10 @@ public abstract class TimelineCollectorManager extends AbstractService {
|
||||||
if (writer != null) {
|
if (writer != null) {
|
||||||
writer.start();
|
writer.start();
|
||||||
}
|
}
|
||||||
|
// schedule the flush task
|
||||||
|
writerFlusher.scheduleAtFixedRate(new WriterFlushTask(writer),
|
||||||
|
flushInterval, flushInterval, TimeUnit.SECONDS);
|
||||||
|
writerFlusherRunning = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// access to this map is synchronized with the map itself
|
// access to this map is synchronized with the map itself
|
||||||
|
@ -161,9 +179,48 @@ public abstract class TimelineCollectorManager extends AbstractService {
|
||||||
c.serviceStop();
|
c.serviceStop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// stop the flusher first
|
||||||
|
if (writerFlusher != null) {
|
||||||
|
writerFlusher.shutdown();
|
||||||
|
writerFlusherRunning = false;
|
||||||
|
if (!writerFlusher.awaitTermination(30, TimeUnit.SECONDS)) {
|
||||||
|
// in reality it should be ample time for the flusher task to finish
|
||||||
|
// even if it times out, writers may be able to handle closing in this
|
||||||
|
// situation fine
|
||||||
|
// proceed to close the writer
|
||||||
|
LOG.warn("failed to stop the flusher task in time. " +
|
||||||
|
"will still proceed to close the writer.");
|
||||||
|
}
|
||||||
|
}
|
||||||
if (writer != null) {
|
if (writer != null) {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
super.serviceStop();
|
super.serviceStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
boolean writerFlusherRunning() {
|
||||||
|
return writerFlusherRunning;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Task that invokes the flush operation on the timeline writer.
|
||||||
|
*/
|
||||||
|
private static class WriterFlushTask implements Runnable {
|
||||||
|
private final TimelineWriter writer;
|
||||||
|
|
||||||
|
public WriterFlushTask(TimelineWriter writer) {
|
||||||
|
this.writer = writer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
writer.flush();
|
||||||
|
} catch (Throwable th) {
|
||||||
|
// we need to handle all exceptions or subsequent execution may be
|
||||||
|
// suppressed
|
||||||
|
LOG.error("exception during timeline writer flush!", th);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -127,6 +127,11 @@ public class FileSystemTimelineWriterImpl extends AbstractService
|
||||||
mkdirs(outputRoot, ENTITIES_DIR);
|
mkdirs(outputRoot, ENTITIES_DIR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void flush() throws IOException {
|
||||||
|
// no op
|
||||||
|
}
|
||||||
|
|
||||||
private static String mkdirs(String... dirStrs) throws IOException {
|
private static String mkdirs(String... dirStrs) throws IOException {
|
||||||
StringBuilder path = new StringBuilder();
|
StringBuilder path = new StringBuilder();
|
||||||
for (String dirStr : dirStrs) {
|
for (String dirStr : dirStrs) {
|
||||||
|
|
|
@ -214,6 +214,12 @@ public class HBaseTimelineWriterImpl extends AbstractService implements
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void flush() throws IOException {
|
||||||
|
// flush all buffered mutators
|
||||||
|
entityTable.flush();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* close the hbase connections The close APIs perform flushing and release any
|
* close the hbase connections The close APIs perform flushing and release any
|
||||||
* resources held
|
* resources held
|
||||||
|
|
|
@ -187,6 +187,11 @@ public class PhoenixTimelineWriterImpl extends AbstractService
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void flush() throws IOException {
|
||||||
|
// currently no-op
|
||||||
|
}
|
||||||
|
|
||||||
// Utility functions
|
// Utility functions
|
||||||
@Private
|
@Private
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
|
|
@ -70,4 +70,13 @@ public interface TimelineWriter extends Service {
|
||||||
*/
|
*/
|
||||||
TimelineWriteResponse aggregate(TimelineEntity data,
|
TimelineWriteResponse aggregate(TimelineEntity data,
|
||||||
TimelineAggregationTrack track) throws IOException;
|
TimelineAggregationTrack track) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flushes the data to the backend storage. Whatever may be buffered will be
|
||||||
|
* written to the storage when the method returns. This may be a potentially
|
||||||
|
* time-consuming operation, and should be used judiciously.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
void flush() throws IOException;
|
||||||
}
|
}
|
|
@ -66,6 +66,11 @@ public class TestNMTimelineCollectorManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStartingWriterFlusher() throws Exception {
|
||||||
|
assertTrue(collectorManager.writerFlusherRunning());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStartWebApp() throws Exception {
|
public void testStartWebApp() throws Exception {
|
||||||
assertNotNull(collectorManager.getRestServerBindAddress());
|
assertNotNull(collectorManager.getRestServerBindAddress());
|
||||||
|
|
Loading…
Reference in New Issue