Better handling of ParseExceptions.

Two changes:
- Allow IncrementalIndex to suppress ParseExceptions on "aggregate".
- Add "reportParseExceptions" option to realtime tuning configs. By default this is "false".

Behavior of the counters should now be:

- processed: Number of rows indexed, including rows where some fields could be parsed and some could not.
- thrownAway: Number of rows thrown away due to rejection policy.
- unparseable: Number of rows thrown away due to being completely unparseable (no fields salvageable at all).

If "reportParseExceptions" is true then "unparseable" will always be zero (because a parse error would
cause an exception to be thrown). In addition, "processed" will only include fully parseable rows
(because even partial parse failures will cause exceptions to be thrown).
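
For reference, the counter bookkeeping described above is centralized in Plumbers.addNextRow (see the
diff further down). A simplified sketch of that logic, not the exact implementation; the numRows == -1
convention for rejected rows is an assumption carried over from Plumber.add's existing contract:

    import com.google.common.base.Supplier;
    import com.metamx.common.parsers.ParseException;
    import io.druid.data.input.Committer;
    import io.druid.data.input.Firehose;
    import io.druid.data.input.InputRow;
    import io.druid.segment.incremental.IndexSizeExceededException;
    import io.druid.segment.realtime.FireDepartmentMetrics;
    import io.druid.segment.realtime.plumber.Plumber;

    // Sketch of the row classification behind "processed" / "thrownAway" / "unparseable".
    static void addNextRow(
        Supplier<Committer> committerSupplier,
        Firehose firehose,
        Plumber plumber,
        boolean reportParseExceptions,
        FireDepartmentMetrics metrics
    ) throws IndexSizeExceededException
    {
      try {
        final InputRow inputRow = firehose.nextRow();
        if (inputRow == null) {
          if (reportParseExceptions) {
            throw new ParseException("null input row");
          }
          metrics.incrementUnparseable(); // row was completely unparseable
          return;
        }
        final int numRows = plumber.add(inputRow, committerSupplier);
        if (numRows == -1) {
          metrics.incrementThrownAway(); // rejected by the rejection policy
          return;
        }
        metrics.incrementProcessed(); // indexed, possibly with some unparseable fields
      }
      catch (ParseException e) {
        if (reportParseExceptions) {
          throw e; // halt ingestion instead of counting the row as unparseable
        }
        metrics.incrementUnparseable();
      }
    }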

Fixes #2510.
Gian Merlino 2016-02-19 13:35:06 -08:00
parent 0c984f9e32
commit 3534483433
27 changed files with 448 additions and 104 deletions

View File

@@ -153,6 +153,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
 |buildV9Directly|Boolean|Whether to build v9 index directly instead of building v8 index and convert it to v9 format|no (default = false)|
 |persistThreadPriority|int|If `-XX:+UseThreadPriorities` is properly enabled, this will set the thread priority of the persisting thread to `Thread.NORM_PRIORITY` plus this value within the bounds of `Thread.MIN_PRIORITY` and `Thread.MAX_PRIORITY`. A value of 0 indicates to not change the thread priority.|no (default = 0; inherit and do not override)|
 |mergeThreadPriority|int|If `-XX:+UseThreadPriorities` is properly enabled, this will set the thread priority of the merging thread to `Thread.NORM_PRIORITY` plus this value within the bounds of `Thread.MIN_PRIORITY` and `Thread.MAX_PRIORITY`. A value of 0 indicates to not change the thread priority.|no (default = 0; inherit and do not override)|
+|reportParseExceptions|Boolean|If true, exceptions encountered during parsing will be thrown and will halt ingestion. If false, unparseable rows and fields will be skipped. If an entire row is skipped, the "unparseable" counter will be incremented. If some fields in a row were parseable and some were not, the parseable fields will be indexed and the "unparseable" counter will not be incremented.|no (default = false)|
 
 Before enabling thread priority settings, users are highly encouraged to read the [original pull request](https://github.com/druid-io/druid/pull/984) and other documentation about proper use of `-XX:+UseThreadPriorities`.
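
In a JSON spec the reportParseExceptions row above is just "reportParseExceptions": true inside the
tuningConfig; programmatically it becomes the new trailing constructor argument on RealtimeTuningConfig
(shown later in this commit). A minimal sketch; the slot-by-slot comments are assumptions based on the
@JsonCreator parameters, and nulls fall back to the documented defaults:

    import io.druid.segment.indexing.RealtimeTuningConfig;

    public class StrictTuningConfigExample
    {
      public static void main(String[] args)
      {
        final RealtimeTuningConfig strictConfig = new RealtimeTuningConfig(
            null,  // maxRowsInMemory
            null,  // intermediatePersistPeriod
            null,  // windowPeriod
            null,  // basePersistDirectory
            null,  // versioningPolicy
            null,  // rejectionPolicyFactory
            null,  // maxPendingPersists
            null,  // shardSpec
            null,  // indexSpec
            null,  // buildV9Directly
            0,     // persistThreadPriority
            0,     // mergeThreadPriority
            true   // reportParseExceptions: any parse error halts ingestion
        );
        System.out.println(strictConfig.isReportParseExceptions()); // true
      }
    }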

View File

@@ -229,6 +229,7 @@ public class IndexGeneratorJob implements Jobby
     OnheapIncrementalIndex newIndex = new OnheapIncrementalIndex(
         indexSchema,
+        !tuningConfig.isIgnoreInvalidRows(),
         tuningConfig.getRowFlushBoundary()
     );

View File

@@ -143,7 +143,8 @@ public class IndexTask extends AbstractFixedIntervalTask
         indexSpec,
         buildV9Directly,
         0,
-        0
+        0,
+        true
     );
   }

View File

@@ -28,12 +28,10 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.primitives.Ints;
 import com.metamx.common.guava.CloseQuietly;
-import com.metamx.common.parsers.ParseException;
 import com.metamx.emitter.EmittingLogger;
 import io.druid.data.input.Committer;
 import io.druid.data.input.Firehose;
 import io.druid.data.input.FirehoseFactory;
-import io.druid.data.input.InputRow;
 import io.druid.indexing.common.TaskLock;
 import io.druid.indexing.common.TaskStatus;
 import io.druid.indexing.common.TaskToolbox;
@@ -51,6 +49,7 @@ import io.druid.segment.indexing.DataSchema;
 import io.druid.segment.indexing.RealtimeIOConfig;
 import io.druid.segment.indexing.RealtimeTuningConfig;
 import io.druid.segment.realtime.FireDepartment;
+import io.druid.segment.realtime.FireDepartmentMetrics;
 import io.druid.segment.realtime.RealtimeMetricsMonitor;
 import io.druid.segment.realtime.SegmentPublisher;
 import io.druid.segment.realtime.firehose.ClippedFirehoseFactory;
@@ -117,6 +116,9 @@ public class RealtimeIndexTask extends AbstractTask
   @JsonIgnore
   private volatile Firehose firehose = null;
 
+  @JsonIgnore
+  private volatile FireDepartmentMetrics metrics = null;
+
   @JsonIgnore
   private volatile boolean gracefullyStopped = false;
@@ -285,6 +287,7 @@ public class RealtimeIndexTask extends AbstractTask
         realtimeIOConfig,
         tuningConfig
     );
+    this.metrics = fireDepartment.getMetrics();
     final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(
         ImmutableList.of(fireDepartment),
         ImmutableMap.of(
@@ -313,7 +316,7 @@ public class RealtimeIndexTask extends AbstractTask
         toolbox.getObjectMapper()
     );
 
-    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, fireDepartment.getMetrics());
+    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
 
     Supplier<Committer> committerSupplier = null;
@@ -337,7 +340,13 @@ public class RealtimeIndexTask extends AbstractTask
       // Time to read data!
       while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
-        Plumbers.addNextRow(committerSupplier, firehose, plumber, fireDepartment.getMetrics());
+        Plumbers.addNextRow(
+            committerSupplier,
+            firehose,
+            plumber,
+            tuningConfig.isReportParseExceptions(),
+            metrics
+        );
       }
     }
     catch (Throwable e) {
@@ -459,6 +468,15 @@ public class RealtimeIndexTask extends AbstractTask
     return firehose;
   }
 
+  /**
+   * Public for tests.
+   */
+  @JsonIgnore
+  public FireDepartmentMetrics getMetrics()
+  {
+    return metrics;
+  }
+
   @JsonProperty("spec")
   public FireDepartment getRealtimeIngestionSchema()
   {
@@ -468,10 +486,12 @@ public class RealtimeIndexTask extends AbstractTask
   /**
    * Is a firehose from this factory drainable by closing it? If so, we should drain on stopGracefully rather than
    * abruptly stopping.
-   * <p>
+   *
    * This is a hack to get around the fact that the Firehose and FirehoseFactory interfaces do not help us do this.
+   *
+   * Protected for tests.
    */
-  private static boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory)
+  protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory)
   {
     return firehoseFactory instanceof EventReceiverFirehoseFactory
            || (firehoseFactory instanceof TimedShutoffFirehoseFactory

View File

@@ -20,14 +20,15 @@
 package io.druid.indexing.common.task;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.api.client.util.Charsets;
-import com.google.api.client.util.Sets;
+import com.google.common.base.Charsets;
+import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.ListeningExecutorService;
 import com.google.common.util.concurrent.MoreExecutors;
@@ -36,6 +37,7 @@ import com.metamx.common.ISE;
 import com.metamx.common.Pair;
 import com.metamx.common.guava.Sequences;
 import com.metamx.common.logger.Logger;
+import com.metamx.common.parsers.ParseException;
 import com.metamx.emitter.EmittingLogger;
 import com.metamx.emitter.core.LoggingEmitter;
 import com.metamx.emitter.service.ServiceEmitter;
@@ -43,8 +45,11 @@ import com.metamx.metrics.MonitorScheduler;
 import io.druid.client.cache.CacheConfig;
 import io.druid.client.cache.MapCache;
 import io.druid.concurrent.Execs;
+import io.druid.data.input.Firehose;
+import io.druid.data.input.FirehoseFactory;
 import io.druid.data.input.InputRow;
 import io.druid.data.input.MapBasedInputRow;
+import io.druid.data.input.impl.InputRowParser;
 import io.druid.granularity.QueryGranularity;
 import io.druid.indexing.common.SegmentLoaderFactory;
 import io.druid.indexing.common.TaskStatus;
@@ -93,14 +98,12 @@ import io.druid.segment.loading.SegmentLoaderConfig;
 import io.druid.segment.loading.SegmentLoaderLocalCacheManager;
 import io.druid.segment.loading.StorageLocationConfig;
 import io.druid.segment.realtime.FireDepartment;
-import io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory;
-import io.druid.segment.realtime.firehose.NoopChatHandlerProvider;
 import io.druid.segment.realtime.plumber.SegmentHandoffNotifier;
 import io.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory;
-import io.druid.server.coordination.DruidServerMetadata;
-import io.druid.server.metrics.EventReceiverFirehoseRegister;
+import io.druid.segment.realtime.plumber.ServerTimeRejectionPolicyFactory;
 import io.druid.timeline.DataSegment;
 import org.easymock.EasyMock;
+import org.hamcrest.CoreMatchers;
 import org.joda.time.DateTime;
 import org.joda.time.Period;
 import org.junit.After;
@@ -108,6 +111,8 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.internal.matchers.ThrowableMessageMatcher;
+import org.junit.rules.ExpectedException;
 import org.junit.rules.TemporaryFolder;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -116,24 +121,18 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 
 @RunWith(Parameterized.class)
 public class RealtimeIndexTaskTest
 {
   private static final Logger log = new Logger(RealtimeIndexTaskTest.class);
-  private static final DruidServerMetadata dummyServer = new DruidServerMetadata(
-      "dummy",
-      "dummy_host",
-      0,
-      "historical",
-      "dummy_tier",
-      0
-  );
   private static final ObjectMapper jsonMapper = new DefaultObjectMapper();
   private static final ServiceEmitter emitter = new ServiceEmitter(
       "service",
@@ -145,6 +144,89 @@ public class RealtimeIndexTaskTest
       )
   );
 
+  private static final String FAIL_DIM = "__fail__";
+
+  private static class TestFirehose implements Firehose
+  {
+    private final List<InputRow> queue = Lists.newLinkedList();
+    private boolean closed = false;
+
+    public void addRows(List<InputRow> rows)
+    {
+      synchronized (this) {
+        queue.addAll(rows);
+        notifyAll();
+      }
+    }
+
+    @Override
+    public boolean hasMore()
+    {
+      try {
+        synchronized (this) {
+          while (queue.isEmpty() && !closed) {
+            wait();
+          }
+          return !queue.isEmpty();
+        }
+      }
+      catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        throw Throwables.propagate(e);
+      }
+    }
+
+    @Override
+    public InputRow nextRow()
+    {
+      synchronized (this) {
+        final InputRow row = queue.remove(0);
+        if (row != null && row.getDimensions().contains(FAIL_DIM)) {
+          throw new ParseException(FAIL_DIM);
+        }
+        return row;
+      }
+    }
+
+    @Override
+    public Runnable commit()
+    {
+      return new Runnable()
+      {
+        @Override
+        public void run()
+        {
+          // do nothing
+        }
+      };
+    }
+
+    @Override
+    public void close() throws IOException
+    {
+      synchronized (this) {
+        closed = true;
+        notifyAll();
+      }
+    }
+  }
+
+  private static class TestFirehoseFactory implements FirehoseFactory
+  {
+    public TestFirehoseFactory()
+    {
+    }
+
+    @Override
+    public Firehose connect(InputRowParser parser) throws IOException, ParseException
+    {
+      return new TestFirehose();
+    }
+  }
+
+  @Rule
+  public final ExpectedException expectedException = ExpectedException.none();
+
   @Rule
   public final TemporaryFolder tempFolder = new TemporaryFolder();
@@ -206,20 +288,24 @@ public class RealtimeIndexTaskTest
       Thread.sleep(50);
     }
 
-    final EventReceiverFirehoseFactory.EventReceiverFirehose firehose =
-        (EventReceiverFirehoseFactory.EventReceiverFirehose) task.getFirehose();
+    final TestFirehose firehose = (TestFirehose) task.getFirehose();
 
     firehose.addRows(
         ImmutableList.<InputRow>of(
             new MapBasedInputRow(
                 now,
                 ImmutableList.of("dim1"),
-                ImmutableMap.<String, Object>of("dim1", "foo")
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")
+            ),
+            new MapBasedInputRow(
+                now.minus(new Period("P1D")),
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)
             ),
             new MapBasedInputRow(
                 now,
                 ImmutableList.of("dim2"),
-                ImmutableMap.<String, Object>of("dim2", "bar")
+                ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0)
             )
         )
     );
@@ -234,8 +320,160 @@ public class RealtimeIndexTaskTest
     publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
 
-    // Do a query.
-    Assert.assertEquals(2, countEvents(task));
+    // Check metrics.
+    Assert.assertEquals(2, task.getMetrics().processed());
+    Assert.assertEquals(1, task.getMetrics().thrownAway());
+    Assert.assertEquals(0, task.getMetrics().unparseable());
+
+    // Do some queries.
+    Assert.assertEquals(2, sumMetric(task, "rows"));
+    Assert.assertEquals(3, sumMetric(task, "met1"));
+
+    // Simulate handoff.
+    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
+      final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
+      Assert.assertEquals(
+          new SegmentDescriptor(
+              publishedSegment.getInterval(),
+              publishedSegment.getVersion(),
+              publishedSegment.getShardSpec().getPartitionNum()
+          ),
+          entry.getKey()
+      );
+      executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
+    }
+    handOffCallbacks.clear();
+
+    // Wait for the task to finish.
+    final TaskStatus taskStatus = statusFuture.get();
+    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
+  }
+
+  @Test(timeout = 60_000L)
+  public void testReportParseExceptionsOnBadMetric() throws Exception
+  {
+    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
+    final RealtimeIndexTask task = makeRealtimeTask(null, true);
+    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
+    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
+
+    // Wait for firehose to show up, it starts off null.
+    while (task.getFirehose() == null) {
+      Thread.sleep(50);
+    }
+
+    final TestFirehose firehose = (TestFirehose) task.getFirehose();
+
+    firehose.addRows(
+        ImmutableList.<InputRow>of(
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")
+            ),
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", "foo")
+            ),
+            new MapBasedInputRow(
+                now.minus(new Period("P1D")),
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", "foo")
+            ),
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim2"),
+                ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0)
+            )
+        )
+    );
+
+    // Stop the firehose, this will drain out existing events.
+    firehose.close();
+
+    // Wait for the task to finish.
+    expectedException.expect(ExecutionException.class);
+    expectedException.expectCause(CoreMatchers.<Throwable>instanceOf(ParseException.class));
+    expectedException.expectCause(
+        ThrowableMessageMatcher.hasMessage(
+            CoreMatchers.containsString("Unable to parse metrics[met1], value[foo]")
+        )
+    );
+    statusFuture.get();
+  }
+
+  @Test(timeout = 60_000L)
+  public void testNoReportParseExceptions() throws Exception
+  {
+    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
+    final RealtimeIndexTask task = makeRealtimeTask(null, false);
+    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
+    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
+    final DataSegment publishedSegment;
+
+    // Wait for firehose to show up, it starts off null.
+    while (task.getFirehose() == null) {
+      Thread.sleep(50);
+    }
+
+    final TestFirehose firehose = (TestFirehose) task.getFirehose();
+
+    firehose.addRows(
+        Arrays.<InputRow>asList(
+            // Good row- will be processed.
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")
+            ),
+            // Null row- will be unparseable.
+            null,
+            // Bad metric- will count as processed, but that particular metric won't update.
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", "foo")
+            ),
+            // Bad row- will be unparseable.
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim1", FAIL_DIM),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)
+            ),
+            // Old row- will be thrownAway.
+            new MapBasedInputRow(
+                now.minus(new Period("P1D")),
+                ImmutableList.of("dim1"),
+                ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)
+            ),
+            // Good row- will be processed.
+            new MapBasedInputRow(
+                now,
+                ImmutableList.of("dim2"),
+                ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0)
+            )
+        )
+    );
+
+    // Stop the firehose, this will drain out existing events.
+    firehose.close();
+
+    // Wait for publish.
+    while (mdc.getPublished().isEmpty()) {
+      Thread.sleep(50);
+    }
+    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
+
+    // Check metrics.
+    Assert.assertEquals(3, task.getMetrics().processed());
+    Assert.assertEquals(1, task.getMetrics().thrownAway());
+    Assert.assertEquals(2, task.getMetrics().unparseable());
+
+    // Do some queries.
+    Assert.assertEquals(3, sumMetric(task, "rows"));
+    Assert.assertEquals(3, sumMetric(task, "met1"));
 
     // Simulate handoff.
     for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
@@ -275,8 +513,7 @@ public class RealtimeIndexTaskTest
       Thread.sleep(50);
     }
 
-    final EventReceiverFirehoseFactory.EventReceiverFirehose firehose =
-        (EventReceiverFirehoseFactory.EventReceiverFirehose) task1.getFirehose();
+    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
 
     firehose.addRows(
         ImmutableList.<InputRow>of(
@@ -312,10 +549,9 @@ public class RealtimeIndexTaskTest
     }
 
     // Do a query, at this point the previous data should be loaded.
-    Assert.assertEquals(1, countEvents(task2));
+    Assert.assertEquals(1, sumMetric(task2, "rows"));
 
-    final EventReceiverFirehoseFactory.EventReceiverFirehose firehose =
-        (EventReceiverFirehoseFactory.EventReceiverFirehose) task2.getFirehose();
+    final TestFirehose firehose = (TestFirehose) task2.getFirehose();
 
     firehose.addRows(
         ImmutableList.<InputRow>of(
@@ -338,7 +574,7 @@ public class RealtimeIndexTaskTest
     publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
 
     // Do a query.
-    Assert.assertEquals(2, countEvents(task2));
+    Assert.assertEquals(2, sumMetric(task2, "rows"));
 
     // Simulate handoff.
     for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
@@ -361,7 +597,7 @@ public class RealtimeIndexTaskTest
     }
   }
 
-  @Test(timeout = 10000L)
+  @Test(timeout = 60_000L)
   public void testRestoreAfterHandoffAttemptDuringShutdown() throws Exception
   {
     final TaskStorage taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null));
@@ -380,8 +616,7 @@ public class RealtimeIndexTaskTest
       Thread.sleep(50);
     }
 
-    final EventReceiverFirehoseFactory.EventReceiverFirehose firehose =
-        (EventReceiverFirehoseFactory.EventReceiverFirehose) task1.getFirehose();
+    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
 
     firehose.addRows(
         ImmutableList.<InputRow>of(
@@ -404,7 +639,7 @@ public class RealtimeIndexTaskTest
     publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
 
     // Do a query.
-    Assert.assertEquals(1, countEvents(task1));
+    Assert.assertEquals(1, sumMetric(task1, "rows"));
 
     // Trigger graceful shutdown.
     task1.stopGracefully();
@@ -427,8 +662,7 @@ public class RealtimeIndexTaskTest
     }
 
     // Stop the firehose again, this will start another handoff.
-    final EventReceiverFirehoseFactory.EventReceiverFirehose firehose =
-        (EventReceiverFirehoseFactory.EventReceiverFirehose) task2.getFirehose();
+    final TestFirehose firehose = (TestFirehose) task2.getFirehose();
 
     // Stop the firehose, this will trigger a finishJob.
     firehose.close();
@@ -462,7 +696,7 @@ public class RealtimeIndexTaskTest
     }
   }
 
-  @Test(timeout = 10000L)
+  @Test(timeout = 60_000L)
   public void testRestoreCorruptData() throws Exception
   {
     final File directory = tempFolder.newFolder();
@@ -479,8 +713,7 @@ public class RealtimeIndexTaskTest
       Thread.sleep(50);
     }
 
-    final EventReceiverFirehoseFactory.EventReceiverFirehose firehose =
-        (EventReceiverFirehoseFactory.EventReceiverFirehose) task1.getFirehose();
+    final TestFirehose firehose = (TestFirehose) task1.getFirehose();
 
     firehose.addRows(
         ImmutableList.<InputRow>of(
@@ -536,7 +769,7 @@ public class RealtimeIndexTaskTest
     }
   }
 
-  @Test(timeout = 10000L)
+  @Test(timeout = 60_000L)
   public void testStopBeforeStarting() throws Exception
   {
     final File directory = tempFolder.newFolder();
@@ -577,24 +810,22 @@ public class RealtimeIndexTaskTest
   }
 
   private RealtimeIndexTask makeRealtimeTask(final String taskId)
   {
+    return makeRealtimeTask(taskId, true);
+  }
+
+  private RealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions)
+  {
     ObjectMapper objectMapper = new DefaultObjectMapper();
     DataSchema dataSchema = new DataSchema(
         "test_ds",
         null,
-        new AggregatorFactory[]{new CountAggregatorFactory("rows")},
+        new AggregatorFactory[]{new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1")},
         new UniformGranularitySpec(Granularity.DAY, QueryGranularity.NONE, null),
         objectMapper
     );
     RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(
-        new EventReceiverFirehoseFactory(
-            "foo",
-            100,
-            new NoopChatHandlerProvider(),
-            objectMapper,
-            null,
-            new EventReceiverFirehoseRegister()
-        ),
+        new TestFirehoseFactory(),
         null,
         null
     );
@@ -604,19 +835,28 @@ public class RealtimeIndexTaskTest
         new Period("PT10M"),
         null,
         null,
-        null,
+        new ServerTimeRejectionPolicyFactory(),
         null,
         null,
         null,
         buildV9Directly,
-        0, 0
+        0,
+        0,
+        reportParseExceptions
     );
     return new RealtimeIndexTask(
         taskId,
         null,
         new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig),
         null
-    );
+    )
+    {
+      @Override
+      protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory)
+      {
+        return true;
+      }
+    };
   }
private TaskToolbox makeToolbox( private TaskToolbox makeToolbox(
@@ -760,14 +1000,14 @@ public class RealtimeIndexTaskTest
     return toolboxFactory.build(task);
   }
 
-  public long countEvents(final Task task) throws Exception
+  public long sumMetric(final Task task, final String metric) throws Exception
   {
     // Do a query.
     TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
                                   .dataSource("test_ds")
                                   .aggregators(
                                       ImmutableList.<AggregatorFactory>of(
-                                          new LongSumAggregatorFactory("rows", "rows")
+                                          new LongSumAggregatorFactory(metric, metric)
                                       )
                                   ).granularity(QueryGranularity.ALL)
                                   .intervals("2000/3000")
@@ -777,6 +1017,6 @@ public class RealtimeIndexTaskTest
         task.getQueryRunner(query).run(query, ImmutableMap.<String, Object>of()),
         Lists.<Result<TimeseriesResultValue>>newArrayList()
     );
-    return results.get(0).getValue().getLongMetric("rows");
+    return results.isEmpty() ? 0 : results.get(0).getValue().getLongMetric(metric);
   }
 }

View File

@@ -335,7 +335,8 @@ public class TaskSerdeTest
                 indexSpec,
                 null,
                 0,
-                0
+                0,
+                true
             )
         ),
         null

@@ -358,6 +359,7 @@ public class TaskSerdeTest
         Granularity.HOUR,
         task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity()
     );
+    Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());
 
     Assert.assertEquals(task.getId(), task2.getId());
     Assert.assertEquals(task.getGroupId(), task2.getGroupId());

View File

@@ -144,6 +144,7 @@ public class IngestSegmentFirehoseFactoryTest
         .build();
     final OnheapIncrementalIndex index = new OnheapIncrementalIndex(
         schema,
+        true,
         MAX_ROWS * MAX_SHARD_NUMBER
     );

View File

@@ -218,7 +218,7 @@ public class IngestSegmentFirehoseFactoryTimelineTest
           }
         )
         .build();
-    final OnheapIncrementalIndex index = new OnheapIncrementalIndex(schema, rows.length);
+    final OnheapIncrementalIndex index = new OnheapIncrementalIndex(schema, true, rows.length);
 
     for (InputRow row : rows) {
       try {

View File

@@ -1179,7 +1179,9 @@ public class TaskLifecycleTest
         null,
         null,
         null,
-        0, 0
+        0,
+        0,
+        null
     );
     FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig);
     return new RealtimeIndexTask(

View File

@@ -87,6 +87,7 @@ public class GroupByQueryHelper
         gran,
         aggs.toArray(new AggregatorFactory[aggs.size()]),
         false,
+        true,
         config.getMaxResults(),
         bufferPool
     );

@@ -98,6 +99,7 @@ public class GroupByQueryHelper
         gran,
         aggs.toArray(new AggregatorFactory[aggs.size()]),
         false,
+        true,
         config.getMaxResults()
     );
   }

View File

@@ -51,7 +51,6 @@ import io.druid.segment.column.Column;
 import io.druid.segment.column.ColumnCapabilities;
 import io.druid.segment.column.ColumnCapabilitiesImpl;
 import io.druid.segment.column.ValueType;
-import io.druid.segment.data.Indexed;
 import io.druid.segment.data.IndexedInts;
 import io.druid.segment.serde.ComplexMetricExtractor;
 import io.druid.segment.serde.ComplexMetricSerde;

@@ -268,6 +267,7 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
   private final AggregatorFactory[] metrics;
   private final AggregatorType[] aggs;
   private final boolean deserializeComplexMetrics;
+  private final boolean reportParseExceptions;
   private final Metadata metadata;
 
   private final Map<String, MetricDesc> metricDescs;

@@ -295,10 +295,13 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
   * @param incrementalIndexSchema    the schema to use for incremental index
   * @param deserializeComplexMetrics flag whether or not to call ComplexMetricExtractor.extractValue() on the input
   *                                  value for aggregators that return metrics other than float.
+   * @param reportParseExceptions     flag whether or not to report ParseExceptions that occur while extracting values
+   *                                  from input rows
   */
  public IncrementalIndex(
      final IncrementalIndexSchema incrementalIndexSchema,
-      final boolean deserializeComplexMetrics
+      final boolean deserializeComplexMetrics,
+      final boolean reportParseExceptions
  )
  {
    this.minTimestamp = incrementalIndexSchema.getMinTimestamp();

@@ -306,6 +309,7 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
     this.metrics = incrementalIndexSchema.getMetrics();
     this.rowTransformers = new CopyOnWriteArrayList<>();
     this.deserializeComplexMetrics = deserializeComplexMetrics;
+    this.reportParseExceptions = reportParseExceptions;
 
     this.metadata = new Metadata().setAggregators(getCombiningAggregators(metrics));

@@ -367,6 +371,7 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
   protected abstract Integer addToFacts(
       AggregatorFactory[] metrics,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       InputRow row,
       AtomicInteger numEntries,
       TimeAndDims key,

@@ -416,7 +421,16 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,
   */
  public int add(InputRow row) throws IndexSizeExceededException {
    TimeAndDims key = toTimeAndDims(row);
-    final int rv = addToFacts(metrics, deserializeComplexMetrics, row, numEntries, key, in, rowSupplier);
+    final int rv = addToFacts(
+        metrics,
+        deserializeComplexMetrics,
+        reportParseExceptions,
+        row,
+        numEntries,
+        key,
+        in,
+        rowSupplier
+    );
    updateMaxIngestedTime(row.getTimestamp());
    return rv;
  }

View File

@@ -24,6 +24,7 @@ import com.google.common.base.Throwables;
 import com.google.common.collect.Maps;
 import com.metamx.common.IAE;
 import com.metamx.common.ISE;
+import com.metamx.common.parsers.ParseException;
 import io.druid.collections.ResourceHolder;
 import io.druid.collections.StupidPool;
 import io.druid.data.input.InputRow;

@@ -69,11 +70,12 @@ public class OffheapIncrementalIndex extends IncrementalIndex<BufferAggregator>
   public OffheapIncrementalIndex(
       IncrementalIndexSchema incrementalIndexSchema,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       int maxRowCount,
       StupidPool<ByteBuffer> bufferPool
   )
   {
-    super(incrementalIndexSchema, deserializeComplexMetrics);
+    super(incrementalIndexSchema, deserializeComplexMetrics, reportParseExceptions);
     this.maxRowCount = maxRowCount;
     this.bufferPool = bufferPool;
     this.facts = new ConcurrentSkipListMap<>(dimsComparator());

@@ -97,6 +99,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex<BufferAggregator>
       QueryGranularity gran,
       final AggregatorFactory[] metrics,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       int maxRowCount,
       StupidPool<ByteBuffer> bufferPool
   )

@@ -107,6 +110,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex<BufferAggregator>
             .withMetrics(metrics)
             .build(),
         deserializeComplexMetrics,
+        reportParseExceptions,
         maxRowCount,
         bufferPool
     );

@@ -126,6 +130,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex<BufferAggregator>
             .withMetrics(metrics)
             .build(),
         true,
+        true,
         maxRowCount,
         bufferPool
     );

@@ -181,6 +186,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex<BufferAggregator>
   protected Integer addToFacts(
       AggregatorFactory[] metrics,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       InputRow row,
       AtomicInteger numEntries,
       TimeAndDims key,

@@ -264,7 +270,14 @@ public class OffheapIncrementalIndex extends IncrementalIndex<BufferAggregator>
       final BufferAggregator agg = getAggs()[i];
       synchronized (agg) {
+        try {
           agg.aggregate(aggBuffer, bufferOffset + aggOffsetInBuffer[i]);
+        } catch (ParseException e) {
+          // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
+          if (reportParseExceptions) {
+            throw e;
+          }
+        }
       }
     }
     rowContainer.set(null);

View File

@@ -22,6 +22,7 @@ package io.druid.segment.incremental;
 import com.google.common.base.Supplier;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.metamx.common.parsers.ParseException;
 import io.druid.data.input.InputRow;
 import io.druid.granularity.QueryGranularity;
 import io.druid.query.aggregation.Aggregator;

@@ -56,10 +57,11 @@ public class OnheapIncrementalIndex extends IncrementalIndex<Aggregator>
   public OnheapIncrementalIndex(
       IncrementalIndexSchema incrementalIndexSchema,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       int maxRowCount
   )
   {
-    super(incrementalIndexSchema, deserializeComplexMetrics);
+    super(incrementalIndexSchema, deserializeComplexMetrics, reportParseExceptions);
     this.maxRowCount = maxRowCount;
     this.facts = new ConcurrentSkipListMap<>(dimsComparator());
   }

@@ -69,6 +71,7 @@ public class OnheapIncrementalIndex extends IncrementalIndex<Aggregator>
       QueryGranularity gran,
       final AggregatorFactory[] metrics,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       int maxRowCount
   )
   {

@@ -78,6 +81,7 @@ public class OnheapIncrementalIndex extends IncrementalIndex<Aggregator>
             .withMetrics(metrics)
             .build(),
         deserializeComplexMetrics,
+        reportParseExceptions,
         maxRowCount
     );
   }

@@ -95,16 +99,18 @@ public class OnheapIncrementalIndex extends IncrementalIndex<Aggregator>
             .withMetrics(metrics)
             .build(),
         true,
+        true,
         maxRowCount
     );
   }
 
   public OnheapIncrementalIndex(
       IncrementalIndexSchema incrementalIndexSchema,
+      boolean reportParseExceptions,
       int maxRowCount
   )
   {
-    this(incrementalIndexSchema, true, maxRowCount);
+    this(incrementalIndexSchema, true, reportParseExceptions, maxRowCount);
   }
 
   @Override

@@ -139,6 +145,7 @@ public class OnheapIncrementalIndex extends IncrementalIndex<Aggregator>
   protected Integer addToFacts(
       AggregatorFactory[] metrics,
       boolean deserializeComplexMetrics,
+      boolean reportParseExceptions,
       InputRow row,
       AtomicInteger numEntries,
       TimeAndDims key,

@@ -188,7 +195,14 @@ public class OnheapIncrementalIndex extends IncrementalIndex<Aggregator>
     for (Aggregator agg : aggs) {
       synchronized (agg) {
+        try {
           agg.aggregate();
+        } catch (ParseException e) {
+          // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
+          if (reportParseExceptions) {
+            throw e;
+          }
+        }
       }
     }

View File

@@ -98,6 +98,7 @@ public class MultiValuedDimensionTest
             new CountAggregatorFactory("count")
         },
         true,
+        true,
         5000
     );

View File

@@ -311,7 +311,7 @@ public class AggregationTestHelper
     List<File> toMerge = new ArrayList<>();
 
     try {
-      index = new OnheapIncrementalIndex(minTimestamp, gran, metrics, deserializeComplexMetrics, maxRowCount);
+      index = new OnheapIncrementalIndex(minTimestamp, gran, metrics, deserializeComplexMetrics, true, maxRowCount);
       while (rows.hasNext()) {
         Object row = rows.next();
         if (!index.canAppendRow()) {

@@ -319,7 +319,7 @@ public class AggregationTestHelper
           toMerge.add(tmp);
           indexMerger.persist(index, tmp, new IndexSpec());
           index.close();
-          index = new OnheapIncrementalIndex(minTimestamp, gran, metrics, deserializeComplexMetrics, maxRowCount);
+          index = new OnheapIncrementalIndex(minTimestamp, gran, metrics, deserializeComplexMetrics, true, maxRowCount);
         }
         if (row instanceof String && parser instanceof StringInputRowParser) {
           //Note: this is required because StringInputRowParser is InputRowParser<ByteBuffer> as opposed to

View File

@@ -882,9 +882,9 @@ public class IndexMergerTest
         .build();
 
-    IncrementalIndex toPersist1 = new OnheapIncrementalIndex(schema, 1000);
-    IncrementalIndex toPersist2 = new OnheapIncrementalIndex(schema, 1000);
-    IncrementalIndex toPersist3 = new OnheapIncrementalIndex(schema, 1000);
+    IncrementalIndex toPersist1 = new OnheapIncrementalIndex(schema, true, 1000);
+    IncrementalIndex toPersist2 = new OnheapIncrementalIndex(schema, true, 1000);
+    IncrementalIndex toPersist3 = new OnheapIncrementalIndex(schema, true, 1000);
 
     addDimValuesToIndex(toPersist1, "dimA", Arrays.asList("1", "2"));
     addDimValuesToIndex(toPersist2, "dimA", Arrays.asList("1", "2"));

@@ -1626,10 +1626,7 @@ public class IndexMergerTest
         new AggregatorFactory[]{new CountAggregatorFactory("count")}
     );
-    return new OnheapIncrementalIndex(
-        schema,
-        1000
-    );
+    return new OnheapIncrementalIndex(schema, true, 1000);
   }
 
   private AggregatorFactory[] getCombiningAggregators(AggregatorFactory[] aggregators)

View File

@@ -181,10 +181,7 @@ public class TestIndex
         .withQueryGranularity(QueryGranularity.NONE)
         .withMetrics(METRIC_AGGS)
         .build();
-    final IncrementalIndex retVal = new OnheapIncrementalIndex(
-        schema,
-        10000
-    );
+    final IncrementalIndex retVal = new OnheapIncrementalIndex(schema, true, 10000);
 
     final AtomicLong startTime = new AtomicLong();
     int lineCount;

View File

@@ -32,6 +32,7 @@ import com.google.common.util.concurrent.ListeningExecutorService;
 import com.google.common.util.concurrent.MoreExecutors;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.metamx.common.guava.Sequences;
+import com.metamx.common.parsers.ParseException;
 import io.druid.data.input.InputRow;
 import io.druid.data.input.MapBasedInputRow;
 import io.druid.granularity.QueryGranularity;

@@ -135,6 +136,7 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark
     protected Integer addToFacts(
         AggregatorFactory[] metrics,
         boolean deserializeComplexMetrics,
+        boolean reportParseExceptions,
         InputRow row,
         AtomicInteger numEntries,
         TimeAndDims key,

@@ -185,8 +187,16 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark
       for (Aggregator agg : aggs) {
         synchronized (agg) {
+          try {
             agg.aggregate();
           }
+          catch (ParseException e) {
+            // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
+            if (reportParseExceptions) {
+              throw e;
+            }
+          }
         }
       }
 
       rowContainer.set(null);

@@ -225,7 +235,8 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark
     return new MapBasedInputRow(timestamp, dimensionList, builder.build());
   }
 
-  @Ignore @Test
+  @Ignore
+  @Test
   @BenchmarkOptions(callgc = true, clock = Clock.REAL_TIME, warmupRounds = 10, benchmarkRounds = 20)
   public void testConcurrentAddRead()
       throws InterruptedException, ExecutionException, NoSuchMethodException, IllegalAccessException,

View File

@@ -47,6 +47,7 @@ public class RealtimeTuningConfig implements TuningConfig
   private static final ShardSpec defaultShardSpec = new NoneShardSpec();
   private static final IndexSpec defaultIndexSpec = new IndexSpec();
   private static final Boolean defaultBuildV9Directly = Boolean.FALSE;
+  private static final Boolean defaultReportParseExceptions = Boolean.FALSE;
 
   // Might make sense for this to be a builder
   public static RealtimeTuningConfig makeDefaultTuningConfig()

@@ -63,7 +64,8 @@ public class RealtimeTuningConfig implements TuningConfig
         defaultIndexSpec,
         defaultBuildV9Directly,
         0,
-        0
+        0,
+        defaultReportParseExceptions
     );
   }

@@ -76,9 +78,10 @@ public class RealtimeTuningConfig implements TuningConfig
   private final int maxPendingPersists;
   private final ShardSpec shardSpec;
   private final IndexSpec indexSpec;
-  private final Boolean buildV9Directly;
+  private final boolean buildV9Directly;
   private final int persistThreadPriority;
   private final int mergeThreadPriority;
+  private final boolean reportParseExceptions;
 
   @JsonCreator
   public RealtimeTuningConfig(

@@ -93,7 +96,8 @@ public class RealtimeTuningConfig implements TuningConfig
       @JsonProperty("indexSpec") IndexSpec indexSpec,
       @JsonProperty("buildV9Directly") Boolean buildV9Directly,
       @JsonProperty("persistThreadPriority") int persistThreadPriority,
-      @JsonProperty("mergeThreadPriority") int mergeThreadPriority
+      @JsonProperty("mergeThreadPriority") int mergeThreadPriority,
+      @JsonProperty("reportParseExceptions") Boolean reportParseExceptions
   )
   {
     this.maxRowsInMemory = maxRowsInMemory == null ? defaultMaxRowsInMemory : maxRowsInMemory;

@@ -112,6 +116,9 @@ public class RealtimeTuningConfig implements TuningConfig
     this.buildV9Directly = buildV9Directly == null ? defaultBuildV9Directly : buildV9Directly;
     this.mergeThreadPriority = mergeThreadPriority;
     this.persistThreadPriority = persistThreadPriority;
+    this.reportParseExceptions = reportParseExceptions == null
+                                 ? defaultReportParseExceptions
+                                 : reportParseExceptions;
   }
 
   @JsonProperty

@@ -174,6 +181,7 @@ public class RealtimeTuningConfig implements TuningConfig
     return buildV9Directly;
   }
 
+  @JsonProperty
   public int getPersistThreadPriority()
   {
     return this.persistThreadPriority;

@@ -185,6 +193,12 @@ public class RealtimeTuningConfig implements TuningConfig
     return this.mergeThreadPriority;
   }
 
+  @JsonProperty
+  public boolean isReportParseExceptions()
+  {
+    return reportParseExceptions;
+  }
+
   public RealtimeTuningConfig withVersioningPolicy(VersioningPolicy policy)
   {
     return new RealtimeTuningConfig(

@@ -199,7 +213,8 @@ public class RealtimeTuningConfig implements TuningConfig
         indexSpec,
         buildV9Directly,
         persistThreadPriority,
-        mergeThreadPriority
+        mergeThreadPriority,
+        reportParseExceptions
     );
   }

@@ -217,7 +232,8 @@ public class RealtimeTuningConfig implements TuningConfig
         indexSpec,
         buildV9Directly,
         persistThreadPriority,
-        mergeThreadPriority
+        mergeThreadPriority,
+        reportParseExceptions
     );
   }
 }
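
Because the flag is exposed with @JsonProperty on both the constructor parameter and the
isReportParseExceptions() getter, it round-trips through Jackson with the rest of the tuning config
(TaskSerdeTest above exercises this via a full task round trip). An illustrative sketch, assuming
Druid's DefaultObjectMapper handles the config here as it does elsewhere in this commit:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import io.druid.jackson.DefaultObjectMapper;
    import io.druid.segment.indexing.RealtimeTuningConfig;

    public class ReportParseExceptionsSerdeExample
    {
      public static void main(String[] args) throws Exception
      {
        final ObjectMapper jsonMapper = new DefaultObjectMapper();
        final RealtimeTuningConfig config = RealtimeTuningConfig.makeDefaultTuningConfig();

        // Unset in the spec, so the new default of false applies.
        System.out.println(config.isReportParseExceptions()); // false

        // The property is serialized along with the rest of the tuning config.
        System.out.println(jsonMapper.writeValueAsString(config).contains("reportParseExceptions")); // true
      }
    }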

View File

@@ -30,7 +30,6 @@ import com.google.inject.Inject;
 import com.metamx.common.guava.CloseQuietly;
 import com.metamx.common.lifecycle.LifecycleStart;
 import com.metamx.common.lifecycle.LifecycleStop;
-import com.metamx.common.parsers.ParseException;
 import com.metamx.emitter.EmittingLogger;
 import io.druid.data.input.Committer;
 import io.druid.data.input.Firehose;

@@ -45,7 +44,6 @@ import io.druid.query.QueryRunnerFactoryConglomerate;
 import io.druid.query.QuerySegmentWalker;
 import io.druid.query.QueryToolChest;
 import io.druid.query.SegmentDescriptor;
-import io.druid.segment.incremental.IndexSizeExceededException;
 import io.druid.segment.indexing.DataSchema;
 import io.druid.segment.indexing.RealtimeTuningConfig;
 import io.druid.segment.realtime.plumber.Committers;

@@ -340,7 +338,7 @@ public class RealtimeManager implements QuerySegmentWalker
     {
       final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
       while (firehose.hasMore()) {
-        Plumbers.addNextRow(committerSupplier, firehose, plumber, metrics);
+        Plumbers.addNextRow(committerSupplier, firehose, plumber, config.isReportParseExceptions(), metrics);
       }
     }

View File

@@ -42,6 +42,7 @@ public class Plumbers
       final Supplier<Committer> committerSupplier,
       final Firehose firehose,
       final Plumber plumber,
+      final boolean reportParseExceptions,
       final FireDepartmentMetrics metrics
   )
   {

@@ -49,10 +50,14 @@ public class Plumbers
       final InputRow inputRow = firehose.nextRow();
 
       if (inputRow == null) {
+        if (reportParseExceptions) {
+          throw new ParseException("null input row");
+        } else {
           log.debug("Discarded null input row, considering unparseable.");
           metrics.incrementUnparseable();
           return;
         }
+      }
 
       // Included in ParseException try/catch, as additional parsing can be done during indexing.
       int numRows = plumber.add(inputRow, committerSupplier);

@@ -66,9 +71,13 @@ public class Plumbers
       metrics.incrementProcessed();
     }
     catch (ParseException e) {
+      if (reportParseExceptions) {
+        throw e;
+      } else {
         log.debug(e, "Discarded row due to exception, considering unparseable.");
         metrics.incrementUnparseable();
       }
+    }
     catch (IndexSizeExceededException e) {
       // Shouldn't happen if this is only being called by a single thread.
       // plumber.add should be swapping out indexes before they fill up.

View File

@@ -199,6 +199,7 @@ public class Sink implements Iterable<FireHydrant>
         .build();
     final IncrementalIndex newIndex = new OnheapIncrementalIndex(
         indexSchema,
+        config.isReportParseExceptions(),
         config.getMaxRowsInMemory()
     );

View File

@@ -116,9 +116,7 @@ public class FireDepartmentTest
             ),
             null
         ),
-        new RealtimeTuningConfig(
-            null, null, null, null, null, null, null, null, null, null, 0, 0
-        )
+        RealtimeTuningConfig.makeDefaultTuningConfig()
     );
 
     String json = jsonMapper.writeValueAsString(schema);

View File

@@ -153,7 +153,8 @@ public class RealtimeManagerTest
         null,
         null,
         0,
-        0
+        0,
+        null
     );
     plumber = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema, tuningConfig, new DateTime().toString()));

View File

@@ -120,7 +120,7 @@ public class IngestSegmentFirehoseTest
     IncrementalIndex index = null;
     try {
-      index = new OnheapIncrementalIndex(0, QueryGranularity.NONE, aggregators, true, 5000);
+      index = new OnheapIncrementalIndex(0, QueryGranularity.NONE, aggregators, true, true, 5000);
       for (String line : rows) {
         index.add(parser.parse(line));
       }

View File

@@ -195,7 +195,9 @@ public class RealtimePlumberSchoolTest
         null,
         null,
         buildV9Directly,
-        0, 0
+        0,
+        0,
+        false
     );
 
     realtimePlumberSchool = new RealtimePlumberSchool(

View File

@@ -68,7 +68,9 @@ public class SinkTest
         null,
         null,
         null,
-        0, 0
+        0,
+        0,
+        null
     );
     final Sink sink = new Sink(interval, schema, tuningConfig, version);