Mirror of https://github.com/apache/druid.git

Commit ff52581bd3 (parent 7004f5d499)
IndexTask improvements (#3611)

* index task improvements
* code review changes
* add null check
@@ -53,7 +53,7 @@ public interface Firehose extends Closeable
    *
    * @return The next row
    */
-  public InputRow nextRow() ;
+  public InputRow nextRow();
 
   /**
    * Returns a runnable that will "commit" everything read up to the point at which commit() is called. This is
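For orientation, a minimal sketch of how a caller typically drives a Firehose. It assumes the usual hasMore()/nextRow()/commit() methods on this interface and a FirehoseFactory.connect(parser) entry point; firehoseFactory, parser and index(row) are hypothetical names, not code from this commit.

// Illustrative consumer loop for the Firehose interface (sketch, not part of this change).
try (Firehose firehose = firehoseFactory.connect(parser)) {
  while (firehose.hasMore()) {
    InputRow row = firehose.nextRow();
    if (row != null) {          // some firehoses may return null for rows that were thrown away
      index(row);               // hypothetical handler for the ingested row
    }
  }
  firehose.commit().run();      // "commit" everything read up to this point, per the Javadoc above
}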
@@ -187,7 +187,7 @@ This spec is used to generated segments with uniform intervals.
 | segmentGranularity | string | The granularity to create segments at. | no (default == 'DAY') |
 | queryGranularity | string | The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. | no (default == 'NONE') |
 | rollup | boolean | rollup or not | no (default == true) |
-| intervals | string | A list of intervals for the raw data being ingested. Ignored for real-time ingestion. | yes for batch, no for real-time |
+| intervals | string | A list of intervals for the raw data being ingested. Ignored for real-time ingestion. | yes for Hadoop ingestion, no otherwise |
 | timezone | string | The timezone to represent the interval offsets in. Only valid if intervals are explicitly specified for batch ingestion. Will not be valid for kafka based ingestion. | no (default == 'UTC')
 
 ### Arbitrary Granularity Spec
@@ -76,9 +76,8 @@ The Index Task is a simpler variation of the Index Hadoop task that is designed
       },
       "tuningConfig" : {
         "type" : "index",
-        "targetPartitionSize" : -1,
-        "rowFlushBoundary" : 0,
-        "numShards": 1
+        "targetPartitionSize" : 5000000,
+        "maxRowsInMemory" : 75000
       }
     }
 }
@@ -100,7 +99,12 @@ See [Ingestion](../ingestion/index.html)
 
 #### IOConfig
 
-This field is required. You can specify a type of [Firehose](../ingestion/firehose.html) here.
+|property|description|default|required?|
+|--------|-----------|-------|---------|
+|type|The task type, this should always be "index".|none|yes|
+|firehose|Specify a [Firehose](../ingestion/firehose.html) here.|none|yes|
+|appendToExisting|Creates segments as additional shards of the latest version, effectively appending to the segment set instead of replacing it. This will only work if the existing segment set has extendable-type shardSpecs (which can be forced by setting 'forceExtendableShardSpecs' in the tuning config).|false|no|
+|skipFirehoseCaching|By default the IndexTask will fully read the supplied firehose to disk before processing the data. This prevents the task from doing multiple remote fetches and enforces determinism if more than one pass through the data is required. It also allows the task to retry fetching the data if the firehose throws an exception during reading. This requires sufficient disk space for the temporary cache.|false|no|
 
 #### TuningConfig
 
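As a quick illustration of the new ioConfig fields, the serde test added later in this commit deserializes a minimal spec and checks the defaults; a condensed sketch (mapper setup assumed):

// Both new flags default to false when omitted.
IndexTask.IndexIOConfig ioConfig = jsonMapper.readValue(
    "{\"type\":\"index\"}",
    IndexTask.IndexIOConfig.class
);
// ioConfig.isAppendToExisting() == false
// ioConfig.isSkipFirehoseCaching() == false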
@@ -108,13 +112,15 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
 
 |property|description|default|required?|
 |--------|-----------|-------|---------|
-|type|The task type, this should always be "index".|None.|yes|
-|targetPartitionSize|Used in sharding. Determines how many rows are in each segment. Set this to -1 to use numShards instead for sharding.|5000000|no|
-|rowFlushBoundary|Used in determining when intermediate persist should occur to disk.|75000|no|
-|numShards|Directly specify the number of shards to create. You can skip the intermediate persist step if you specify the number of shards you want and set targetPartitionSize=-1.|null|no|
+|type|The task type, this should always be "index".|none|yes|
+|targetPartitionSize|Used in sharding. Determines how many rows are in each segment.|5000000|no|
+|maxRowsInMemory|Used in determining when intermediate persists to disk should occur.|75000|no|
+|numShards|Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data. numShards cannot be specified if targetPartitionSize is set.|null|no|
 |indexSpec|defines segment storage format options to be used at indexing time, see [IndexSpec](#indexspec)|null|no|
+|maxPendingPersists|Maximum number of persists that can be pending but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)|no|
 |buildV9Directly|Whether to build a v9 index directly instead of first building a v8 index and then converting it to v9 format.|true|no|
 |forceExtendableShardSpecs|Forces use of extendable shardSpecs. Experimental feature intended for use with the [Kafka indexing service extension](../development/extensions-core/kafka-ingestion.html).|false|no|
+|reportParseExceptions|If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable rows and fields will be skipped.|false|no|
 
 #### IndexSpec
 
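The tuning defaults and the targetPartitionSize/numShards exclusivity are exercised by serde tests added later in this commit; a condensed sketch (mapper setup assumed):

// An empty tuningConfig picks up the documented defaults.
IndexTask.IndexTuningConfig tuningConfig = jsonMapper.readValue(
    "{\"type\":\"index\"}",
    IndexTask.IndexTuningConfig.class
);
// targetPartitionSize == 5000000, maxRowsInMemory == 75000, numShards == null,
// buildV9Directly == true, forceExtendableShardSpecs == false, reportParseExceptions == false

// Setting both targetPartitionSize and numShards is rejected (IllegalArgumentException),
// while targetPartitionSize == -1 is treated as "not set" so numShards may be used instead.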
(File diff suppressed because it is too large.)
@@ -24,6 +24,7 @@ import com.google.common.collect.Sets;
 import com.google.inject.Inject;
 import io.druid.indexing.common.TaskStatus;
 import io.druid.indexing.common.actions.SegmentInsertAction;
+import io.druid.indexing.common.actions.SegmentTransactionalInsertAction;
 import io.druid.indexing.common.actions.TaskAction;
 import io.druid.indexing.common.task.Task;
 import io.druid.timeline.DataSegment;

@@ -80,6 +81,8 @@ public class TaskStorageQueryAdapter
     for (final TaskAction action : storage.getAuditLogs(taskid)) {
       if (action instanceof SegmentInsertAction) {
         segments.addAll(((SegmentInsertAction) action).getSegments());
+      } else if (action instanceof SegmentTransactionalInsertAction) {
+        segments.addAll(((SegmentTransactionalInsertAction) action).getSegments());
       }
     }
     return segments;
@ -21,7 +21,6 @@ package io.druid.indexing.common.task;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import io.druid.data.input.impl.CSVParseSpec;
|
import io.druid.data.input.impl.CSVParseSpec;
|
||||||
import io.druid.data.input.impl.DimensionsSpec;
|
import io.druid.data.input.impl.DimensionsSpec;
|
||||||
import io.druid.data.input.impl.SpatialDimensionSchema;
|
import io.druid.data.input.impl.SpatialDimensionSchema;
|
||||||
|
@ -31,9 +30,13 @@ import io.druid.granularity.QueryGranularities;
|
||||||
import io.druid.indexing.common.TaskLock;
|
import io.druid.indexing.common.TaskLock;
|
||||||
import io.druid.indexing.common.TaskToolbox;
|
import io.druid.indexing.common.TaskToolbox;
|
||||||
import io.druid.indexing.common.TestUtils;
|
import io.druid.indexing.common.TestUtils;
|
||||||
|
import io.druid.indexing.common.actions.LockAcquireAction;
|
||||||
import io.druid.indexing.common.actions.LockListAction;
|
import io.druid.indexing.common.actions.LockListAction;
|
||||||
|
import io.druid.indexing.common.actions.SegmentAllocateAction;
|
||||||
|
import io.druid.indexing.common.actions.SegmentTransactionalInsertAction;
|
||||||
import io.druid.indexing.common.actions.TaskAction;
|
import io.druid.indexing.common.actions.TaskAction;
|
||||||
import io.druid.indexing.common.actions.TaskActionClient;
|
import io.druid.indexing.common.actions.TaskActionClient;
|
||||||
|
import io.druid.indexing.overlord.SegmentPublishResult;
|
||||||
import io.druid.java.util.common.Granularity;
|
import io.druid.java.util.common.Granularity;
|
||||||
import io.druid.query.aggregation.AggregatorFactory;
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
||||||
|
@ -42,13 +45,15 @@ import io.druid.segment.IndexMerger;
|
||||||
import io.druid.segment.IndexMergerV9;
|
import io.druid.segment.IndexMergerV9;
|
||||||
import io.druid.segment.IndexSpec;
|
import io.druid.segment.IndexSpec;
|
||||||
import io.druid.segment.indexing.DataSchema;
|
import io.druid.segment.indexing.DataSchema;
|
||||||
import io.druid.segment.indexing.RealtimeTuningConfig;
|
|
||||||
import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec;
|
import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec;
|
||||||
|
import io.druid.segment.indexing.granularity.GranularitySpec;
|
||||||
import io.druid.segment.indexing.granularity.UniformGranularitySpec;
|
import io.druid.segment.indexing.granularity.UniformGranularitySpec;
|
||||||
import io.druid.segment.loading.DataSegmentPusher;
|
import io.druid.segment.loading.DataSegmentPusher;
|
||||||
|
import io.druid.segment.realtime.appenderator.SegmentIdentifier;
|
||||||
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
|
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
|
||||||
import io.druid.timeline.DataSegment;
|
import io.druid.timeline.DataSegment;
|
||||||
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
|
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
|
||||||
|
import io.druid.timeline.partition.NoneShardSpec;
|
||||||
import io.druid.timeline.partition.NumberedShardSpec;
|
import io.druid.timeline.partition.NumberedShardSpec;
|
||||||
import io.druid.timeline.partition.ShardSpec;
|
import io.druid.timeline.partition.ShardSpec;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
@ -75,6 +80,7 @@ public class IndexTaskTest
|
||||||
private IndexMerger indexMerger;
|
private IndexMerger indexMerger;
|
||||||
private IndexMergerV9 indexMergerV9;
|
private IndexMergerV9 indexMergerV9;
|
||||||
private IndexIO indexIO;
|
private IndexIO indexIO;
|
||||||
|
private volatile int segmentAllocatePartitionCounter;
|
||||||
|
|
||||||
public IndexTaskTest()
|
public IndexTaskTest()
|
||||||
{
|
{
|
||||||
|
@ -102,57 +108,9 @@ public class IndexTaskTest
|
||||||
IndexTask indexTask = new IndexTask(
|
IndexTask indexTask = new IndexTask(
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
new IndexTask.IndexIngestionSpec(
|
createIngestionSpec(tmpDir, null, 2, null, false, false),
|
||||||
new DataSchema(
|
null,
|
||||||
"test",
|
jsonMapper
|
||||||
jsonMapper.convertValue(
|
|
||||||
new StringInputRowParser(
|
|
||||||
new CSVParseSpec(
|
|
||||||
new TimestampSpec(
|
|
||||||
"ts",
|
|
||||||
"auto",
|
|
||||||
null
|
|
||||||
),
|
|
||||||
new DimensionsSpec(
|
|
||||||
DimensionsSpec.getDefaultSchemas(Arrays.asList("ts")),
|
|
||||||
Lists.<String>newArrayList(),
|
|
||||||
Lists.<SpatialDimensionSchema>newArrayList()
|
|
||||||
),
|
|
||||||
null,
|
|
||||||
Arrays.asList("ts", "dim", "val")
|
|
||||||
),
|
|
||||||
null
|
|
||||||
),
|
|
||||||
Map.class
|
|
||||||
),
|
|
||||||
new AggregatorFactory[]{
|
|
||||||
new LongSumAggregatorFactory("val", "val")
|
|
||||||
},
|
|
||||||
new UniformGranularitySpec(
|
|
||||||
Granularity.DAY,
|
|
||||||
QueryGranularities.MINUTE,
|
|
||||||
Arrays.asList(new Interval("2014/2015"))
|
|
||||||
),
|
|
||||||
jsonMapper
|
|
||||||
),
|
|
||||||
new IndexTask.IndexIOConfig(
|
|
||||||
new LocalFirehoseFactory(
|
|
||||||
tmpDir,
|
|
||||||
"druid*",
|
|
||||||
null
|
|
||||||
)
|
|
||||||
),
|
|
||||||
new IndexTask.IndexTuningConfig(
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
null,
|
|
||||||
indexSpec,
|
|
||||||
null,
|
|
||||||
false
|
|
||||||
)
|
|
||||||
),
|
|
||||||
jsonMapper,
|
|
||||||
null
|
|
||||||
);
|
);
|
||||||
|
|
||||||
final List<DataSegment> segments = runTask(indexTask);
|
final List<DataSegment> segments = runTask(indexTask);
|
||||||
|
@ -188,57 +146,9 @@ public class IndexTaskTest
|
||||||
IndexTask indexTask = new IndexTask(
|
IndexTask indexTask = new IndexTask(
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
new IndexTask.IndexIngestionSpec(
|
createIngestionSpec(tmpDir, null, 2, null, true, false),
|
||||||
new DataSchema(
|
null,
|
||||||
"test",
|
jsonMapper
|
||||||
jsonMapper.convertValue(
|
|
||||||
new StringInputRowParser(
|
|
||||||
new CSVParseSpec(
|
|
||||||
new TimestampSpec(
|
|
||||||
"ts",
|
|
||||||
"auto",
|
|
||||||
null
|
|
||||||
),
|
|
||||||
new DimensionsSpec(
|
|
||||||
DimensionsSpec.getDefaultSchemas(Arrays.asList("ts")),
|
|
||||||
Lists.<String>newArrayList(),
|
|
||||||
Lists.<SpatialDimensionSchema>newArrayList()
|
|
||||||
),
|
|
||||||
null,
|
|
||||||
Arrays.asList("ts", "dim", "val")
|
|
||||||
),
|
|
||||||
null
|
|
||||||
),
|
|
||||||
Map.class
|
|
||||||
),
|
|
||||||
new AggregatorFactory[]{
|
|
||||||
new LongSumAggregatorFactory("val", "val")
|
|
||||||
},
|
|
||||||
new UniformGranularitySpec(
|
|
||||||
Granularity.DAY,
|
|
||||||
QueryGranularities.MINUTE,
|
|
||||||
Arrays.asList(new Interval("2014/2015"))
|
|
||||||
),
|
|
||||||
jsonMapper
|
|
||||||
),
|
|
||||||
new IndexTask.IndexIOConfig(
|
|
||||||
new LocalFirehoseFactory(
|
|
||||||
tmpDir,
|
|
||||||
"druid*",
|
|
||||||
null
|
|
||||||
)
|
|
||||||
),
|
|
||||||
new IndexTask.IndexTuningConfig(
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
null,
|
|
||||||
indexSpec,
|
|
||||||
null,
|
|
||||||
true
|
|
||||||
)
|
|
||||||
),
|
|
||||||
jsonMapper,
|
|
||||||
null
|
|
||||||
);
|
);
|
||||||
|
|
||||||
final List<DataSegment> segments = runTask(indexTask);
|
final List<DataSegment> segments = runTask(indexTask);
|
||||||
|
@ -274,49 +184,19 @@ public class IndexTaskTest
|
||||||
IndexTask indexTask = new IndexTask(
|
IndexTask indexTask = new IndexTask(
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
new IndexTask.IndexIngestionSpec(
|
createIngestionSpec(
|
||||||
new DataSchema(
|
tmpDir,
|
||||||
"test",
|
new ArbitraryGranularitySpec(
|
||||||
jsonMapper.convertValue(
|
QueryGranularities.MINUTE,
|
||||||
new StringInputRowParser(
|
Arrays.asList(new Interval("2014/2015"))
|
||||||
new CSVParseSpec(
|
|
||||||
new TimestampSpec(
|
|
||||||
"ts",
|
|
||||||
"auto",
|
|
||||||
null
|
|
||||||
),
|
|
||||||
new DimensionsSpec(
|
|
||||||
DimensionsSpec.getDefaultSchemas(Arrays.asList("ts")),
|
|
||||||
Lists.<String>newArrayList(),
|
|
||||||
Lists.<SpatialDimensionSchema>newArrayList()
|
|
||||||
),
|
|
||||||
null,
|
|
||||||
Arrays.asList("ts", "dim", "val")
|
|
||||||
),
|
|
||||||
null
|
|
||||||
),
|
|
||||||
Map.class
|
|
||||||
),
|
|
||||||
new AggregatorFactory[]{
|
|
||||||
new LongSumAggregatorFactory("val", "val")
|
|
||||||
},
|
|
||||||
new ArbitraryGranularitySpec(
|
|
||||||
QueryGranularities.MINUTE,
|
|
||||||
Arrays.asList(new Interval("2014/2015"))
|
|
||||||
),
|
|
||||||
jsonMapper
|
|
||||||
),
|
),
|
||||||
new IndexTask.IndexIOConfig(
|
10,
|
||||||
new LocalFirehoseFactory(
|
null,
|
||||||
tmpDir,
|
false,
|
||||||
"druid*",
|
false
|
||||||
null
|
|
||||||
)
|
|
||||||
),
|
|
||||||
null
|
|
||||||
),
|
),
|
||||||
jsonMapper,
|
null,
|
||||||
null
|
jsonMapper
|
||||||
);
|
);
|
||||||
|
|
||||||
List<DataSegment> segments = runTask(indexTask);
|
List<DataSegment> segments = runTask(indexTask);
|
||||||
|
@ -324,6 +204,160 @@ public class IndexTaskTest
|
||||||
Assert.assertEquals(1, segments.size());
|
Assert.assertEquals(1, segments.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIntervalBucketing() throws Exception
|
||||||
|
{
|
||||||
|
File tmpDir = temporaryFolder.newFolder();
|
||||||
|
|
||||||
|
File tmpFile = File.createTempFile("druid", "index", tmpDir);
|
||||||
|
|
||||||
|
PrintWriter writer = new PrintWriter(tmpFile);
|
||||||
|
writer.println("2015-03-01T07:59:59.977Z,a,1");
|
||||||
|
writer.println("2015-03-01T08:00:00.000Z,b,1");
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexTask indexTask = new IndexTask(
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
createIngestionSpec(
|
||||||
|
tmpDir,
|
||||||
|
new UniformGranularitySpec(
|
||||||
|
Granularity.HOUR,
|
||||||
|
QueryGranularities.HOUR,
|
||||||
|
Arrays.asList(new Interval("2015-03-01T08:00:00Z/2015-03-01T09:00:00Z"))
|
||||||
|
),
|
||||||
|
50,
|
||||||
|
null,
|
||||||
|
false,
|
||||||
|
false
|
||||||
|
),
|
||||||
|
null,
|
||||||
|
jsonMapper
|
||||||
|
);
|
||||||
|
|
||||||
|
final List<DataSegment> segments = runTask(indexTask);
|
||||||
|
|
||||||
|
Assert.assertEquals(1, segments.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNumShardsProvided() throws Exception
|
||||||
|
{
|
||||||
|
File tmpDir = temporaryFolder.newFolder();
|
||||||
|
File tmpFile = File.createTempFile("druid", "index", tmpDir);
|
||||||
|
|
||||||
|
PrintWriter writer = new PrintWriter(tmpFile);
|
||||||
|
writer.println("2014-01-01T00:00:10Z,a,1");
|
||||||
|
writer.println("2014-01-01T01:00:20Z,b,1");
|
||||||
|
writer.println("2014-01-01T02:00:30Z,c,1");
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexTask indexTask = new IndexTask(
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
createIngestionSpec(tmpDir, null, null, 1, false, false),
|
||||||
|
null,
|
||||||
|
jsonMapper
|
||||||
|
);
|
||||||
|
|
||||||
|
final List<DataSegment> segments = runTask(indexTask);
|
||||||
|
|
||||||
|
Assert.assertEquals(1, segments.size());
|
||||||
|
|
||||||
|
Assert.assertEquals("test", segments.get(0).getDataSource());
|
||||||
|
Assert.assertEquals(new Interval("2014/P1D"), segments.get(0).getInterval());
|
||||||
|
Assert.assertTrue(segments.get(0).getShardSpec().getClass().equals(NoneShardSpec.class));
|
||||||
|
Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAppendToExisting() throws Exception
|
||||||
|
{
|
||||||
|
segmentAllocatePartitionCounter = 0;
|
||||||
|
File tmpDir = temporaryFolder.newFolder();
|
||||||
|
File tmpFile = File.createTempFile("druid", "index", tmpDir);
|
||||||
|
|
||||||
|
PrintWriter writer = new PrintWriter(tmpFile);
|
||||||
|
writer.println("2014-01-01T00:00:10Z,a,1");
|
||||||
|
writer.println("2014-01-01T01:00:20Z,b,1");
|
||||||
|
writer.println("2014-01-01T02:00:30Z,c,1");
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexTask indexTask = new IndexTask(
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
createIngestionSpec(tmpDir, null, 2, null, false, true),
|
||||||
|
null,
|
||||||
|
jsonMapper
|
||||||
|
);
|
||||||
|
|
||||||
|
final List<DataSegment> segments = runTask(indexTask);
|
||||||
|
|
||||||
|
Assert.assertEquals(2, segmentAllocatePartitionCounter);
|
||||||
|
Assert.assertEquals(2, segments.size());
|
||||||
|
|
||||||
|
Assert.assertEquals("test", segments.get(0).getDataSource());
|
||||||
|
Assert.assertEquals(new Interval("2014/P1D"), segments.get(0).getInterval());
|
||||||
|
Assert.assertTrue(segments.get(0).getShardSpec().getClass().equals(NumberedShardSpec.class));
|
||||||
|
Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
|
||||||
|
|
||||||
|
Assert.assertEquals("test", segments.get(1).getDataSource());
|
||||||
|
Assert.assertEquals(new Interval("2014/P1D"), segments.get(1).getInterval());
|
||||||
|
Assert.assertTrue(segments.get(1).getShardSpec().getClass().equals(NumberedShardSpec.class));
|
||||||
|
Assert.assertEquals(1, segments.get(1).getShardSpec().getPartitionNum());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIntervalNotSpecified() throws Exception
|
||||||
|
{
|
||||||
|
File tmpDir = temporaryFolder.newFolder();
|
||||||
|
File tmpFile = File.createTempFile("druid", "index", tmpDir);
|
||||||
|
|
||||||
|
PrintWriter writer = new PrintWriter(tmpFile);
|
||||||
|
writer.println("2014-01-01T00:00:10Z,a,1");
|
||||||
|
writer.println("2014-01-01T01:00:20Z,b,1");
|
||||||
|
writer.println("2014-01-01T02:00:30Z,c,1");
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexTask indexTask = new IndexTask(
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
createIngestionSpec(
|
||||||
|
tmpDir,
|
||||||
|
new UniformGranularitySpec(
|
||||||
|
Granularity.HOUR,
|
||||||
|
QueryGranularities.MINUTE,
|
||||||
|
null
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
null,
|
||||||
|
false,
|
||||||
|
false
|
||||||
|
),
|
||||||
|
null,
|
||||||
|
jsonMapper
|
||||||
|
);
|
||||||
|
|
||||||
|
final List<DataSegment> segments = runTask(indexTask);
|
||||||
|
|
||||||
|
Assert.assertEquals(3, segments.size());
|
||||||
|
|
||||||
|
Assert.assertEquals("test", segments.get(0).getDataSource());
|
||||||
|
Assert.assertEquals(new Interval("2014-01-01T00/PT1H"), segments.get(0).getInterval());
|
||||||
|
Assert.assertTrue(segments.get(0).getShardSpec().getClass().equals(NoneShardSpec.class));
|
||||||
|
Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
|
||||||
|
|
||||||
|
Assert.assertEquals("test", segments.get(1).getDataSource());
|
||||||
|
Assert.assertEquals(new Interval("2014-01-01T01/PT1H"), segments.get(1).getInterval());
|
||||||
|
Assert.assertTrue(segments.get(1).getShardSpec().getClass().equals(NoneShardSpec.class));
|
||||||
|
Assert.assertEquals(0, segments.get(1).getShardSpec().getPartitionNum());
|
||||||
|
|
||||||
|
Assert.assertEquals("test", segments.get(2).getDataSource());
|
||||||
|
Assert.assertEquals(new Interval("2014-01-01T02/PT1H"), segments.get(2).getInterval());
|
||||||
|
Assert.assertTrue(segments.get(2).getShardSpec().getClass().equals(NoneShardSpec.class));
|
||||||
|
Assert.assertEquals(0, segments.get(2).getShardSpec().getPartitionNum());
|
||||||
|
}
|
||||||
|
|
||||||
private final List<DataSegment> runTask(final IndexTask indexTask) throws Exception
|
private final List<DataSegment> runTask(final IndexTask indexTask) throws Exception
|
||||||
{
|
{
|
||||||
final List<DataSegment> segments = Lists.newArrayList();
|
final List<DataSegment> segments = Lists.newArrayList();
|
||||||
|
@ -342,6 +376,30 @@ public class IndexTaskTest
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (taskAction instanceof LockAcquireAction) {
|
||||||
|
return (RetType) new TaskLock(
|
||||||
|
"groupId",
|
||||||
|
"test",
|
||||||
|
((LockAcquireAction) taskAction).getInterval(),
|
||||||
|
new DateTime().toString()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (taskAction instanceof SegmentTransactionalInsertAction) {
|
||||||
|
return (RetType) new SegmentPublishResult(
|
||||||
|
((SegmentTransactionalInsertAction) taskAction).getSegments(),
|
||||||
|
true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (taskAction instanceof SegmentAllocateAction) {
|
||||||
|
SegmentAllocateAction action = (SegmentAllocateAction) taskAction;
|
||||||
|
Interval interval = action.getPreferredSegmentGranularity().bucket(action.getTimestamp());
|
||||||
|
ShardSpec shardSpec = new NumberedShardSpec(segmentAllocatePartitionCounter++, 0);
|
||||||
|
return (RetType) new SegmentIdentifier(action.getDataSource(), interval, "latestVersion", shardSpec);
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}, null, new DataSegmentPusher()
|
}, null, new DataSegmentPusher()
|
||||||
|
@ -365,7 +423,7 @@ public class IndexTaskTest
|
||||||
segments.add(segment);
|
segments.add(segment);
|
||||||
return segment;
|
return segment;
|
||||||
}
|
}
|
||||||
}, null, null, null, null, null, null, null, null, null, null, temporaryFolder.newFolder(),
|
}, null, null, null, null, null, null, null, null, null, jsonMapper, temporaryFolder.newFolder(),
|
||||||
indexMerger, indexIO, null, null, indexMergerV9
|
indexMerger, indexIO, null, null, indexMergerV9
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
@ -375,93 +433,66 @@ public class IndexTaskTest
|
||||||
return segments;
|
return segments;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
private IndexTask.IndexIngestionSpec createIngestionSpec(
|
||||||
public void testIntervalBucketing() throws Exception
|
File baseDir,
|
||||||
|
GranularitySpec granularitySpec,
|
||||||
|
Integer targetPartitionSize,
|
||||||
|
Integer numShards,
|
||||||
|
boolean forceExtendableShardSpecs,
|
||||||
|
boolean appendToExisting
|
||||||
|
)
|
||||||
{
|
{
|
||||||
File tmpDir = temporaryFolder.newFolder();
|
return new IndexTask.IndexIngestionSpec(
|
||||||
|
new DataSchema(
|
||||||
File tmpFile = File.createTempFile("druid", "index", tmpDir);
|
"test",
|
||||||
|
jsonMapper.convertValue(
|
||||||
PrintWriter writer = new PrintWriter(tmpFile);
|
new StringInputRowParser(
|
||||||
writer.println("2015-03-01T07:59:59.977Z,a,1");
|
new CSVParseSpec(
|
||||||
writer.println("2015-03-01T08:00:00.000Z,b,1");
|
new TimestampSpec(
|
||||||
writer.close();
|
"ts",
|
||||||
|
"auto",
|
||||||
IndexTask indexTask = new IndexTask(
|
null
|
||||||
null,
|
|
||||||
null,
|
|
||||||
new IndexTask.IndexIngestionSpec(
|
|
||||||
new DataSchema(
|
|
||||||
"test",
|
|
||||||
jsonMapper.convertValue(
|
|
||||||
new StringInputRowParser(
|
|
||||||
new CSVParseSpec(
|
|
||||||
new TimestampSpec(
|
|
||||||
"ts",
|
|
||||||
"auto",
|
|
||||||
null
|
|
||||||
),
|
|
||||||
new DimensionsSpec(
|
|
||||||
DimensionsSpec.getDefaultSchemas(Arrays.asList("dim")),
|
|
||||||
Lists.<String>newArrayList(),
|
|
||||||
Lists.<SpatialDimensionSchema>newArrayList()
|
|
||||||
),
|
|
||||||
null,
|
|
||||||
Arrays.asList("ts", "dim", "val")
|
|
||||||
),
|
),
|
||||||
null
|
new DimensionsSpec(
|
||||||
|
DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim")),
|
||||||
|
Lists.<String>newArrayList(),
|
||||||
|
Lists.<SpatialDimensionSchema>newArrayList()
|
||||||
|
),
|
||||||
|
null,
|
||||||
|
Arrays.asList("ts", "dim", "val")
|
||||||
),
|
),
|
||||||
Map.class
|
|
||||||
),
|
|
||||||
new AggregatorFactory[]{
|
|
||||||
new LongSumAggregatorFactory("val", "val")
|
|
||||||
},
|
|
||||||
new UniformGranularitySpec(
|
|
||||||
Granularity.HOUR,
|
|
||||||
QueryGranularities.HOUR,
|
|
||||||
Arrays.asList(new Interval("2015-03-01T08:00:00Z/2015-03-01T09:00:00Z"))
|
|
||||||
),
|
|
||||||
jsonMapper
|
|
||||||
),
|
|
||||||
new IndexTask.IndexIOConfig(
|
|
||||||
new LocalFirehoseFactory(
|
|
||||||
tmpDir,
|
|
||||||
"druid*",
|
|
||||||
null
|
null
|
||||||
)
|
),
|
||||||
|
Map.class
|
||||||
),
|
),
|
||||||
null
|
new AggregatorFactory[]{
|
||||||
|
new LongSumAggregatorFactory("val", "val")
|
||||||
|
},
|
||||||
|
granularitySpec != null ? granularitySpec : new UniformGranularitySpec(
|
||||||
|
Granularity.DAY,
|
||||||
|
QueryGranularities.MINUTE,
|
||||||
|
Arrays.asList(new Interval("2014/2015"))
|
||||||
|
),
|
||||||
|
jsonMapper
|
||||||
),
|
),
|
||||||
jsonMapper,
|
new IndexTask.IndexIOConfig(
|
||||||
null
|
new LocalFirehoseFactory(
|
||||||
|
baseDir,
|
||||||
|
"druid*",
|
||||||
|
null
|
||||||
|
), appendToExisting, null
|
||||||
|
),
|
||||||
|
new IndexTask.IndexTuningConfig(
|
||||||
|
targetPartitionSize,
|
||||||
|
1,
|
||||||
|
null,
|
||||||
|
numShards,
|
||||||
|
indexSpec,
|
||||||
|
null,
|
||||||
|
true,
|
||||||
|
forceExtendableShardSpecs,
|
||||||
|
true
|
||||||
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
final List<DataSegment> segments = runTask(indexTask);
|
|
||||||
|
|
||||||
Assert.assertEquals(1, segments.size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testConvertProps()
|
|
||||||
{
|
|
||||||
ShardSpec spec = new NumberedShardSpec(1, 2);
|
|
||||||
IndexTask.IndexTuningConfig config = new IndexTask.IndexTuningConfig(
|
|
||||||
100,
|
|
||||||
1000,
|
|
||||||
null,
|
|
||||||
new IndexSpec(),
|
|
||||||
null,
|
|
||||||
false
|
|
||||||
);
|
|
||||||
RealtimeTuningConfig realtimeTuningConfig = IndexTask.convertTuningConfig(
|
|
||||||
spec,
|
|
||||||
config.getRowFlushBoundary(),
|
|
||||||
config.getIndexSpec(),
|
|
||||||
config.getBuildV9Directly()
|
|
||||||
);
|
|
||||||
Assert.assertEquals(realtimeTuningConfig.getMaxRowsInMemory(), config.getRowFlushBoundary());
|
|
||||||
Assert.assertEquals(realtimeTuningConfig.getShardSpec(), spec);
|
|
||||||
Assert.assertEquals(realtimeTuningConfig.getIndexSpec(), indexSpec);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.Module;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
|
||||||
import io.druid.client.indexing.ClientAppendQuery;
|
import io.druid.client.indexing.ClientAppendQuery;
|
||||||
import io.druid.client.indexing.ClientKillQuery;
|
import io.druid.client.indexing.ClientKillQuery;
|
||||||
import io.druid.client.indexing.ClientMergeQuery;
|
import io.druid.client.indexing.ClientMergeQuery;
|
||||||
|
@ -51,10 +50,13 @@ import io.druid.segment.realtime.plumber.Plumber;
|
||||||
import io.druid.segment.realtime.plumber.PlumberSchool;
|
import io.druid.segment.realtime.plumber.PlumberSchool;
|
||||||
import io.druid.timeline.DataSegment;
|
import io.druid.timeline.DataSegment;
|
||||||
import io.druid.timeline.partition.NoneShardSpec;
|
import io.druid.timeline.partition.NoneShardSpec;
|
||||||
|
import org.hamcrest.CoreMatchers;
|
||||||
import org.joda.time.Interval;
|
import org.joda.time.Interval;
|
||||||
import org.joda.time.Period;
|
import org.joda.time.Period;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.ExpectedException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -65,6 +67,9 @@ public class TaskSerdeTest
|
||||||
private final ObjectMapper jsonMapper;
|
private final ObjectMapper jsonMapper;
|
||||||
private final IndexSpec indexSpec = new IndexSpec();
|
private final IndexSpec indexSpec = new IndexSpec();
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public ExpectedException thrown = ExpectedException.none();
|
||||||
|
|
||||||
public TaskSerdeTest()
|
public TaskSerdeTest()
|
||||||
{
|
{
|
||||||
TestUtils testUtils = new TestUtils();
|
TestUtils testUtils = new TestUtils();
|
||||||
|
@ -75,6 +80,76 @@ public class TaskSerdeTest
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexTaskIOConfigDefaults() throws Exception
|
||||||
|
{
|
||||||
|
final IndexTask.IndexIOConfig ioConfig = jsonMapper.readValue(
|
||||||
|
"{\"type\":\"index\"}",
|
||||||
|
IndexTask.IndexIOConfig.class
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(false, ioConfig.isAppendToExisting());
|
||||||
|
Assert.assertEquals(false, ioConfig.isSkipFirehoseCaching());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexTaskTuningConfigDefaults() throws Exception
|
||||||
|
{
|
||||||
|
final IndexTask.IndexTuningConfig tuningConfig = jsonMapper.readValue(
|
||||||
|
"{\"type\":\"index\"}",
|
||||||
|
IndexTask.IndexTuningConfig.class
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(true, tuningConfig.isBuildV9Directly());
|
||||||
|
Assert.assertEquals(false, tuningConfig.isForceExtendableShardSpecs());
|
||||||
|
Assert.assertEquals(false, tuningConfig.isReportParseExceptions());
|
||||||
|
Assert.assertEquals(new IndexSpec(), tuningConfig.getIndexSpec());
|
||||||
|
Assert.assertEquals(new Period(Integer.MAX_VALUE), tuningConfig.getIntermediatePersistPeriod());
|
||||||
|
Assert.assertEquals(0, tuningConfig.getMaxPendingPersists());
|
||||||
|
Assert.assertEquals(75000, tuningConfig.getMaxRowsInMemory());
|
||||||
|
Assert.assertEquals(null, tuningConfig.getNumShards());
|
||||||
|
Assert.assertEquals(5000000, (int) tuningConfig.getTargetPartitionSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexTaskTuningConfigTargetPartitionSizeOrNumShards() throws Exception
|
||||||
|
{
|
||||||
|
IndexTask.IndexTuningConfig tuningConfig = jsonMapper.readValue(
|
||||||
|
"{\"type\":\"index\", \"targetPartitionSize\":10}",
|
||||||
|
IndexTask.IndexTuningConfig.class
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(10, (int) tuningConfig.getTargetPartitionSize());
|
||||||
|
Assert.assertEquals(null, tuningConfig.getNumShards());
|
||||||
|
|
||||||
|
tuningConfig = jsonMapper.readValue(
|
||||||
|
"{\"type\":\"index\", \"numShards\":10}",
|
||||||
|
IndexTask.IndexTuningConfig.class
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(null, tuningConfig.getTargetPartitionSize());
|
||||||
|
Assert.assertEquals(10, (int) tuningConfig.getNumShards());
|
||||||
|
|
||||||
|
tuningConfig = jsonMapper.readValue(
|
||||||
|
"{\"type\":\"index\", \"targetPartitionSize\":-1, \"numShards\":10}",
|
||||||
|
IndexTask.IndexTuningConfig.class
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(null, tuningConfig.getTargetPartitionSize());
|
||||||
|
Assert.assertEquals(10, (int) tuningConfig.getNumShards());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexTaskTuningConfigTargetPartitionSizeAndNumShards() throws Exception
|
||||||
|
{
|
||||||
|
thrown.expectCause(CoreMatchers.isA(IllegalArgumentException.class));
|
||||||
|
|
||||||
|
jsonMapper.readValue(
|
||||||
|
"{\"type\":\"index\", \"targetPartitionSize\":10, \"numShards\":10}",
|
||||||
|
IndexTask.IndexTuningConfig.class
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIndexTaskSerde() throws Exception
|
public void testIndexTaskSerde() throws Exception
|
||||||
{
|
{
|
||||||
|
@ -93,11 +168,11 @@ public class TaskSerdeTest
|
||||||
),
|
),
|
||||||
jsonMapper
|
jsonMapper
|
||||||
),
|
),
|
||||||
new IndexTask.IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null)),
|
new IndexTask.IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), true, true),
|
||||||
new IndexTask.IndexTuningConfig(10000, 10, -1, indexSpec, null, false)
|
new IndexTask.IndexTuningConfig(10000, 10, 9999, null, indexSpec, 3, true, true, true)
|
||||||
),
|
),
|
||||||
jsonMapper,
|
null,
|
||||||
null
|
jsonMapper
|
||||||
);
|
);
|
||||||
|
|
||||||
final String json = jsonMapper.writeValueAsString(task);
|
final String json = jsonMapper.writeValueAsString(task);
|
||||||
|
@ -106,14 +181,38 @@ public class TaskSerdeTest
|
||||||
final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
|
final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
|
||||||
|
|
||||||
Assert.assertEquals("foo", task.getDataSource());
|
Assert.assertEquals("foo", task.getDataSource());
|
||||||
Assert.assertEquals(new Interval("2010-01-01/P2D"), task.getInterval());
|
|
||||||
|
|
||||||
Assert.assertEquals(task.getId(), task2.getId());
|
Assert.assertEquals(task.getId(), task2.getId());
|
||||||
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
|
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
|
||||||
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
|
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
|
||||||
Assert.assertEquals(task.getInterval(), task2.getInterval());
|
|
||||||
Assert.assertTrue(task.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
|
IndexTask.IndexIOConfig taskIoConfig = task.getIngestionSchema().getIOConfig();
|
||||||
Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
|
IndexTask.IndexIOConfig task2IoConfig = task2.getIngestionSchema().getIOConfig();
|
||||||
|
|
||||||
|
Assert.assertTrue(taskIoConfig.getFirehoseFactory() instanceof LocalFirehoseFactory);
|
||||||
|
Assert.assertTrue(task2IoConfig.getFirehoseFactory() instanceof LocalFirehoseFactory);
|
||||||
|
Assert.assertEquals(taskIoConfig.isAppendToExisting(), task2IoConfig.isAppendToExisting());
|
||||||
|
Assert.assertEquals(taskIoConfig.isSkipFirehoseCaching(), task2IoConfig.isSkipFirehoseCaching());
|
||||||
|
|
||||||
|
IndexTask.IndexTuningConfig taskTuningConfig = task.getIngestionSchema().getTuningConfig();
|
||||||
|
IndexTask.IndexTuningConfig task2TuningConfig = task2.getIngestionSchema().getTuningConfig();
|
||||||
|
|
||||||
|
Assert.assertEquals(taskTuningConfig.getBasePersistDirectory(), task2TuningConfig.getBasePersistDirectory());
|
||||||
|
Assert.assertEquals(taskTuningConfig.getIndexSpec(), task2TuningConfig.getIndexSpec());
|
||||||
|
Assert.assertEquals(
|
||||||
|
taskTuningConfig.getIntermediatePersistPeriod(),
|
||||||
|
task2TuningConfig.getIntermediatePersistPeriod()
|
||||||
|
);
|
||||||
|
Assert.assertEquals(taskTuningConfig.getMaxPendingPersists(), task2TuningConfig.getMaxPendingPersists());
|
||||||
|
Assert.assertEquals(taskTuningConfig.getMaxRowsInMemory(), task2TuningConfig.getMaxRowsInMemory());
|
||||||
|
Assert.assertEquals(taskTuningConfig.getNumShards(), task2TuningConfig.getNumShards());
|
||||||
|
Assert.assertEquals(taskTuningConfig.getTargetPartitionSize(), task2TuningConfig.getTargetPartitionSize());
|
||||||
|
Assert.assertEquals(taskTuningConfig.isBuildV9Directly(), task2TuningConfig.isBuildV9Directly());
|
||||||
|
Assert.assertEquals(
|
||||||
|
taskTuningConfig.isForceExtendableShardSpecs(),
|
||||||
|
task2TuningConfig.isForceExtendableShardSpecs()
|
||||||
|
);
|
||||||
|
Assert.assertEquals(taskTuningConfig.isReportParseExceptions(), task2TuningConfig.isReportParseExceptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -134,11 +233,11 @@ public class TaskSerdeTest
|
||||||
),
|
),
|
||||||
jsonMapper
|
jsonMapper
|
||||||
),
|
),
|
||||||
new IndexTask.IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null)),
|
new IndexTask.IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), true, null),
|
||||||
new IndexTask.IndexTuningConfig(10000, 10, -1, indexSpec, null, false)
|
new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, 3, true, true, true)
|
||||||
),
|
),
|
||||||
jsonMapper,
|
null,
|
||||||
null
|
jsonMapper
|
||||||
);
|
);
|
||||||
|
|
||||||
for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
|
for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
|
||||||
|
@ -151,7 +250,6 @@ public class TaskSerdeTest
|
||||||
final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
|
final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
|
||||||
|
|
||||||
Assert.assertEquals("foo", task.getDataSource());
|
Assert.assertEquals("foo", task.getDataSource());
|
||||||
Assert.assertEquals(new Interval("2010-01-01/P2D"), task.getInterval());
|
|
||||||
|
|
||||||
Assert.assertEquals(task.getId(), task2.getId());
|
Assert.assertEquals(task.getId(), task2.getId());
|
||||||
Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
|
Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
|
||||||
|
@ -160,7 +258,6 @@ public class TaskSerdeTest
|
||||||
Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
|
Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
|
||||||
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
|
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
|
||||||
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
|
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
|
||||||
Assert.assertEquals(task.getInterval(), task2.getInterval());
|
|
||||||
Assert.assertTrue(task.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
|
Assert.assertTrue(task.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
|
||||||
Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
|
Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getFirehoseFactory() instanceof LocalFirehoseFactory);
|
||||||
}
|
}
|
||||||
|
@ -168,11 +265,13 @@ public class TaskSerdeTest
|
||||||
@Test
|
@Test
|
||||||
public void testMergeTaskSerde() throws Exception
|
public void testMergeTaskSerde() throws Exception
|
||||||
{
|
{
|
||||||
final List<DataSegment> segments = ImmutableList.<DataSegment>of(DataSegment.builder()
|
final List<DataSegment> segments = ImmutableList.<DataSegment>of(
|
||||||
.dataSource("foo")
|
DataSegment.builder()
|
||||||
.interval(new Interval("2010-01-01/P1D"))
|
.dataSource("foo")
|
||||||
.version("1234")
|
.interval(new Interval("2010-01-01/P1D"))
|
||||||
.build());
|
.version("1234")
|
||||||
|
.build()
|
||||||
|
);
|
||||||
final List<AggregatorFactory> aggregators = ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("cnt"));
|
final List<AggregatorFactory> aggregators = ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("cnt"));
|
||||||
final MergeTask task = new MergeTask(
|
final MergeTask task = new MergeTask(
|
||||||
null,
|
null,
|
||||||
|
@ -202,11 +301,15 @@ public class TaskSerdeTest
|
||||||
task2.getAggregators().get(0).getName()
|
task2.getAggregators().get(0).getName()
|
||||||
);
|
);
|
||||||
|
|
||||||
final MergeTask task3 = (MergeTask) jsonMapper.readValue(jsonMapper.writeValueAsString(new ClientMergeQuery(
|
final MergeTask task3 = (MergeTask) jsonMapper.readValue(
|
||||||
"foo",
|
jsonMapper.writeValueAsString(
|
||||||
segments,
|
new ClientMergeQuery(
|
||||||
aggregators
|
"foo",
|
||||||
)), Task.class);
|
segments,
|
||||||
|
aggregators
|
||||||
|
)
|
||||||
|
), Task.class
|
||||||
|
);
|
||||||
|
|
||||||
Assert.assertEquals("foo", task3.getDataSource());
|
Assert.assertEquals("foo", task3.getDataSource());
|
||||||
Assert.assertEquals(new Interval("2010-01-01/P1D"), task3.getInterval());
|
Assert.assertEquals(new Interval("2010-01-01/P1D"), task3.getInterval());
|
||||||
|
@ -237,10 +340,14 @@ public class TaskSerdeTest
|
||||||
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
|
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
|
||||||
Assert.assertEquals(task.getInterval(), task2.getInterval());
|
Assert.assertEquals(task.getInterval(), task2.getInterval());
|
||||||
|
|
||||||
final KillTask task3 = (KillTask) jsonMapper.readValue(jsonMapper.writeValueAsString(new ClientKillQuery(
|
final KillTask task3 = (KillTask) jsonMapper.readValue(
|
||||||
"foo",
|
jsonMapper.writeValueAsString(
|
||||||
new Interval("2010-01-01/P1D")
|
new ClientKillQuery(
|
||||||
)), Task.class);
|
"foo",
|
||||||
|
new Interval("2010-01-01/P1D")
|
||||||
|
)
|
||||||
|
), Task.class
|
||||||
|
);
|
||||||
|
|
||||||
Assert.assertEquals("foo", task3.getDataSource());
|
Assert.assertEquals("foo", task3.getDataSource());
|
||||||
Assert.assertEquals(new Interval("2010-01-01/P1D"), task3.getInterval());
|
Assert.assertEquals(new Interval("2010-01-01/P1D"), task3.getInterval());
|
||||||
|
@ -421,10 +528,14 @@ public class TaskSerdeTest
|
||||||
Assert.assertEquals(task.getInterval(), task2.getInterval());
|
Assert.assertEquals(task.getInterval(), task2.getInterval());
|
||||||
Assert.assertEquals(task.getSegments(), task2.getSegments());
|
Assert.assertEquals(task.getSegments(), task2.getSegments());
|
||||||
|
|
||||||
final AppendTask task3 = (AppendTask) jsonMapper.readValue(jsonMapper.writeValueAsString(new ClientAppendQuery(
|
final AppendTask task3 = (AppendTask) jsonMapper.readValue(
|
||||||
"foo",
|
jsonMapper.writeValueAsString(
|
||||||
segments
|
new ClientAppendQuery(
|
||||||
)), Task.class);
|
"foo",
|
||||||
|
segments
|
||||||
|
)
|
||||||
|
), Task.class
|
||||||
|
);
|
||||||
|
|
||||||
Assert.assertEquals("foo", task3.getDataSource());
|
Assert.assertEquals("foo", task3.getDataSource());
|
||||||
Assert.assertEquals(new Interval("2010-01-01/P2D"), task3.getInterval());
|
Assert.assertEquals(new Interval("2010-01-01/P2D"), task3.getInterval());
|
||||||
|
@ -521,10 +632,12 @@ public class TaskSerdeTest
|
||||||
);
|
);
|
||||||
final ConvertSegmentTask convertSegmentTaskOriginal = ConvertSegmentTask.create(
|
final ConvertSegmentTask convertSegmentTaskOriginal = ConvertSegmentTask.create(
|
||||||
segment,
|
segment,
|
||||||
new IndexSpec(new RoaringBitmapSerdeFactory(null),
|
new IndexSpec(
|
||||||
CompressedObjectStrategy.CompressionStrategy.LZF,
|
new RoaringBitmapSerdeFactory(null),
|
||||||
CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED,
|
CompressedObjectStrategy.CompressionStrategy.LZF,
|
||||||
CompressionFactory.LongEncodingStrategy.LONGS),
|
CompressedObjectStrategy.CompressionStrategy.UNCOMPRESSED,
|
||||||
|
CompressionFactory.LongEncodingStrategy.LONGS
|
||||||
|
),
|
||||||
false,
|
false,
|
||||||
true,
|
true,
|
||||||
null
|
null
|
||||||
|
|
|
@@ -651,11 +651,11 @@ public class TaskLifecycleTest
             ),
             mapper
         ),
-        new IndexTask.IndexIOConfig(new MockFirehoseFactory(false)),
-        new IndexTask.IndexTuningConfig(10000, 10, -1, indexSpec, null, false)
+        new IndexTask.IndexIOConfig(new MockFirehoseFactory(false), false, null),
+        new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, 3, true, true, true)
         ),
-        mapper,
-        null
+        null,
+        MAPPER
     );
 
     final Optional<TaskStatus> preRunTaskStatus = tsqa.getStatus(indexTask.getId());

@@ -709,11 +709,11 @@ public class TaskLifecycleTest
             ),
             mapper
         ),
-        new IndexTask.IndexIOConfig(new MockExceptionalFirehoseFactory()),
-        new IndexTask.IndexTuningConfig(10000, 10, -1, indexSpec, null, false)
+        new IndexTask.IndexIOConfig(new MockExceptionalFirehoseFactory(), false, null),
+        new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, 3, true, true, true)
         ),
-        mapper,
-        null
+        null,
+        MAPPER
     );
 
     final TaskStatus status = runTask(indexTask);

@@ -1068,11 +1068,11 @@ public class TaskLifecycleTest
             ),
             mapper
         ),
-        new IndexTask.IndexIOConfig(new MockFirehoseFactory(false)),
-        new IndexTask.IndexTuningConfig(10000, 10, -1, indexSpec, null, false)
+        new IndexTask.IndexIOConfig(new MockFirehoseFactory(false), false, null),
+        new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, null, false, null, null)
         ),
-        mapper,
-        null
+        null,
+        MAPPER
     );
 
     final long startTime = System.currentTimeMillis();
@@ -30,6 +30,7 @@ import io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory;
 import io.druid.segment.realtime.firehose.FixedCountFirehoseFactory;
 import io.druid.segment.realtime.firehose.IrcFirehoseFactory;
 import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
+import io.druid.segment.realtime.firehose.ReplayableFirehoseFactory;
 import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory;
 
 import java.util.Arrays;

@@ -56,6 +57,7 @@ public class FirehoseModule implements DruidModule
         new NamedType(LocalFirehoseFactory.class, "local"),
         new NamedType(EventReceiverFirehoseFactory.class, "receiver"),
         new NamedType(CombiningFirehoseFactory.class, "combining"),
+        new NamedType(ReplayableFirehoseFactory.class, "replayable"),
         new NamedType(FixedCountFirehoseFactory.class, "fixedCount")
     )
 );
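The "replayable" firehose type registered above wraps another firehose and caches its data on local disk so it can be read more than once. Based on the @JsonCreator constructor of the new factory shown later in this diff, it could be built roughly like this; the delegate, directory and filter values are illustrative, not taken from this commit:

// Sketch: wrap a local firehose so the data can be replayed.
FirehoseFactory replayable = new ReplayableFirehoseFactory(
    new LocalFirehoseFactory(new File("/tmp/data"), "*.csv", null), // "delegate": the firehose to cache
    false,       // "reportParseExceptions"
    250000000,   // "maxTempFileSize" (the factory's own default)
    3,           // "readFirehoseRetries" (the factory's own default)
    smileMapper  // normally injected via @Smile @JacksonInject
);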
@@ -194,4 +194,9 @@ public class ArbitraryGranularitySpec implements GranularitySpec
     result = 31 * result + (timezone != null ? timezone.hashCode() : 0);
     return result;
   }
+
+  @Override
+  public GranularitySpec withIntervals(List<Interval> inputIntervals) {
+    return new ArbitraryGranularitySpec(queryGranularity, rollup, inputIntervals, timezone);
+  }
 }

@@ -66,11 +66,13 @@ public interface GranularitySpec
    */
   public Optional<Interval> bucketInterval(DateTime dt);
 
-  public Granularity getSegmentGranularity();
+  Granularity getSegmentGranularity();
 
-  public boolean isRollup();
+  boolean isRollup();
 
-  public QueryGranularity getQueryGranularity();
+  QueryGranularity getQueryGranularity();
 
-  public String getTimezone();
+  String getTimezone();
+
+  GranularitySpec withIntervals(List<Interval> inputIntervals);
 }

@@ -189,4 +189,9 @@ public class UniformGranularitySpec implements GranularitySpec
     result = 31 * result + (wrappedSpec != null ? wrappedSpec.hashCode() : 0);
     return result;
   }
+
+  @Override
+  public GranularitySpec withIntervals(List<Interval> inputIntervals) {
+    return new UniformGranularitySpec(segmentGranularity, queryGranularity, rollup, inputIntervals, timezone);
+  }
 }
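A short usage sketch of the new withIntervals method (variable names are illustrative): it returns a copy of a spec with concrete intervals filled in, which is what lets the index task start from a spec whose intervals were omitted and plug in the intervals it discovers from the data.

// Sketch: start without intervals, then produce a copy with discovered intervals.
GranularitySpec original = new UniformGranularitySpec(
    Granularity.DAY,
    QueryGranularities.MINUTE,
    null                          // intervals not known up front
);
List<Interval> discovered = Arrays.asList(new Interval("2014-01-01/2014-01-02"));
GranularitySpec updated = original.withIntervals(discovered);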
@@ -0,0 +1,319 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.segment.realtime.firehose;
+
+import com.fasterxml.jackson.annotation.JacksonInject;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.smile.SmileFactory;
+import com.fasterxml.jackson.dataformat.smile.SmileGenerator;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Ordering;
+import com.google.common.io.CountingOutputStream;
+import com.google.common.io.Files;
+import com.metamx.emitter.EmittingLogger;
+import io.druid.data.input.Firehose;
+import io.druid.data.input.FirehoseFactory;
+import io.druid.data.input.InputRow;
+import io.druid.data.input.Row;
+import io.druid.data.input.Rows;
+import io.druid.data.input.impl.InputRowParser;
+import io.druid.guice.annotations.Smile;
+import io.druid.java.util.common.parsers.ParseException;
+import io.druid.utils.Runnables;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Creates a wrapper firehose that writes from another firehose to disk and then serves nextRow() from disk. Useful for
+ * tasks that require multiple passes through the data to prevent multiple remote fetches. Also has support for
+ * retrying fetches if the underlying firehose throws an exception while the local cache is being generated.
+ */
+public class ReplayableFirehoseFactory implements FirehoseFactory<InputRowParser>
+{
+  private static final EmittingLogger log = new EmittingLogger(ReplayableFirehoseFactory.class);
+  private static final boolean DEFAULT_REPORT_PARSE_EXCEPTIONS = false;
+  private static final int DEFAULT_MAX_TEMP_FILE_SIZE = 250000000;
+  private static final int DEFAULT_READ_FIREHOSE_RETRIES = 3;
+
+  private final FirehoseFactory delegateFactory;
+  private final boolean reportParseExceptions;
+
+  // This is *roughly* the max size of the temp files that will be generated, but they may be slightly larger. The
+  // reason for the approximation is that we're not forcing flushes after writing to the generator so the number of
+  // bytes written to the stream won't be updated until the flush occurs. It's probably more important to optimize for
+  // I/O speed rather than maintaining a strict max on the size of the temp file before it's split.
+  private final int maxTempFileSize;
+
+  private final int readFirehoseRetries;
+  private final ObjectMapper smileMapper;
+
+  private ReplayableFirehose firehose;
+
+  @JsonCreator
+  public ReplayableFirehoseFactory(
+      @JsonProperty("delegate") FirehoseFactory delegateFactory,
+      @JsonProperty("reportParseExceptions") Boolean reportParseExceptions,
+      @JsonProperty("maxTempFileSize") Integer maxTempFileSize,
+      @JsonProperty("readFirehoseRetries") Integer readFirehoseRetries,
+      @Smile @JacksonInject ObjectMapper smileMapper
+  )
+  {
+    Preconditions.checkNotNull(delegateFactory, "delegate cannot be null");
+    Preconditions.checkArgument(
+        !(delegateFactory instanceof ReplayableFirehoseFactory),
+        "Refusing to wrap another ReplayableFirehoseFactory"
+    );
+
+    this.delegateFactory = delegateFactory;
+    this.reportParseExceptions = reportParseExceptions == null
+                                 ? DEFAULT_REPORT_PARSE_EXCEPTIONS
+                                 : reportParseExceptions;
+    this.maxTempFileSize = maxTempFileSize == null ? DEFAULT_MAX_TEMP_FILE_SIZE : maxTempFileSize;
+    this.readFirehoseRetries = readFirehoseRetries == null ? DEFAULT_READ_FIREHOSE_RETRIES : readFirehoseRetries;
+
+    this.smileMapper = smileMapper;
+
+    log.info(this.toString());
+  }
+
+  @Override
+  public Firehose connect(InputRowParser parser) throws IOException
+  {
+    if (firehose == null) {
+      firehose = new ReplayableFirehose(parser);
+    } else {
+      log.info("Rewinding and returning existing firehose");
+      firehose.rewind();
+    }
+
+    return firehose;
+  }
+
+  public class ReplayableFirehose implements Firehose
+  {
+    private final List<File> files = new ArrayList<>();
+    private final List<String> dimensions;
+
+    private int fileIndex = 0;
+    private JsonFactory jsonFactory;
+    private JsonParser jsonParser;
+    private Iterator<Row> it;
+
+    public ReplayableFirehose(InputRowParser parser) throws IOException
+    {
+      jsonFactory = smileMapper.getFactory();
+
+      if (jsonFactory instanceof SmileFactory) {
+        jsonFactory = ((SmileFactory) jsonFactory).enable(SmileGenerator.Feature.CHECK_SHARED_STRING_VALUES);
+      }
+
+      long counter = 0, totalBytes = 0, unparseable = 0, retryCount = 0;
+      Set<String> dimensionScratch = new HashSet<>();
+
+      File tmpDir = Files.createTempDir();
+      tmpDir.deleteOnExit();
|
|
||||||
|
long startTime = System.nanoTime();
|
||||||
|
boolean isDone = false;
|
||||||
|
do {
|
||||||
|
deleteTempFiles();
|
||||||
|
try (Firehose delegateFirehose = delegateFactory.connect(parser)) {
|
||||||
|
while (delegateFirehose.hasMore()) {
|
||||||
|
File tmpFile = File.createTempFile("replayable-", null, tmpDir);
|
||||||
|
tmpFile.deleteOnExit();
|
||||||
|
|
||||||
|
files.add(tmpFile);
|
||||||
|
log.debug("Created file [%s]", tmpFile.getAbsolutePath());
|
||||||
|
|
||||||
|
try (CountingOutputStream cos = new CountingOutputStream(new FileOutputStream(tmpFile));
|
||||||
|
JsonGenerator generator = jsonFactory.createGenerator(cos)) {
|
||||||
|
|
||||||
|
while (delegateFirehose.hasMore() && cos.getCount() < getMaxTempFileSize()) {
|
||||||
|
try {
|
||||||
|
InputRow row = delegateFirehose.nextRow();
|
||||||
|
generator.writeObject(row);
|
||||||
|
dimensionScratch.addAll(row.getDimensions());
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
catch (ParseException e) {
|
||||||
|
if (reportParseExceptions) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
unparseable++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
totalBytes += cos.getCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
isDone = true;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
if (++retryCount <= readFirehoseRetries && !(e instanceof ParseException)) {
|
||||||
|
log.error(e, "Delegate firehose threw an exception, retrying (%d of %d)", retryCount, readFirehoseRetries);
|
||||||
|
} else {
|
||||||
|
log.error(e, "Delegate firehose threw an exception, retries exhausted, aborting");
|
||||||
|
Throwables.propagate(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (!isDone);
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Finished reading from firehose in [%,dms], [%,d] events parsed, [%,d] bytes written, [%,d] events unparseable",
|
||||||
|
(System.nanoTime() - startTime) / 1000000,
|
||||||
|
counter,
|
||||||
|
totalBytes,
|
||||||
|
unparseable
|
||||||
|
);
|
||||||
|
|
||||||
|
dimensions = Ordering.natural().immutableSortedCopy(dimensionScratch);
|
||||||
|
|
||||||
|
if (counter == 0) {
|
||||||
|
log.warn("Firehose contains no events!");
|
||||||
|
deleteTempFiles();
|
||||||
|
it = Iterators.emptyIterator();
|
||||||
|
} else {
|
||||||
|
jsonParser = jsonFactory.createParser(files.get(fileIndex));
|
||||||
|
it = jsonParser.readValuesAs(Row.class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasMore()
|
||||||
|
{
|
||||||
|
if (it.hasNext()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (jsonParser != null) {
|
||||||
|
jsonParser.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (++fileIndex >= files.size() || files.get(fileIndex).length() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
jsonParser = jsonFactory.createParser(files.get(fileIndex));
|
||||||
|
it = jsonParser.readValuesAs(Row.class);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InputRow nextRow()
|
||||||
|
{
|
||||||
|
return Rows.toCaseInsensitiveInputRow(it.next(), dimensions);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Runnable commit()
|
||||||
|
{
|
||||||
|
return Runnables.getNoopRunnable();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the firehose by closing the input stream and setting an empty iterator. The underlying cache files
|
||||||
|
* backing the firehose are retained for when the firehose is "replayed" by calling rewind(). The cache files are
|
||||||
|
* deleted by File.deleteOnExit() when the process exits.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException
|
||||||
|
{
|
||||||
|
if (jsonParser != null) {
|
||||||
|
jsonParser.close();
|
||||||
|
}
|
||||||
|
it = Iterators.emptyIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void rewind() throws IOException
|
||||||
|
{
|
||||||
|
close();
|
||||||
|
|
||||||
|
if (!files.isEmpty()) {
|
||||||
|
fileIndex = 0;
|
||||||
|
jsonParser = jsonFactory.createParser(files.get(fileIndex));
|
||||||
|
it = jsonParser.readValuesAs(Row.class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteTempFiles()
|
||||||
|
{
|
||||||
|
for (File file : files) {
|
||||||
|
log.debug("Deleting temp file: %s", file.getAbsolutePath());
|
||||||
|
file.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
files.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty("delegate")
|
||||||
|
public FirehoseFactory getDelegateFactory()
|
||||||
|
{
|
||||||
|
return delegateFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty("reportParseExceptions")
|
||||||
|
public boolean isReportParseExceptions()
|
||||||
|
{
|
||||||
|
return reportParseExceptions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty("maxTempFileSize")
|
||||||
|
public int getMaxTempFileSize()
|
||||||
|
{
|
||||||
|
return maxTempFileSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
@JsonProperty("readFirehoseRetries")
|
||||||
|
public int getReadFirehoseRetries()
|
||||||
|
{
|
||||||
|
return readFirehoseRetries;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "ReplayableFirehoseFactory{" +
|
||||||
|
"delegateFactory=" + delegateFactory +
|
||||||
|
", reportParseExceptions=" + reportParseExceptions +
|
||||||
|
", maxTempFileSize=" + maxTempFileSize +
|
||||||
|
", readFirehoseRetries=" + readFirehoseRetries +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
|
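
A minimal usage sketch (not part of the commit above) of how a task could make two passes over the same input through ReplayableFirehoseFactory. The package name, "delegateFactory", "parser", and "smileMapper" are hypothetical placeholders supplied by the caller; only the constructor and the connect()/hasMore()/nextRow() calls shown in the class above are assumed.

package io.druid.examples; // hypothetical package, not part of this commit

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.InputRowParser;
import io.druid.segment.realtime.firehose.ReplayableFirehoseFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class ReplayableFirehoseUsageSketch
{
  /**
   * Reads the delegate once, caching rows to local temp files, then replays the cached rows
   * for a second pass without fetching from the delegate again.
   */
  public static List<InputRow> readTwice(
      FirehoseFactory delegateFactory, // hypothetical: any non-replayable firehose factory
      InputRowParser parser,           // hypothetical: a parser matching the delegate's data
      ObjectMapper smileMapper         // hypothetical: the Smile-annotated mapper normally injected
  ) throws IOException
  {
    ReplayableFirehoseFactory replayable = new ReplayableFirehoseFactory(
        delegateFactory,
        false,      // reportParseExceptions: skip unparseable rows instead of failing
        250000000,  // maxTempFileSize: roughly 250MB per temp file before splitting
        3,          // readFirehoseRetries: retry the delegate up to 3 times on read errors
        smileMapper
    );

    List<InputRow> rows = new ArrayList<>();

    // First pass: pulls from the delegate and writes the rows to local temp files.
    try (Firehose firehose = replayable.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }

    // Second pass: connect() rewinds the cached files; the delegate is not contacted again.
    try (Firehose firehose = replayable.connect(parser)) {
      while (firehose.hasMore()) {
        firehose.nextRow(); // e.g. a second aggregation pass would go here
      }
    }

    return rows;
  }
}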
@ -0,0 +1,53 @@
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.segment.realtime.plumber;

import io.druid.java.util.common.logger.Logger;
import io.druid.query.SegmentDescriptor;

import java.util.concurrent.Executor;

public class NoopSegmentHandoffNotifierFactory implements SegmentHandoffNotifierFactory
{
  private static final Logger log = new Logger(NoopSegmentHandoffNotifierFactory.class);

  private static final SegmentHandoffNotifier NOTIFIER = new SegmentHandoffNotifier()
  {
    @Override
    public boolean registerSegmentHandoffCallback(SegmentDescriptor descriptor, Executor exec, Runnable handOffRunnable)
    {
      log.info("Not waiting for segment to be handed off, executing handOffRunnable");
      exec.execute(handOffRunnable);
      return true;
    }

    @Override
    public void start() {}

    @Override
    public void close() {}
  };

  @Override
  public SegmentHandoffNotifier createSegmentHandoffNotifier(String dataSource)
  {
    return NOTIFIER;
  }
}
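
A short sketch (not part of the commit above) showing the behavior of the no-op notifier: the handoff callback is executed immediately on the supplied executor rather than waiting for a real segment handoff. The package name, data source name, interval, version, and partition number are hypothetical, and the SegmentDescriptor(Interval, String, int) constructor is assumed from the existing Druid API.

package io.druid.examples; // hypothetical package, not part of this commit

import io.druid.query.SegmentDescriptor;
import io.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory;
import io.druid.segment.realtime.plumber.SegmentHandoffNotifier;
import org.joda.time.Interval;

import java.util.concurrent.Executor;

public class NoopHandoffSketch
{
  public static void main(String[] args)
  {
    SegmentHandoffNotifier notifier =
        new NoopSegmentHandoffNotifierFactory().createSegmentHandoffNotifier("exampleDataSource");

    // Runs the callback on the calling thread; a real notifier would wait for the actual handoff.
    Executor sameThread = new Executor()
    {
      @Override
      public void execute(Runnable command)
      {
        command.run();
      }
    };

    notifier.start();
    notifier.registerSegmentHandoffCallback(
        new SegmentDescriptor(new Interval("2016-01-01/2016-01-02"), "v1", 0), // hypothetical segment
        sameThread,
        new Runnable()
        {
          @Override
          public void run()
          {
            System.out.println("handoff callback executed immediately");
          }
        }
    );
  }
}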
@ -0,0 +1,447 @@
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.realtime.firehose;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.InputRowParser;
import io.druid.data.input.impl.MapInputRowParser;
import io.druid.data.input.impl.TimeAndDimsParseSpec;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.parsers.ParseException;
import io.druid.segment.realtime.firehose.ReplayableFirehoseFactory;
import org.easymock.EasyMockSupport;
import org.easymock.IAnswer;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.List;

import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.expectLastCall;

public class ReplayableFirehoseFactoryTest extends EasyMockSupport
{
  private FirehoseFactory delegateFactory = createMock(FirehoseFactory.class);
  private Firehose delegateFirehose = createMock(Firehose.class);
  private InputRowParser parser = new MapInputRowParser(new TimeAndDimsParseSpec(null, null));
  private ObjectMapper mapper = new DefaultObjectMapper();

  private List<InputRow> testRows = Lists.<InputRow>newArrayList(
      new MapBasedInputRow(
          DateTime.now(), Lists.newArrayList("dim1", "dim2"),
          ImmutableMap.<String, Object>of("dim1", "val1", "dim2", "val2", "met1", 1)
      ),
      new MapBasedInputRow(
          DateTime.now(), Lists.newArrayList("dim1", "dim2"),
          ImmutableMap.<String, Object>of("dim1", "val5", "dim2", "val2", "met1", 2)
      ),
      new MapBasedInputRow(
          DateTime.now(), Lists.newArrayList("dim2", "dim3"),
          ImmutableMap.<String, Object>of("dim2", "val1", "dim3", "val2", "met1", 3)
      )
  );

  private ReplayableFirehoseFactory replayableFirehoseFactory;

  @Before
  public void setup()
  {
    replayableFirehoseFactory = new ReplayableFirehoseFactory(
        delegateFactory,
        true,
        10000,
        3,
        mapper
    );
  }

  @Test
  public void testConstructor() throws Exception
  {
    Assert.assertEquals(delegateFactory, replayableFirehoseFactory.getDelegateFactory());
    Assert.assertEquals(10000, replayableFirehoseFactory.getMaxTempFileSize());
    Assert.assertEquals(3, replayableFirehoseFactory.getReadFirehoseRetries());
    Assert.assertEquals(true, replayableFirehoseFactory.isReportParseExceptions());
  }

  @Test
  public void testReplayableFirehoseNoEvents() throws Exception
  {
    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andReturn(false);
    delegateFirehose.close();
    replayAll();

    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      Assert.assertFalse(firehose.hasMore());
    }
    verifyAll();
  }

  @Test
  public void testReplayableFirehoseWithEvents() throws Exception
  {
    final boolean hasMore[] = {true};

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(
        new IAnswer<Boolean>()
        {
          @Override
          public Boolean answer() throws Throwable
          {
            return hasMore[0];
          }
        }
    ).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0))
        .andReturn(testRows.get(1))
        .andAnswer(
            new IAnswer<InputRow>()
            {
              @Override
              public InputRow answer() throws Throwable
              {
                hasMore[0] = false;
                return testRows.get(2);
              }
            }
        );

    delegateFirehose.close();
    replayAll();

    List<InputRow> rows = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRows, rows);

    // now replay!
    rows.clear();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRows, rows);

    verifyAll();
  }

  @Test
  public void testReplayableFirehoseWithoutReportParseExceptions() throws Exception
  {
    final boolean hasMore[] = {true};
    replayableFirehoseFactory = new ReplayableFirehoseFactory(
        delegateFactory,
        false,
        10000,
        3,
        mapper
    );

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(
        new IAnswer<Boolean>()
        {
          @Override
          public Boolean answer() throws Throwable
          {
            return hasMore[0];
          }
        }
    ).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0))
        .andReturn(testRows.get(1))
        .andThrow(new ParseException("unparseable!"))
        .andAnswer(
            new IAnswer<InputRow>()
            {
              @Override
              public InputRow answer() throws Throwable
              {
                hasMore[0] = false;
                return testRows.get(2);
              }
            }
        );

    delegateFirehose.close();
    replayAll();

    List<InputRow> rows = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRows, rows);

    verifyAll();
  }

  @Test(expected = ParseException.class)
  public void testReplayableFirehoseWithReportParseExceptions() throws Exception
  {
    final boolean hasMore[] = {true};

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(
        new IAnswer<Boolean>()
        {
          @Override
          public Boolean answer() throws Throwable
          {
            return hasMore[0];
          }
        }
    ).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0))
        .andReturn(testRows.get(1))
        .andThrow(new ParseException("unparseable!"))
        .andAnswer(
            new IAnswer<InputRow>()
            {
              @Override
              public InputRow answer() throws Throwable
              {
                hasMore[0] = false;
                return testRows.get(2);
              }
            }
        );

    delegateFirehose.close();
    replayAll();

    replayableFirehoseFactory.connect(parser);
    verifyAll();
  }

  @Test
  public void testReplayableFirehoseWithConnectRetries() throws Exception
  {
    final boolean hasMore[] = {true};

    expect(delegateFactory.connect(parser)).andThrow(new IOException())
                                           .andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(
        new IAnswer<Boolean>()
        {
          @Override
          public Boolean answer() throws Throwable
          {
            return hasMore[0];
          }
        }
    ).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0))
        .andReturn(testRows.get(1))
        .andAnswer(
            new IAnswer<InputRow>()
            {
              @Override
              public InputRow answer() throws Throwable
              {
                hasMore[0] = false;
                return testRows.get(2);
              }
            }
        );
    delegateFirehose.close();
    replayAll();

    List<InputRow> rows = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRows, rows);

    verifyAll();
  }

  @Test
  public void testReplayableFirehoseWithNextRowRetries() throws Exception
  {
    final boolean hasMore[] = {true};

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose).times(2);
    expect(delegateFirehose.hasMore()).andAnswer(
        new IAnswer<Boolean>()
        {
          @Override
          public Boolean answer() throws Throwable
          {
            return hasMore[0];
          }
        }
    ).anyTimes();
    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0))
        .andThrow(new RuntimeException())
        .andReturn(testRows.get(0))
        .andReturn(testRows.get(1))
        .andAnswer(
            new IAnswer<InputRow>()
            {
              @Override
              public InputRow answer() throws Throwable
              {
                hasMore[0] = false;
                return testRows.get(2);
              }
            }
        );
    delegateFirehose.close();
    expectLastCall().times(2);
    replayAll();

    List<InputRow> rows = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRows, rows);

    verifyAll();
  }

  @Test(expected = TestReadingException.class)
  public void testReplayableFirehoseWithNoRetries() throws Exception
  {
    replayableFirehoseFactory = new ReplayableFirehoseFactory(
        delegateFactory,
        false,
        10000,
        0,
        mapper
    );

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andReturn(true).times(2);
    expect(delegateFirehose.nextRow()).andThrow(new TestReadingException());

    delegateFirehose.close();
    expectLastCall();
    replayAll();

    replayableFirehoseFactory.connect(parser);
    verifyAll();
  }

  @Test
  public void testReplayableFirehoseWithMultipleFiles() throws Exception
  {
    replayableFirehoseFactory = new ReplayableFirehoseFactory(delegateFactory, false, 1, 3, mapper);

    final boolean hasMore[] = {true};
    final int multiplicationFactor = 500;

    final InputRow finalRow = new MapBasedInputRow(
        DateTime.now(), Lists.newArrayList("dim4", "dim5"),
        ImmutableMap.<String, Object>of("dim4", "val12", "dim5", "val20", "met1", 30)
    );

    expect(delegateFactory.connect(parser)).andReturn(delegateFirehose);
    expect(delegateFirehose.hasMore()).andAnswer(
        new IAnswer<Boolean>()
        {
          @Override
          public Boolean answer() throws Throwable
          {
            return hasMore[0];
          }
        }
    ).anyTimes();

    expect(delegateFirehose.nextRow())
        .andReturn(testRows.get(0)).times(multiplicationFactor)
        .andReturn(testRows.get(1)).times(multiplicationFactor)
        .andReturn(testRows.get(2)).times(multiplicationFactor)
        .andAnswer(
            new IAnswer<InputRow>()
            {
              @Override
              public InputRow answer() throws Throwable
              {
                hasMore[0] = false;
                return finalRow;
              }
            }
        );

    delegateFirehose.close();
    replayAll();

    List<InputRow> testRowsMultiplied = Lists.newArrayList();
    for (InputRow row : testRows) {
      for (int i = 0; i < multiplicationFactor; i++) {
        testRowsMultiplied.add(row);
      }
    }
    testRowsMultiplied.add(finalRow);

    List<InputRow> rows = Lists.newArrayList();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRowsMultiplied, rows);

    // now replay!
    rows.clear();
    try (Firehose firehose = replayableFirehoseFactory.connect(parser)) {
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    Assert.assertEquals(testRowsMultiplied, rows);

    verifyAll();
  }

  private class TestReadingException extends RuntimeException
  {
  }
}