Add a way to skip determinePartitions for IndexTask

Add a way to skip determinePartitions for IndexTask by manually
specifying numShards.
This commit is contained in:
nishantmonu51 2014-07-18 18:52:15 +05:30
parent 6430776607
commit 4ce12470a1
7 changed files with 70 additions and 21 deletions

View File

@ -179,7 +179,16 @@ public class DetermineHashedPartitionsJob implements Jobby
actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++)); actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++));
} else { } else {
for (int i = 0; i < numberOfShards; ++i) { for (int i = 0; i < numberOfShards; ++i) {
actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards), shardCount++)); actualSpecs.add(
new HadoopyShardSpec(
new HashBasedNumberedShardSpec(
i,
numberOfShards,
HadoopDruidIndexerConfig.jsonMapper
),
shardCount++
)
);
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i)); log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
} }
} }

View File

@ -67,7 +67,7 @@ public class HadoopDruidDetermineConfigurationJob implements Jobby
for (int i = 0; i < shardsPerInterval; i++) { for (int i = 0; i < shardsPerInterval; i++) {
specs.add( specs.add(
new HadoopyShardSpec( new HadoopyShardSpec(
new HashBasedNumberedShardSpec(i, shardsPerInterval), new HashBasedNumberedShardSpec(i, shardsPerInterval, HadoopDruidIndexerConfig.jsonMapper),
shardCount++ shardCount++
) )
); );

View File

@ -19,10 +19,13 @@
package io.druid.indexing.common.task; package io.druid.indexing.common.task;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.annotation.JsonTypeName;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
@ -53,12 +56,14 @@ import io.druid.segment.loading.DataSegmentPusher;
import io.druid.segment.realtime.FireDepartmentMetrics; import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.plumber.Plumber; import io.druid.segment.realtime.plumber.Plumber;
import io.druid.timeline.DataSegment; import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.NoneShardSpec; import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.ShardSpec; import io.druid.timeline.partition.ShardSpec;
import io.druid.timeline.partition.SingleDimensionShardSpec; import io.druid.timeline.partition.SingleDimensionShardSpec;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.joda.time.Interval; import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
@ -107,6 +112,8 @@ public class IndexTask extends AbstractFixedIntervalTask
@JsonIgnore @JsonIgnore
private final IndexIngestionSpec ingestionSchema; private final IndexIngestionSpec ingestionSchema;
private final ObjectMapper jsonMapper;
@JsonCreator @JsonCreator
public IndexTask( public IndexTask(
@JsonProperty("id") String id, @JsonProperty("id") String id,
@ -118,7 +125,8 @@ public class IndexTask extends AbstractFixedIntervalTask
@JsonProperty("indexGranularity") final QueryGranularity indexGranularity, @JsonProperty("indexGranularity") final QueryGranularity indexGranularity,
@JsonProperty("targetPartitionSize") final int targetPartitionSize, @JsonProperty("targetPartitionSize") final int targetPartitionSize,
@JsonProperty("firehose") final FirehoseFactory firehoseFactory, @JsonProperty("firehose") final FirehoseFactory firehoseFactory,
@JsonProperty("rowFlushBoundary") final int rowFlushBoundary @JsonProperty("rowFlushBoundary") final int rowFlushBoundary,
@JacksonInject ObjectMapper jsonMapper
) )
{ {
super( super(
@ -139,9 +147,10 @@ public class IndexTask extends AbstractFixedIntervalTask
granularitySpec.withQueryGranularity(indexGranularity == null ? QueryGranularity.NONE : indexGranularity) granularitySpec.withQueryGranularity(indexGranularity == null ? QueryGranularity.NONE : indexGranularity)
), ),
new IndexIOConfig(firehoseFactory), new IndexIOConfig(firehoseFactory),
new IndexTuningConfig(targetPartitionSize, rowFlushBoundary) new IndexTuningConfig(targetPartitionSize, rowFlushBoundary, null)
); );
} }
this.jsonMapper = jsonMapper;
} }
@Override @Override
@ -175,7 +184,15 @@ public class IndexTask extends AbstractFixedIntervalTask
if (targetPartitionSize > 0) { if (targetPartitionSize > 0) {
shardSpecs = determinePartitions(bucket, targetPartitionSize); shardSpecs = determinePartitions(bucket, targetPartitionSize);
} else { } else {
shardSpecs = ImmutableList.<ShardSpec>of(new NoneShardSpec()); int numShards = ingestionSchema.getTuningConfig().getNumShards();
if (numShards > 0) {
shardSpecs = Lists.newArrayList();
for (int i = 0; i < numShards; i++) {
shardSpecs.add(new HashBasedNumberedShardSpec(i, numShards, jsonMapper));
}
} else {
shardSpecs = ImmutableList.<ShardSpec>of(new NoneShardSpec());
}
} }
for (final ShardSpec shardSpec : shardSpecs) { for (final ShardSpec shardSpec : shardSpecs) {
final DataSegment segment = generateSegment( final DataSegment segment = generateSegment(
@ -206,6 +223,7 @@ public class IndexTask extends AbstractFixedIntervalTask
retVal.add(interval); retVal.add(interval);
} }
} }
return retVal; return retVal;
} }
@ -477,7 +495,7 @@ public class IndexTask extends AbstractFixedIntervalTask
this.dataSchema = dataSchema; this.dataSchema = dataSchema;
this.ioConfig = ioConfig; this.ioConfig = ioConfig;
this.tuningConfig = tuningConfig == null ? new IndexTuningConfig(0, 0) : tuningConfig; this.tuningConfig = tuningConfig == null ? new IndexTuningConfig(0, 0, null) : tuningConfig;
} }
@Override @Override
@ -530,15 +548,22 @@ public class IndexTask extends AbstractFixedIntervalTask
private final int targetPartitionSize; private final int targetPartitionSize;
private final int rowFlushBoundary; private final int rowFlushBoundary;
private final int numShards;
@JsonCreator @JsonCreator
public IndexTuningConfig( public IndexTuningConfig(
@JsonProperty("targetPartitionSize") int targetPartitionSize, @JsonProperty("targetPartitionSize") int targetPartitionSize,
@JsonProperty("rowFlushBoundary") int rowFlushBoundary @JsonProperty("rowFlushBoundary") int rowFlushBoundary,
) @JsonProperty("numShards") @Nullable Integer numShards
)
{ {
this.targetPartitionSize = targetPartitionSize == 0 ? DEFAULT_TARGET_PARTITION_SIZE : targetPartitionSize; this.targetPartitionSize = targetPartitionSize == 0 ? DEFAULT_TARGET_PARTITION_SIZE : targetPartitionSize;
this.rowFlushBoundary = rowFlushBoundary == 0 ? DEFAULT_ROW_FLUSH_BOUNDARY : rowFlushBoundary; this.rowFlushBoundary = rowFlushBoundary == 0 ? DEFAULT_ROW_FLUSH_BOUNDARY : rowFlushBoundary;
this.numShards = numShards == null ? -1 : numShards;
Preconditions.checkArgument(
this.targetPartitionSize == -1 || this.numShards == -1,
"targetPartitionsSize and shardCount both cannot be set"
);
} }
@JsonProperty @JsonProperty
@ -552,5 +577,11 @@ public class IndexTask extends AbstractFixedIntervalTask
{ {
return rowFlushBoundary; return rowFlushBoundary;
} }
@JsonProperty
public int getNumShards()
{
return numShards;
}
} }
} }

View File

@ -19,6 +19,8 @@
package io.druid.indexing.common.task; package io.druid.indexing.common.task;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
@ -67,16 +69,19 @@ public class TaskSerdeTest
QueryGranularity.NONE, QueryGranularity.NONE,
10000, 10000,
new LocalFirehoseFactory(new File("lol"), "rofl", null), new LocalFirehoseFactory(new File("lol"), "rofl", null),
-1 -1,
jsonMapper
); );
for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) { for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
jsonMapper.registerModule(jacksonModule); jsonMapper.registerModule(jacksonModule);
} }
InjectableValues inject = new InjectableValues.Std()
.addValue(ObjectMapper.class, jsonMapper);
final String json = jsonMapper.writeValueAsString(task); final String json = jsonMapper.writeValueAsString(task);
Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change
final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class); final IndexTask task2 = jsonMapper.reader(Task.class).with(inject).readValue(json);
Assert.assertEquals("foo", task.getDataSource()); Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(new Interval("2010-01-01/P2D"), task.getInterval()); Assert.assertEquals(new Interval("2010-01-01/P2D"), task.getInterval());

View File

@ -43,6 +43,7 @@ import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.InputRowParser; import io.druid.data.input.impl.InputRowParser;
import io.druid.granularity.QueryGranularity; import io.druid.granularity.QueryGranularity;
import io.druid.indexing.common.TestUtils;
import io.druid.segment.column.ColumnConfig; import io.druid.segment.column.ColumnConfig;
import io.druid.segment.indexing.granularity.UniformGranularitySpec; import io.druid.segment.indexing.granularity.UniformGranularitySpec;
import io.druid.indexing.common.SegmentLoaderFactory; import io.druid.indexing.common.SegmentLoaderFactory;
@ -249,7 +250,8 @@ public class TaskLifecycleTest
IR("2010-01-02T01", "a", "c", 1) IR("2010-01-02T01", "a", "c", 1)
) )
), ),
-1 -1,
TestUtils.MAPPER
); );
final Optional<TaskStatus> preRunTaskStatus = tsqa.getStatus(indexTask.getId()); final Optional<TaskStatus> preRunTaskStatus = tsqa.getStatus(indexTask.getId());
@ -297,7 +299,8 @@ public class TaskLifecycleTest
QueryGranularity.NONE, QueryGranularity.NONE,
10000, 10000,
newMockExceptionalFirehoseFactory(), newMockExceptionalFirehoseFactory(),
-1 -1,
TestUtils.MAPPER
); );
final TaskStatus status = runTask(indexTask); final TaskStatus status = runTask(indexTask);

View File

@ -34,18 +34,18 @@ import java.util.List;
public class HashBasedNumberedShardSpec extends NumberedShardSpec public class HashBasedNumberedShardSpec extends NumberedShardSpec
{ {
private static final HashFunction hashFunction = Hashing.murmur3_32(); private static final HashFunction hashFunction = Hashing.murmur3_32();
@JacksonInject private final ObjectMapper jsonMapper;
private ObjectMapper jsonMapper;
@JsonCreator @JsonCreator
public HashBasedNumberedShardSpec( public HashBasedNumberedShardSpec(
@JsonProperty("partitionNum") int partitionNum, @JsonProperty("partitionNum") int partitionNum,
@JsonProperty("partitions") int partitions @JsonProperty("partitions") int partitions,
@JacksonInject ObjectMapper jsonMapper
) )
{ {
super(partitionNum, partitions); super(partitionNum, partitions);
this.jsonMapper = jsonMapper;
} }
@Override @Override

View File

@ -26,6 +26,7 @@ import com.metamx.common.ISE;
import io.druid.TestUtil; import io.druid.TestUtil;
import io.druid.data.input.InputRow; import io.druid.data.input.InputRow;
import io.druid.data.input.Row; import io.druid.data.input.Row;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.timeline.partition.HashBasedNumberedShardSpec; import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.PartitionChunk; import io.druid.timeline.partition.PartitionChunk;
import io.druid.timeline.partition.ShardSpec; import io.druid.timeline.partition.ShardSpec;
@ -43,7 +44,7 @@ public class HashBasedNumberedShardSpecTest
{ {
final ShardSpec spec = TestUtil.MAPPER.readValue( final ShardSpec spec = TestUtil.MAPPER.readValue(
TestUtil.MAPPER.writeValueAsBytes(new HashBasedNumberedShardSpec(1, 2)), TestUtil.MAPPER.writeValueAsBytes(new HashBasedNumberedShardSpec(1, 2, TestUtil.MAPPER)),
ShardSpec.class ShardSpec.class
); );
Assert.assertEquals(1, spec.getPartitionNum()); Assert.assertEquals(1, spec.getPartitionNum());
@ -65,9 +66,9 @@ public class HashBasedNumberedShardSpecTest
public void testPartitionChunks() public void testPartitionChunks()
{ {
final List<ShardSpec> specs = ImmutableList.<ShardSpec>of( final List<ShardSpec> specs = ImmutableList.<ShardSpec>of(
new HashBasedNumberedShardSpec(0, 3), new HashBasedNumberedShardSpec(0, 3, TestUtil.MAPPER),
new HashBasedNumberedShardSpec(1, 3), new HashBasedNumberedShardSpec(1, 3, TestUtil.MAPPER),
new HashBasedNumberedShardSpec(2, 3) new HashBasedNumberedShardSpec(2, 3, TestUtil.MAPPER)
); );
final List<PartitionChunk<String>> chunks = Lists.transform( final List<PartitionChunk<String>> chunks = Lists.transform(
@ -141,7 +142,7 @@ public class HashBasedNumberedShardSpecTest
int partitions int partitions
) )
{ {
super(partitionNum, partitions); super(partitionNum, partitions, TestUtil.MAPPER);
} }
@Override @Override