Revert "Merge branch 'determine-partitions-improvements'"

This reverts commit 7ad228ceb5, reversing changes made to 9c55e2b779.
fjy 2014-02-14 12:47:34 -08:00
parent 5607edb813
commit 189b3e2b9b
29 changed files with 380 additions and 1280 deletions

View File

@@ -82,7 +82,6 @@ The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Tim
 "segmentOutputPath": "s3n:\/\/billy-bucket\/the\/segments\/go\/here",
 "leaveIntermediate": "false",
 "partitionsSpec": {
-"type": "random"
 "targetPartitionSize": 5000000
 },
 "updaterJobSpec": {
@@ -146,20 +145,12 @@ The indexing process has the ability to roll data up as it processes the incomin
 ### Partitioning specification
-Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way depending on partition type.
-Druid supports two types of partitions spec - singleDimension and random.
-In SingleDimension partition type data is partitioned based on the values in that dimension.
-For example, data for a day may be split by the dimension "last\_name" into two segments: one with all values from A-M and one with all values from N-Z.
-In random partition type, the number of partitions is determined based on the targetPartitionSize and cardinality of input set and the data is partitioned based on the hashcode of the row.
-Random partition type is more efficient and gives better distribution of data.
+Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way. For example, data for a day may be split by the dimension "last\_name" into two segments: one with all values from A-M and one with all values from N-Z.
 To use this option, the indexer must be given a target partition size. It can then find a good set of partition ranges on its own.
 |property|description|required?|
 |--------|-----------|---------|
-|type|type of partitionSpec to be used |no, default : singleDimension|
 |targetPartitionSize|target number of rows to include in a partition, should be a number that targets segments of 700MB\~1GB.|yes|
 |partitionDimension|the dimension to partition on. Leave blank to select a dimension automatically.|no|
 |assumeGrouped|assume input data has already been grouped on time and dimensions. This is faster, but can choose suboptimal partitions if the assumption is violated.|no|
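For illustration only (not part of this diff): a partitionsSpec limited to the documented properties binds directly onto the PartitionsSpec class restored by this revert through its @JsonCreator constructor. The values and class name below are made up for the example.

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.indexer.partitions.PartitionsSpec;

public class PartitionsSpecExample
{
  public static void main(String[] args) throws Exception
  {
    // Example values only; targetPartitionSize is the row count to aim for per segment.
    final String json = "{\"targetPartitionSize\": 5000000, \"partitionDimension\": \"last_name\", \"assumeGrouped\": false}";
    final PartitionsSpec spec = new ObjectMapper().readValue(json, PartitionsSpec.class);
    System.out.println(spec.getTargetPartitionSize() + " rows, dimension=" + spec.getPartitionDimension());
  }
}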

View File

@@ -97,11 +97,6 @@
 <artifactId>junit</artifactId>
 <scope>test</scope>
 </dependency>
-<dependency>
-<groupId>com.clearspring.analytics</groupId>
-<artifactId>stream</artifactId>
-<version>2.5.2</version>
-</dependency>
 </dependencies>
 <build>

View File

@ -1,357 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import com.clearspring.analytics.stream.cardinality.HyperLogLog;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.google.common.io.Closeables;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import io.druid.granularity.QueryGranularity;
import io.druid.indexer.granularity.UniformGranularitySpec;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.NoneShardSpec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Determines appropriate ShardSpecs for a job by determining approximate cardinality of data set using HyperLogLog
*/
public class DetermineHashedPartitionsJob implements Jobby
{
private static final int MAX_SHARDS = 128;
private static final Logger log = new Logger(DetermineHashedPartitionsJob.class);
private static final int HYPER_LOG_LOG_BIT_SIZE = 20;
private final HadoopDruidIndexerConfig config;
public DetermineHashedPartitionsJob(
HadoopDruidIndexerConfig config
)
{
this.config = config;
}
public boolean run()
{
try {
/*
* Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
* in the final segment.
*/
long startTime = System.currentTimeMillis();
final Job groupByJob = new Job(
new Configuration(),
String.format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals())
);
JobHelper.injectSystemProperties(groupByJob);
groupByJob.setInputFormatClass(TextInputFormat.class);
groupByJob.setMapperClass(DetermineCardinalityMapper.class);
groupByJob.setMapOutputKeyClass(LongWritable.class);
groupByJob.setMapOutputValueClass(BytesWritable.class);
groupByJob.setReducerClass(DetermineCardinalityReducer.class);
groupByJob.setOutputKeyClass(NullWritable.class);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
groupByJob.setNumReduceTasks(1);
JobHelper.setupClasspath(config, groupByJob);
config.addInputPaths(groupByJob);
config.intoConfiguration(groupByJob);
FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
groupByJob.submit();
log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
if (!groupByJob.waitForCompletion(true)) {
log.error("Job failed: %s", groupByJob.getJobID());
return false;
}
/*
* Load partitions and intervals determined by the previous job.
*/
log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
FileSystem fileSystem = null;
if (!config.getSegmentGranularIntervals().isPresent()) {
final Path intervalInfoPath = config.makeIntervalInfoPath();
fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
if (!fileSystem.exists(intervalInfoPath)) {
throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
}
List<Interval> intervals = config.jsonMapper.readValue(
Utils.openInputStream(groupByJob, intervalInfoPath), new TypeReference<List<Interval>>()
{
}
);
config.setGranularitySpec(new UniformGranularitySpec(config.getGranularitySpec().getGranularity(), intervals));
log.info("Determined Intervals for Job [%s]" + config.getSegmentGranularIntervals());
}
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
DateTime bucket = segmentGranularity.getStart();
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
if (fileSystem == null) {
fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
}
if (fileSystem.exists(partitionInfoPath)) {
Long cardinality = config.jsonMapper.readValue(
Utils.openInputStream(groupByJob, partitionInfoPath), new TypeReference<Long>()
{
}
);
int numberOfShards = (int) Math.ceil((double) cardinality / config.getTargetPartitionSize());
if (numberOfShards > MAX_SHARDS) {
throw new ISE(
"Number of shards [%d] exceed the maximum limit of [%d], either targetPartitionSize is too low or data volume is too high",
numberOfShards,
MAX_SHARDS
);
}
List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
if (numberOfShards == 1) {
actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++));
} else {
for (int i = 0; i < numberOfShards; ++i) {
actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards), shardCount++));
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
}
}
shardSpecs.put(bucket, actualSpecs);
} else {
log.info("Path[%s] didn't exist!?", partitionInfoPath);
}
}
config.setShardSpecs(shardSpecs);
log.info(
"DetermineHashedPartitionsJob took %d millis",
(System.currentTimeMillis() - startTime)
);
return true;
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public static class DetermineCardinalityMapper extends HadoopDruidIndexerMapper<LongWritable, BytesWritable>
{
private static HashFunction hashFunction = Hashing.murmur3_128();
private QueryGranularity rollupGranularity = null;
private Map<Interval, HyperLogLog> hyperLogLogs;
private HadoopDruidIndexerConfig config;
private boolean determineIntervals;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
super.setup(context);
rollupGranularity = getConfig().getRollupSpec().getRollupGranularity();
config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
Optional<Set<Interval>> intervals = config.getSegmentGranularIntervals();
if (intervals.isPresent()) {
determineIntervals = false;
final ImmutableMap.Builder<Interval, HyperLogLog> builder = ImmutableMap.builder();
for (final Interval bucketInterval : intervals.get()) {
builder.put(bucketInterval, new HyperLogLog(HYPER_LOG_LOG_BIT_SIZE));
}
hyperLogLogs = builder.build();
} else {
determineIntervals = true;
hyperLogLogs = Maps.newHashMap();
}
}
@Override
protected void innerMap(
InputRow inputRow,
Text text,
Context context
) throws IOException, InterruptedException
{
final List<Object> groupKey = Rows.toGroupKey(
rollupGranularity.truncate(inputRow.getTimestampFromEpoch()),
inputRow
);
Interval interval;
if (determineIntervals) {
interval = config.getGranularitySpec().getGranularity().bucket(new DateTime(inputRow.getTimestampFromEpoch()));
if (!hyperLogLogs.containsKey(interval)) {
hyperLogLogs.put(interval, new HyperLogLog(HYPER_LOG_LOG_BIT_SIZE));
}
} else {
final Optional<Interval> maybeInterval = config.getGranularitySpec()
.bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()));
if (!maybeInterval.isPresent()) {
throw new ISE("WTF?! No bucket found for timestamp: %s", inputRow.getTimestampFromEpoch());
}
interval = maybeInterval.get();
}
hyperLogLogs.get(interval)
.offerHashed(
hashFunction.hashBytes(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey))
.asLong()
);
}
@Override
public void run(Context context) throws IOException, InterruptedException
{
setup(context);
while (context.nextKeyValue()) {
map(context.getCurrentKey(), context.getCurrentValue(), context);
}
for (Map.Entry<Interval, HyperLogLog> entry : hyperLogLogs.entrySet()) {
context.write(
new LongWritable(entry.getKey().getStartMillis()),
new BytesWritable(entry.getValue().getBytes())
);
}
cleanup(context);
}
}
public static class DetermineCardinalityReducer
extends Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>
{
private final List<Interval> intervals = Lists.newArrayList();
protected HadoopDruidIndexerConfig config = null;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
}
@Override
protected void reduce(
LongWritable key,
Iterable<BytesWritable> values,
Context context
) throws IOException, InterruptedException
{
HyperLogLog aggregate = new HyperLogLog(HYPER_LOG_LOG_BIT_SIZE);
for (BytesWritable value : values) {
HyperLogLog logValue = HyperLogLog.Builder.build(value.getBytes());
try {
aggregate.addAll(logValue);
}
catch (CardinalityMergeException e) {
e.printStackTrace(); // TODO: check for better handling
}
}
Interval interval = config.getGranularitySpec().getGranularity().bucket(new DateTime(key.get()));
intervals.add(interval);
final Path outPath = config.makeSegmentPartitionInfoPath(interval);
final OutputStream out = Utils.makePathAndOutputStream(
context, outPath, config.isOverwriteFiles()
);
try {
HadoopDruidIndexerConfig.jsonMapper.writerWithType(
new TypeReference<Long>()
{
}
).writeValue(
out,
aggregate.cardinality()
);
}
finally {
Closeables.close(out, false);
}
}
@Override
public void run(Context context)
throws IOException, InterruptedException
{
super.run(context);
if (!config.getSegmentGranularIntervals().isPresent()) {
final Path outPath = config.makeIntervalInfoPath();
final OutputStream out = Utils.makePathAndOutputStream(
context, outPath, config.isOverwriteFiles()
);
try {
HadoopDruidIndexerConfig.jsonMapper.writerWithType(
new TypeReference<List<Interval>>()
{
}
).writeValue(
out,
intervals
);
}
finally {
Closeables.close(out, false);
}
}
}
}
}
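The heart of the hashed-partitioning decision in the deleted class above is the shard-count arithmetic applied to the HyperLogLog cardinality estimate. A standalone sketch of just that step (class, method name, and exception type are illustrative):

public class ShardCountSketch
{
  // Mirrors the calculation in DetermineHashedPartitionsJob: one shard per targetPartitionSize rows,
  // rounded up, and capped at the maximum shard count.
  static int computeShardCount(long estimatedCardinality, long targetPartitionSize, int maxShards)
  {
    final int numberOfShards = (int) Math.ceil((double) estimatedCardinality / targetPartitionSize);
    if (numberOfShards > maxShards) {
      throw new IllegalStateException(
          String.format("Number of shards [%d] exceeds the maximum limit of [%d]", numberOfShards, maxShards)
      );
    }
    return numberOfShards;
  }
}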

View File

@ -26,7 +26,9 @@ import com.google.common.base.Optional;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.base.Throwables; import com.google.common.base.Throwables;
import com.google.common.collect.ComparisonChain; import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
@ -38,9 +40,7 @@ import com.metamx.common.guava.nary.BinaryFn;
import com.metamx.common.logger.Logger; import com.metamx.common.logger.Logger;
import io.druid.collections.CombiningIterable; import io.druid.collections.CombiningIterable;
import io.druid.data.input.InputRow; import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import io.druid.granularity.QueryGranularity; import io.druid.granularity.QueryGranularity;
import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.timeline.partition.NoneShardSpec; import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.ShardSpec; import io.druid.timeline.partition.ShardSpec;
import io.druid.timeline.partition.SingleDimensionShardSpec; import io.druid.timeline.partition.SingleDimensionShardSpec;
@ -76,6 +76,7 @@ import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
/** /**
* Determines appropriate ShardSpecs for a job by determining whether or not partitioning is necessary, and if so, * Determines appropriate ShardSpecs for a job by determining whether or not partitioning is necessary, and if so,
@ -106,6 +107,16 @@ public class DeterminePartitionsJob implements Jobby
this.config = config; this.config = config;
} }
public static void injectSystemProperties(Job job)
{
final Configuration conf = job.getConfiguration();
for (String propName : System.getProperties().stringPropertyNames()) {
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
}
public boolean run() public boolean run()
{ {
try { try {
@ -114,17 +125,13 @@ public class DeterminePartitionsJob implements Jobby
* in the final segment. * in the final segment.
*/ */
if(!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)){
throw new ISE("DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]", config.getPartitionsSpec());
}
if (!config.getPartitionsSpec().isAssumeGrouped()) { if (!config.getPartitionsSpec().isAssumeGrouped()) {
final Job groupByJob = new Job( final Job groupByJob = new Job(
new Configuration(), new Configuration(),
String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()) String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())
); );
JobHelper.injectSystemProperties(groupByJob); injectSystemProperties(groupByJob);
groupByJob.setInputFormatClass(TextInputFormat.class); groupByJob.setInputFormatClass(TextInputFormat.class);
groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class); groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
groupByJob.setMapOutputKeyClass(BytesWritable.class); groupByJob.setMapOutputKeyClass(BytesWritable.class);
@ -161,7 +168,7 @@ public class DeterminePartitionsJob implements Jobby
dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19"); dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
JobHelper.injectSystemProperties(dimSelectionJob); injectSystemProperties(dimSelectionJob);
if (!config.getPartitionsSpec().isAssumeGrouped()) { if (!config.getPartitionsSpec().isAssumeGrouped()) {
// Read grouped data from the groupByJob. // Read grouped data from the groupByJob.
@ -183,7 +190,7 @@ public class DeterminePartitionsJob implements Jobby
dimSelectionJob.setOutputValueClass(Text.class); dimSelectionJob.setOutputValueClass(Text.class);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class); dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class); dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size()); dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().size());
JobHelper.setupClasspath(config, dimSelectionJob); JobHelper.setupClasspath(config, dimSelectionJob);
config.intoConfiguration(dimSelectionJob); config.intoConfiguration(dimSelectionJob);
@ -209,8 +216,10 @@ public class DeterminePartitionsJob implements Jobby
FileSystem fileSystem = null; FileSystem fileSystem = null;
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance()); Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0; int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) { for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity); DateTime bucket = segmentGranularity.getStart();
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
if (fileSystem == null) { if (fileSystem == null) {
fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration()); fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
} }
@ -224,10 +233,10 @@ public class DeterminePartitionsJob implements Jobby
List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size()); List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
for (int i = 0; i < specs.size(); ++i) { for (int i = 0; i < specs.size(); ++i) {
actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++)); actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i)); log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
} }
shardSpecs.put(segmentGranularity.getStart(), actualSpecs); shardSpecs.put(bucket, actualSpecs);
} else { } else {
log.info("Path[%s] didn't exist!?", partitionInfoPath); log.info("Path[%s] didn't exist!?", partitionInfoPath);
} }
@ -260,9 +269,17 @@ public class DeterminePartitionsJob implements Jobby
Context context Context context
) throws IOException, InterruptedException ) throws IOException, InterruptedException
{ {
final List<Object> groupKey = Rows.toGroupKey( // Create group key, there are probably more efficient ways of doing this
final Map<String, Set<String>> dims = Maps.newTreeMap();
for (final String dim : inputRow.getDimensions()) {
final Set<String> dimValues = ImmutableSortedSet.copyOf(inputRow.getDimension(dim));
if (dimValues.size() > 0) {
dims.put(dim, dimValues);
}
}
final List<Object> groupKey = ImmutableList.of(
rollupGranularity.truncate(inputRow.getTimestampFromEpoch()), rollupGranularity.truncate(inputRow.getTimestampFromEpoch()),
inputRow dims
); );
context.write( context.write(
new BytesWritable(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey)), new BytesWritable(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey)),
@ -298,8 +315,8 @@ public class DeterminePartitionsJob implements Jobby
throws IOException, InterruptedException throws IOException, InterruptedException
{ {
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration()); final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
SingleDimensionPartitionsSpec spec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec(); final String partitionDimension = config.getPartitionDimension();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, spec.getPartitionDimension()); helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension);
} }
@Override @Override
@ -330,8 +347,8 @@ public class DeterminePartitionsJob implements Jobby
{ {
super.setup(context); super.setup(context);
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration()); final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
final SingleDimensionPartitionsSpec spec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec(); final String partitionDimension = config.getPartitionDimension();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, spec.getPartitionDimension()); helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension);
} }
@Override @Override
@ -366,7 +383,7 @@ public class DeterminePartitionsJob implements Jobby
final ImmutableMap.Builder<DateTime, Integer> timeIndexBuilder = ImmutableMap.builder(); final ImmutableMap.Builder<DateTime, Integer> timeIndexBuilder = ImmutableMap.builder();
int idx = 0; int idx = 0;
for (final Interval bucketInterval : config.getGranularitySpec().bucketIntervals().get()) { for (final Interval bucketInterval : config.getGranularitySpec().bucketIntervals()) {
timeIndexBuilder.put(bucketInterval.getStart(), idx); timeIndexBuilder.put(bucketInterval.getStart(), idx);
idx++; idx++;
} }
@ -724,7 +741,7 @@ public class DeterminePartitionsJob implements Jobby
} }
final OutputStream out = Utils.makePathAndOutputStream( final OutputStream out = Utils.makePathAndOutputStream(
context, config.makeSegmentPartitionInfoPath(config.getGranularitySpec().getGranularity().bucket(bucket)), config.isOverwriteFiles() context, config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)), config.isOverwriteFiles()
); );
final DimPartitions chosenPartitions = maxCardinality > HIGH_CARDINALITY_THRESHOLD final DimPartitions chosenPartitions = maxCardinality > HIGH_CARDINALITY_THRESHOLD
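The group-key change above swaps the Rows.toGroupKey call back out for the original hand-built key: a sorted map of dimension names to sorted value sets, paired with the truncated timestamp. A self-contained sketch of that construction using plain JDK collections (types simplified, not project code):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class GroupKeySketch
{
  // Builds [truncatedTimestamp, {dimension -> sorted values}] in the spirit of the restored mapper code;
  // empty dimensions are dropped so they do not affect the key.
  static List<Object> toGroupKey(long truncatedTimestamp, Map<String, List<String>> row)
  {
    final Map<String, Set<String>> dims = new TreeMap<>();
    for (Map.Entry<String, List<String>> dim : row.entrySet()) {
      final Set<String> dimValues = new TreeSet<>(dim.getValue());
      if (!dimValues.isEmpty()) {
        dims.put(dim.getKey(), dimValues);
      }
    }
    return Arrays.asList(truncatedTimestamp, dims);
  }
}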

View File

@ -1,74 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import io.druid.timeline.partition.NoneShardSpec;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;
import java.util.List;
import java.util.Map;
/**
*/
public class HadoopDruidDetermineConfigurationJob implements Jobby
{
private static final Logger log = new Logger(HadoopDruidDetermineConfigurationJob.class);
private final HadoopDruidIndexerConfig config;
@Inject
public HadoopDruidDetermineConfigurationJob(
HadoopDruidIndexerConfig config
)
{
this.config = config;
}
@Override
public boolean run()
{
List<Jobby> jobs = Lists.newArrayList();
JobHelper.ensurePaths(config);
if (config.isDeterminingPartitions()) {
jobs.add(config.getPartitionsSpec().getPartitionJob(config));
} else {
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
DateTime bucket = segmentGranularity.getStart();
final HadoopyShardSpec spec = new HadoopyShardSpec(new NoneShardSpec(), shardCount++);
shardSpecs.put(bucket, Lists.newArrayList(spec));
log.info("DateTime[%s], spec[%s]", bucket, spec);
}
config.setShardSpecs(shardSpecs);
}
return JobHelper.runJobs(jobs, config);
}
}

View File

@ -50,7 +50,6 @@ import io.druid.guice.annotations.Self;
import io.druid.indexer.granularity.GranularitySpec; import io.druid.indexer.granularity.GranularitySpec;
import io.druid.indexer.granularity.UniformGranularitySpec; import io.druid.indexer.granularity.UniformGranularitySpec;
import io.druid.indexer.partitions.PartitionsSpec; import io.druid.indexer.partitions.PartitionsSpec;
import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.indexer.path.PathSpec; import io.druid.indexer.path.PathSpec;
import io.druid.indexer.rollup.DataRollupSpec; import io.druid.indexer.rollup.DataRollupSpec;
import io.druid.indexer.updater.DbUpdaterJobSpec; import io.druid.indexer.updater.DbUpdaterJobSpec;
@ -74,7 +73,6 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.SortedSet;
/** /**
*/ */
@ -180,7 +178,7 @@ public class HadoopDruidIndexerConfig
this.partitionsSpec = partitionsSpec; this.partitionsSpec = partitionsSpec;
} else { } else {
// Backwards compatibility // Backwards compatibility
this.partitionsSpec = new SingleDimensionPartitionsSpec(partitionDimension, targetPartitionSize, null, false); this.partitionsSpec = new PartitionsSpec(partitionDimension, targetPartitionSize, null, false);
} }
if (granularitySpec != null) { if (granularitySpec != null) {
@ -380,17 +378,17 @@ public class HadoopDruidIndexerConfig
this.ignoreInvalidRows = ignoreInvalidRows; this.ignoreInvalidRows = ignoreInvalidRows;
} }
public Optional<List<Interval>> getIntervals() public List<Interval> getIntervals()
{ {
Optional<SortedSet<Interval>> setOptional = getGranularitySpec().bucketIntervals(); return JodaUtils.condenseIntervals(getGranularitySpec().bucketIntervals());
if (setOptional.isPresent()) {
return Optional.of((List<Interval>) JodaUtils.condenseIntervals(setOptional.get()));
} else {
return Optional.absent();
}
} }
public boolean isDeterminingPartitions() public String getPartitionDimension()
{
return partitionsSpec.getPartitionDimension();
}
public boolean partitionByDimension()
{ {
return partitionsSpec.isDeterminingPartitions(); return partitionsSpec.isDeterminingPartitions();
} }
@ -485,70 +483,64 @@ public class HadoopDruidIndexerConfig
throw new ISE("row[%s] doesn't fit in any shard[%s]", inputRow, shards); throw new ISE("row[%s] doesn't fit in any shard[%s]", inputRow, shards);
} }
public Optional<Set<Interval>> getSegmentGranularIntervals() public Set<Interval> getSegmentGranularIntervals()
{ {
return Optional.fromNullable((Set<Interval>) granularitySpec.bucketIntervals().orNull()); return granularitySpec.bucketIntervals();
} }
public Optional<Iterable<Bucket>> getAllBuckets() public Iterable<Bucket> getAllBuckets()
{ {
Optional<Set<Interval>> intervals = getSegmentGranularIntervals(); return FunctionalIterable
if (intervals.isPresent()) { .create(getSegmentGranularIntervals())
return Optional.of( .transformCat(
(Iterable<Bucket>) FunctionalIterable new Function<Interval, Iterable<Bucket>>()
.create(intervals.get()) {
.transformCat( @Override
new Function<Interval, Iterable<Bucket>>() public Iterable<Bucket> apply(Interval input)
{ {
@Override final DateTime bucketTime = input.getStart();
public Iterable<Bucket> apply(Interval input) final List<HadoopyShardSpec> specs = shardSpecs.get(bucketTime);
{ if (specs == null) {
final DateTime bucketTime = input.getStart(); return ImmutableList.of();
final List<HadoopyShardSpec> specs = shardSpecs.get(bucketTime); }
if (specs == null) {
return ImmutableList.of();
}
return FunctionalIterable return FunctionalIterable
.create(specs) .create(specs)
.transform( .transform(
new Function<HadoopyShardSpec, Bucket>() new Function<HadoopyShardSpec, Bucket>()
{ {
int i = 0; int i = 0;
@Override @Override
public Bucket apply(HadoopyShardSpec input) public Bucket apply(HadoopyShardSpec input)
{ {
return new Bucket(input.getShardNum(), bucketTime, i++); return new Bucket(input.getShardNum(), bucketTime, i++);
} }
} }
); );
} }
} }
) );
);
} else {
return Optional.absent();
}
} }
/****************************************** /******************************************
Path helper logic Path helper logic
******************************************/ ******************************************/
/**
* Make the intermediate path for this job run.
*
* @return the intermediate path for this job run.
*/
/**
* Make the intermediate path for this job run.
*
* @return the intermediate path for this job run.
*/
public Path makeIntermediatePath() public Path makeIntermediatePath()
{ {
return new Path(String.format("%s/%s/%s", getWorkingPath(), getDataSource(), getVersion().replace(":", ""))); return new Path(String.format("%s/%s/%s", getWorkingPath(), getDataSource(), getVersion().replace(":", "")));
} }
public Path makeSegmentPartitionInfoPath(Interval bucketInterval) public Path makeSegmentPartitionInfoPath(Bucket bucket)
{ {
final Interval bucketInterval = getGranularitySpec().bucketInterval(bucket.time).get();
return new Path( return new Path(
String.format( String.format(
"%s/%s_%s/partitions.json", "%s/%s_%s/partitions.json",
@ -559,16 +551,6 @@ public class HadoopDruidIndexerConfig
); );
} }
public Path makeIntervalInfoPath()
{
return new Path(
String.format(
"%s/intervals.json",
makeIntermediatePath()
)
);
}
public Path makeDescriptorInfoDir() public Path makeDescriptorInfoDir()
{ {
return new Path(makeIntermediatePath(), "segmentDescriptorInfo"); return new Path(makeIntermediatePath(), "segmentDescriptorInfo");
@ -643,5 +625,8 @@ public class HadoopDruidIndexerConfig
Preconditions.checkNotNull(segmentOutputPath, "segmentOutputPath"); Preconditions.checkNotNull(segmentOutputPath, "segmentOutputPath");
Preconditions.checkNotNull(version, "version"); Preconditions.checkNotNull(version, "version");
Preconditions.checkNotNull(rollupSpec, "rollupSpec"); Preconditions.checkNotNull(rollupSpec, "rollupSpec");
final int nIntervals = getIntervals().size();
Preconditions.checkArgument(nIntervals > 0, "intervals.size()[%s] <= 0", nIntervals);
} }
} }
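The getAllBuckets() change above removes the Optional wrapper but keeps the same expansion: every shard spec of every time bucket becomes one Bucket, numbered within its bucket. A simplified sketch of that expansion (descriptor strings stand in for the Bucket type):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class BucketExpansionSketch
{
  // shardNumsByBucketStart: bucket start millis -> the global shard numbers assigned to that bucket.
  // Emits one descriptor per (bucket, shard) pair, mirroring new Bucket(shardNum, bucketTime, i++).
  static List<String> allBuckets(Map<Long, List<Integer>> shardNumsByBucketStart)
  {
    final List<String> buckets = new ArrayList<>();
    for (Map.Entry<Long, List<Integer>> entry : shardNumsByBucketStart.entrySet()) {
      int partitionNum = 0;
      for (Integer shardNum : entry.getValue()) {
        buckets.add(String.format("Bucket[shardNum=%d, time=%d, partitionNum=%d]", shardNum, entry.getKey(), partitionNum++));
      }
    }
    return buckets;
  }
}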

View File

@@ -46,10 +46,6 @@ public class HadoopDruidIndexerConfigBuilder
 return HadoopDruidIndexerConfig.jsonMapper.convertValue(schema, HadoopDruidIndexerConfig.class);
 }
-public static HadoopDruidIndexerSchema toSchema(HadoopDruidIndexerConfig config){
-return HadoopDruidIndexerConfig.jsonMapper.convertValue(config, HadoopDruidIndexerSchema.class);
-}
 public static HadoopDruidIndexerConfig fromMap(Map<String, Object> argSpec)
 {
 return HadoopDruidIndexerConfig.jsonMapper.convertValue(argSpec, HadoopDruidIndexerConfig.class);

View File

@ -19,20 +19,34 @@
package io.druid.indexer; package io.druid.indexer;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger; import com.metamx.common.logger.Logger;
import io.druid.timeline.DataSegment; import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.NoneShardSpec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;
import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map;
/** /**
*/ */
public class HadoopDruidIndexerJob implements Jobby public class HadoopDruidIndexerJob implements Jobby
{ {
private static final Logger log = new Logger(HadoopDruidIndexerJob.class); private static final Logger log = new Logger(HadoopDruidIndexerJob.class);
private final HadoopDruidIndexerConfig config; private final HadoopDruidIndexerConfig config;
private final DbUpdaterJob dbUpdaterJob; private final DbUpdaterJob dbUpdaterJob;
private IndexGeneratorJob indexJob; private IndexGeneratorJob indexJob;
private volatile List<DataSegment> publishedSegments = null; private volatile List<DataSegment> publishedSegments = null;
@ -55,7 +69,23 @@ public class HadoopDruidIndexerJob implements Jobby
public boolean run() public boolean run()
{ {
List<Jobby> jobs = Lists.newArrayList(); List<Jobby> jobs = Lists.newArrayList();
JobHelper.ensurePaths(config);
ensurePaths();
if (config.partitionByDimension()) {
jobs.add(new DeterminePartitionsJob(config));
}
else {
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
DateTime bucket = segmentGranularity.getStart();
final HadoopyShardSpec spec = new HadoopyShardSpec(new NoneShardSpec(), shardCount++);
shardSpecs.put(bucket, Lists.newArrayList(spec));
log.info("DateTime[%s], spec[%s]", bucket, spec);
}
config.setShardSpecs(shardSpecs);
}
indexJob = new IndexGeneratorJob(config); indexJob = new IndexGeneratorJob(config);
jobs.add(indexJob); jobs.add(indexJob);
@ -66,15 +96,65 @@ public class HadoopDruidIndexerJob implements Jobby
log.info("No updaterJobSpec set, not uploading to database"); log.info("No updaterJobSpec set, not uploading to database");
} }
String failedMessage = null;
for (Jobby job : jobs) {
if (failedMessage == null) {
if (!job.run()) {
failedMessage = String.format("Job[%s] failed!", job.getClass());
}
}
}
if (failedMessage == null) {
publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
}
if (!config.isLeaveIntermediate()) {
if (failedMessage == null || config.isCleanupOnFailure()) {
Path workingPath = config.makeIntermediatePath();
log.info("Deleting path[%s]", workingPath);
try {
workingPath.getFileSystem(new Configuration()).delete(workingPath, true);
}
catch (IOException e) {
log.error(e, "Failed to cleanup path[%s]", workingPath);
}
}
}
if (failedMessage != null) {
throw new ISE(failedMessage);
}
JobHelper.runJobs(jobs, config);
publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
return true; return true;
} }
public List<DataSegment> getPublishedSegments() private void ensurePaths()
{ {
if (publishedSegments == null) { // config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs to be done before anything else
try {
Job job = new Job(
new Configuration(),
String.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.19");
for (String propName : System.getProperties().stringPropertyNames()) {
Configuration conf = job.getConfiguration();
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
config.addInputPaths(job);
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
public List<DataSegment> getPublishedSegments() {
if(publishedSegments == null) {
throw new IllegalStateException("Job hasn't run yet. No segments have been published yet."); throw new IllegalStateException("Job hasn't run yet. No segments have been published yet.");
} }
return publishedSegments; return publishedSegments;
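Both ensurePaths() above and the JobHelper.injectSystemProperties() removed later in this diff copy "hadoop."-prefixed JVM system properties into the job configuration. A minimal sketch of that copy:

import org.apache.hadoop.conf.Configuration;

public class SystemPropertySketch
{
  // For every system property that starts with "hadoop.", strip the prefix and set it on the
  // Hadoop Configuration, e.g. -Dhadoop.mapred.job.tracker=... becomes mapred.job.tracker.
  static void injectSystemProperties(Configuration conf)
  {
    for (String propName : System.getProperties().stringPropertyNames()) {
      if (propName.startsWith("hadoop.")) {
        conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
      }
    }
  }
}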

View File

@@ -22,7 +22,6 @@ package io.druid.indexer;
 import com.metamx.common.RE;
 import io.druid.data.input.InputRow;
 import io.druid.data.input.impl.StringInputRowParser;
-import io.druid.indexer.granularity.GranularitySpec;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -71,9 +70,8 @@ public abstract class HadoopDruidIndexerMapper<KEYOUT, VALUEOUT> extends Mapper<
 throw e;
 }
 }
-GranularitySpec spec = config.getGranularitySpec();
-if (!spec.bucketIntervals().isPresent() || spec.bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()))
-.isPresent()) {
+if(config.getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch())).isPresent()) {
 innerMap(inputRow, value, context);
 }
 }

View File

@ -44,7 +44,6 @@ import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema; import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.timeline.DataSegment; import io.druid.timeline.DataSegment;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
@ -85,7 +84,9 @@ import java.util.zip.ZipOutputStream;
public class IndexGeneratorJob implements Jobby public class IndexGeneratorJob implements Jobby
{ {
private static final Logger log = new Logger(IndexGeneratorJob.class); private static final Logger log = new Logger(IndexGeneratorJob.class);
private final HadoopDruidIndexerConfig config; private final HadoopDruidIndexerConfig config;
private IndexGeneratorStats jobStats; private IndexGeneratorStats jobStats;
public IndexGeneratorJob( public IndexGeneratorJob(
@ -96,6 +97,65 @@ public class IndexGeneratorJob implements Jobby
this.jobStats = new IndexGeneratorStats(); this.jobStats = new IndexGeneratorStats();
} }
public IndexGeneratorStats getJobStats()
{
return jobStats;
}
public boolean run()
{
try {
Job job = new Job(
new Configuration(),
String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.23");
for (String propName : System.getProperties().stringPropertyNames()) {
Configuration conf = job.getConfiguration();
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(IndexGeneratorMapper.class);
job.setMapOutputValueClass(Text.class);
SortableBytes.useSortableBytesAsMapOutputKey(job);
job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
job.setPartitionerClass(IndexGeneratorPartitioner.class);
job.setReducerClass(IndexGeneratorReducer.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
config.addInputPaths(job);
config.intoConfiguration(job);
JobHelper.setupClasspath(config, job);
job.submit();
log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
boolean success = job.waitForCompletion(true);
Counter invalidRowCount = job.getCounters()
.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
jobStats.setInvalidRowCount(invalidRowCount.getValue());
return success;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig config) public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig config)
{ {
@ -129,60 +189,6 @@ public class IndexGeneratorJob implements Jobby
return publishedSegments; return publishedSegments;
} }
public IndexGeneratorStats getJobStats()
{
return jobStats;
}
public boolean run()
{
try {
Job job = new Job(
new Configuration(),
String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.23");
JobHelper.injectSystemProperties(job);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(IndexGeneratorMapper.class);
job.setMapOutputValueClass(Text.class);
SortableBytes.useSortableBytesAsMapOutputKey(job);
job.setNumReduceTasks(Iterables.size(config.getAllBuckets().get()));
job.setPartitionerClass(IndexGeneratorPartitioner.class);
job.setReducerClass(IndexGeneratorReducer.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
config.addInputPaths(job);
config.intoConfiguration(job);
JobHelper.setupClasspath(config, job);
job.submit();
log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
boolean success = job.waitForCompletion(true);
Counter invalidRowCount = job.getCounters()
.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
jobStats.setInvalidRowCount(invalidRowCount.getValue());
return success;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper<BytesWritable, Text> public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper<BytesWritable, Text>
{ {
@ -210,9 +216,8 @@ public class IndexGeneratorJob implements Jobby
} }
} }
public static class IndexGeneratorPartitioner extends Partitioner<BytesWritable, Text> implements Configurable public static class IndexGeneratorPartitioner extends Partitioner<BytesWritable, Text>
{ {
private Configuration config;
@Override @Override
public int getPartition(BytesWritable bytesWritable, Text text, int numPartitions) public int getPartition(BytesWritable bytesWritable, Text text, int numPartitions)
@ -220,27 +225,12 @@ public class IndexGeneratorJob implements Jobby
final ByteBuffer bytes = ByteBuffer.wrap(bytesWritable.getBytes()); final ByteBuffer bytes = ByteBuffer.wrap(bytesWritable.getBytes());
bytes.position(4); // Skip length added by SortableBytes bytes.position(4); // Skip length added by SortableBytes
int shardNum = bytes.getInt(); int shardNum = bytes.getInt();
if (config.get("mapred.job.tracker").equals("local")) {
return shardNum % numPartitions;
} else {
if (shardNum >= numPartitions) {
throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions);
}
return shardNum;
if (shardNum >= numPartitions) {
throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions);
} }
}
@Override return shardNum;
public Configuration getConf()
{
return config;
}
@Override
public void setConf(Configuration config)
{
this.config = config;
} }
} }
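The partitioner change above drops the local-mode modulo branch; what remains is reading the shard number that SortableBytes placed after its 4-byte length prefix. A sketch of that extraction on a raw key:

import java.nio.ByteBuffer;

public class ShardPartitionSketch
{
  static int getPartition(byte[] sortableKeyBytes, int numPartitions)
  {
    final ByteBuffer bytes = ByteBuffer.wrap(sortableKeyBytes);
    bytes.position(4); // skip the length prepended by SortableBytes
    final int shardNum = bytes.getInt();
    if (shardNum >= numPartitions) {
      throw new IllegalStateException(
          String.format("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions)
      );
    }
    return shardNum;
  }
}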

View File

@ -20,11 +20,9 @@
package io.druid.indexer; package io.druid.indexer;
import com.google.api.client.util.Sets; import com.google.api.client.util.Sets;
import com.google.common.base.Throwables;
import com.google.common.io.ByteStreams; import com.google.common.io.ByteStreams;
import com.google.common.io.Files; import com.google.common.io.Files;
import com.google.common.io.OutputSupplier; import com.google.common.io.OutputSupplier;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger; import com.metamx.common.logger.Logger;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.filecache.DistributedCache;
@ -36,7 +34,6 @@ import org.apache.hadoop.mapreduce.Job;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.List;
import java.util.Set; import java.util.Set;
/** /**
@ -97,63 +94,4 @@ public class JobHelper
} }
} }
} }
public static void injectSystemProperties(Job job)
{
final Configuration conf = job.getConfiguration();
for (String propName : System.getProperties().stringPropertyNames()) {
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
}
public static void ensurePaths(HadoopDruidIndexerConfig config)
{
// config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs to be done before anything else
try {
Job job = new Job(
new Configuration(),
String.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.19");
injectSystemProperties(job);
config.addInputPaths(job);
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config){
String failedMessage = null;
for (Jobby job : jobs) {
if (failedMessage == null) {
if (!job.run()) {
failedMessage = String.format("Job[%s] failed!", job.getClass());
}
}
}
if (!config.isLeaveIntermediate()) {
if (failedMessage == null || config.isCleanupOnFailure()) {
Path workingPath = config.makeIntermediatePath();
log.info("Deleting path[%s]", workingPath);
try {
workingPath.getFileSystem(new Configuration()).delete(workingPath, true);
}
catch (IOException e) {
log.error(e, "Failed to cleanup path[%s]", workingPath);
}
}
}
if (failedMessage != null) {
throw new ISE(failedMessage);
}
return true;
}
} }
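JobHelper.runJobs(), removed here and folded back into HadoopDruidIndexerJob.run(), runs the jobs in order, remembers the first failure, and only cleans up the intermediate path when allowed. A simplified sketch of that control flow (the Jobby interface and cleanup hook are stubbed in):

import java.util.List;

public class RunJobsSketch
{
  interface Jobby { boolean run(); }

  static boolean runJobs(List<Jobby> jobs, Runnable deleteIntermediatePath, boolean leaveIntermediate, boolean cleanupOnFailure)
  {
    String failedMessage = null;
    for (Jobby job : jobs) {
      if (failedMessage == null && !job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
      }
    }
    if (!leaveIntermediate && (failedMessage == null || cleanupOnFailure)) {
      deleteIntermediatePath.run(); // e.g. delete the working path on the Hadoop FileSystem
    }
    if (failedMessage != null) {
      throw new IllegalStateException(failedMessage);
    }
    return true;
  }
}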

View File

@@ -72,9 +72,9 @@ public class ArbitraryGranularitySpec implements GranularitySpec
 @Override
 @JsonProperty("intervals")
-public Optional<SortedSet<Interval>> bucketIntervals()
+public SortedSet<Interval> bucketIntervals()
 {
-return Optional.of((SortedSet<Interval>) intervals);
+return intervals;
 }
 @Override

View File

@@ -40,7 +40,7 @@ import java.util.SortedSet;
 public interface GranularitySpec
 {
 /** Set of all time groups, broken up on segment boundaries. Should be sorted by interval start and non-overlapping.*/
-public Optional<SortedSet<Interval>> bucketIntervals();
+public SortedSet<Interval> bucketIntervals();
 /** Time-grouping interval corresponding to some instant, if any. */
 public Optional<Interval> bucketInterval(DateTime dt);

View File

@@ -35,7 +35,7 @@ import java.util.SortedSet;
 public class UniformGranularitySpec implements GranularitySpec
 {
 final private Granularity granularity;
-final private Iterable<Interval> inputIntervals;
+final private List<Interval> inputIntervals;
 final private ArbitraryGranularitySpec wrappedSpec;
 @JsonCreator
@@ -44,28 +44,21 @@
 @JsonProperty("intervals") List<Interval> inputIntervals
 )
 {
-this.granularity = granularity;
-if (inputIntervals != null) {
-List<Interval> granularIntervals = Lists.newArrayList();
-for (Interval inputInterval : inputIntervals) {
-Iterables.addAll(granularIntervals, granularity.getIterable(inputInterval));
-}
-this.inputIntervals = ImmutableList.copyOf(inputIntervals);
-this.wrappedSpec = new ArbitraryGranularitySpec(granularIntervals);
-} else {
-this.inputIntervals = null;
-this.wrappedSpec = null;
-}
+List<Interval> granularIntervals = Lists.newArrayList();
+for (Interval inputInterval : inputIntervals) {
+Iterables.addAll(granularIntervals, granularity.getIterable(inputInterval));
+}
+this.granularity = granularity;
+this.inputIntervals = ImmutableList.copyOf(inputIntervals);
+this.wrappedSpec = new ArbitraryGranularitySpec(granularIntervals);
 }
 @Override
-public Optional<SortedSet<Interval>> bucketIntervals()
+public SortedSet<Interval> bucketIntervals()
 {
-if (wrappedSpec == null) {
-return Optional.absent();
-} else {
-return wrappedSpec.bucketIntervals();
-}
+return wrappedSpec.bucketIntervals();
 }
 @Override
@@ -82,8 +75,8 @@ public class UniformGranularitySpec implements GranularitySpec
 }
 @JsonProperty("intervals")
-public Optional<Iterable<Interval>> getIntervals()
+public Iterable<Interval> getIntervals()
 {
-return Optional.fromNullable(inputIntervals);
+return inputIntervals;
 }
 }
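UniformGranularitySpec expands each input interval into granularity-sized bucket intervals via granularity.getIterable(). The sketch below does the same thing for an hourly granularity with plain Joda-Time, purely as an illustration of that expansion:

import org.joda.time.DateTime;
import org.joda.time.Interval;
import java.util.ArrayList;
import java.util.List;

public class HourlyBucketsSketch
{
  static List<Interval> hourlyBuckets(Interval inputInterval)
  {
    final List<Interval> buckets = new ArrayList<>();
    // Start from the hour floor of the input interval and step one hour at a time.
    DateTime cursor = inputInterval.getStart().hourOfDay().roundFloorCopy();
    while (cursor.isBefore(inputInterval.getEnd())) {
      buckets.add(new Interval(cursor, cursor.plusHours(1)));
      cursor = cursor.plusHours(1);
    }
    return buckets;
  }
}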

View File

@ -1,68 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonProperty;
public abstract class AbstractPartitionsSpec implements PartitionsSpec
{
private static final double DEFAULT_OVERSIZE_THRESHOLD = 1.5;
private final long targetPartitionSize;
private final long maxPartitionSize;
private final boolean assumeGrouped;
public AbstractPartitionsSpec(
Long targetPartitionSize,
Long maxPartitionSize,
Boolean assumeGrouped
)
{
this.targetPartitionSize = targetPartitionSize == null ? -1 : targetPartitionSize;
this.maxPartitionSize = maxPartitionSize == null
? (long) (this.targetPartitionSize * DEFAULT_OVERSIZE_THRESHOLD)
: maxPartitionSize;
this.assumeGrouped = assumeGrouped == null ? false : assumeGrouped;
}
@JsonProperty
public long getTargetPartitionSize()
{
return targetPartitionSize;
}
@JsonProperty
public long getMaxPartitionSize()
{
return maxPartitionSize;
}
@JsonProperty
public boolean isAssumeGrouped()
{
return assumeGrouped;
}
@Override
public boolean isDeterminingPartitions()
{
return targetPartitionSize > 0;
}
}

View File

@@ -19,33 +19,69 @@
 package io.druid.indexer.partitions;
+import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.annotation.JsonSubTypes;
-import com.fasterxml.jackson.annotation.JsonTypeInfo;
-import io.druid.indexer.HadoopDruidIndexerConfig;
-import io.druid.indexer.Jobby;
+import javax.annotation.Nullable;
-@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = SingleDimensionPartitionsSpec.class)
-@JsonSubTypes(value = {
-@JsonSubTypes.Type(name = "dimension", value = SingleDimensionPartitionsSpec.class),
-@JsonSubTypes.Type(name = "random", value = RandomPartitionsSpec.class)
-})
-public interface PartitionsSpec
+public class PartitionsSpec
 {
-@JsonIgnore
-public Jobby getPartitionJob(HadoopDruidIndexerConfig config);
-@JsonProperty
-public long getTargetPartitionSize();
-@JsonProperty
-public long getMaxPartitionSize();
-@JsonProperty
-public boolean isAssumeGrouped();
+private static final double DEFAULT_OVERSIZE_THRESHOLD = 1.5;
+@Nullable
+private final String partitionDimension;
+private final long targetPartitionSize;
+private final long maxPartitionSize;
+private final boolean assumeGrouped;
+@JsonCreator
+public PartitionsSpec(
+@JsonProperty("partitionDimension") @Nullable String partitionDimension,
+@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
+@JsonProperty("maxPartitionSize") @Nullable Long maxPartitionSize,
+@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
+)
+{
+this.partitionDimension = partitionDimension;
+this.targetPartitionSize = targetPartitionSize == null ? -1 : targetPartitionSize;
+this.maxPartitionSize = maxPartitionSize == null
+? (long) (this.targetPartitionSize * DEFAULT_OVERSIZE_THRESHOLD)
+: maxPartitionSize;
+this.assumeGrouped = assumeGrouped == null ? false : assumeGrouped;
+}
 @JsonIgnore
-public boolean isDeterminingPartitions();
+public boolean isDeterminingPartitions()
+{
+return targetPartitionSize > 0;
+}
+@JsonProperty
+@Nullable
+public String getPartitionDimension()
+{
+return partitionDimension;
+}
+@JsonProperty
+public long getTargetPartitionSize()
+{
+return targetPartitionSize;
+}
+@JsonProperty
+public long getMaxPartitionSize()
+{
+return maxPartitionSize;
+}
+@JsonProperty
+public boolean isAssumeGrouped()
+{
+return assumeGrouped;
+}
 }
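The defaulting rules in the restored constructor above are easy to miss, so here they are spelled out on their own (illustrative helper, not project code):

public class PartitionSizeDefaultsSketch
{
  // targetPartitionSize <= 0 (the default when absent) disables partition determination entirely;
  // maxPartitionSize defaults to 1.5x the target when it is not supplied.
  static long effectiveMaxPartitionSize(Long targetPartitionSize, Long maxPartitionSize)
  {
    final long target = targetPartitionSize == null ? -1 : targetPartitionSize;
    return maxPartitionSize == null ? (long) (target * 1.5) : maxPartitionSize;
  }
}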

View File

@ -1,47 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.indexer.DetermineHashedPartitionsJob;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.Jobby;
import javax.annotation.Nullable;
public class RandomPartitionsSpec extends AbstractPartitionsSpec
{
@JsonCreator
public RandomPartitionsSpec(
@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
@JsonProperty("maxPartitionSize") @Nullable Long maxPartitionSize,
@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
)
{
super(targetPartitionSize, maxPartitionSize, assumeGrouped);
}
@Override
public Jobby getPartitionJob(HadoopDruidIndexerConfig config)
{
return new DetermineHashedPartitionsJob(config);
}
}
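
For contrast with the restored concrete class above, the code being removed here selected a partitioning strategy by a "type" field on the PartitionsSpec interface: "random" mapped to RandomPartitionsSpec (which hands partition determination to DetermineHashedPartitionsJob), while omitting "type" fell back to SingleDimensionPartitionsSpec as the declared defaultImpl. A sketch of that pre-revert behaviour (the standalone mapper is an illustrative assumption):

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.indexer.partitions.PartitionsSpec;
import io.druid.indexer.partitions.RandomPartitionsSpec;

class RandomPartitionsSpecSerdeSketch
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();

    // "type":"random" picks the subtype registered on the (removed) interface.
    PartitionsSpec spec = mapper.readValue(
        "{\"type\":\"random\",\"targetPartitionSize\":100}",
        PartitionsSpec.class
    );

    System.out.println(spec instanceof RandomPartitionsSpec);  // true
    System.out.println(spec.getMaxPartitionSize());            // 150, i.e. 100 * 1.5
  }
}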

View File

@ -1,60 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.indexer.DeterminePartitionsJob;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.Jobby;
import javax.annotation.Nullable;
public class SingleDimensionPartitionsSpec extends AbstractPartitionsSpec
{
@Nullable
private final String partitionDimension;
@JsonCreator
public SingleDimensionPartitionsSpec(
@JsonProperty("partitionDimension") @Nullable String partitionDimension,
@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
@JsonProperty("maxPartitionSize") @Nullable Long maxPartitionSize,
@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
)
{
super(targetPartitionSize, maxPartitionSize, assumeGrouped);
this.partitionDimension = partitionDimension;
}
@JsonProperty
@Nullable
public String getPartitionDimension()
{
return partitionDimension;
}
@Override
public Jobby getPartitionJob(HadoopDruidIndexerConfig config)
{
return new DeterminePartitionsJob(config);
}
}

View File

@ -20,7 +20,6 @@
package io.druid.indexer.path;
import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.common.base.Optional;
import com.google.common.collect.Sets;
import com.metamx.common.Granularity;
import com.metamx.common.guava.Comparators;
@ -100,12 +99,9 @@ public class GranularityPathSpec implements PathSpec
  public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException
  {
    final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
-    Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
-    if (optionalIntervals.isPresent()) {
-      for (Interval segmentInterval : optionalIntervals.get()) {
-        for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
-          intervals.add(dataInterval);
-        }
+    for (Interval segmentInterval : config.getSegmentGranularIntervals()) {
+      for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
+        intervals.add(dataInterval);
      }
    }

View File

@ -25,8 +25,6 @@ import com.google.common.collect.Lists;
import io.druid.db.DbConnectorConfig;
import io.druid.indexer.granularity.UniformGranularitySpec;
import io.druid.indexer.partitions.PartitionsSpec;
-import io.druid.indexer.partitions.RandomPartitionsSpec;
-import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.indexer.updater.DbUpdaterJobSpec;
import io.druid.jackson.DefaultObjectMapper;
import org.apache.hadoop.fs.LocalFileSystem;
@ -67,7 +65,7 @@ public class HadoopDruidIndexerConfigTest
    Assert.assertEquals(
        "getIntervals",
        Lists.newArrayList(new Interval("2012-01-01/P1D")),
-        granularitySpec.getIntervals().get()
+        granularitySpec.getIntervals()
    );
    Assert.assertEquals(
@ -101,7 +99,7 @@ public class HadoopDruidIndexerConfigTest
    Assert.assertEquals(
        "getIntervals",
        Lists.newArrayList(new Interval("2012-02-01/P1D")),
-        granularitySpec.getIntervals().get()
+        granularitySpec.getIntervals()
    );
    Assert.assertEquals(
@ -169,14 +167,15 @@ public class HadoopDruidIndexerConfigTest
        100
    );
-    Assert.assertTrue(
-        "partitionSpec",
-        partitionsSpec instanceof SingleDimensionPartitionsSpec
+    Assert.assertEquals(
+        "getPartitionDimension",
+        partitionsSpec.getPartitionDimension(),
+        null
    );
  }
  @Test
-  public void testPartitionsSpecSpecificDimensionLegacy()
+  public void testPartitionsSpecSpecificDimension()
  {
    final HadoopDruidIndexerConfig cfg;
@ -215,10 +214,9 @@ public class HadoopDruidIndexerConfigTest
        150
    );
-    Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
    Assert.assertEquals(
        "getPartitionDimension",
-        ((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
+        partitionsSpec.getPartitionDimension(),
        "foo"
    );
  }
@ -261,10 +259,9 @@ public class HadoopDruidIndexerConfigTest
        150
    );
-    Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
    Assert.assertEquals(
        "getPartitionDimension",
-        ((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
+        partitionsSpec.getPartitionDimension(),
        "foo"
    );
  }
@ -310,10 +307,9 @@ public class HadoopDruidIndexerConfigTest
        200
    );
-    Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
    Assert.assertEquals(
        "getPartitionDimension",
-        ((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
+        partitionsSpec.getPartitionDimension(),
        "foo"
    );
  }
@ -424,6 +420,7 @@ public class HadoopDruidIndexerConfigTest
    );
  }
  @Test
  public void shouldMakeHDFSCompliantSegmentOutputPath()
  {
@ -500,47 +497,4 @@ public class HadoopDruidIndexerConfigTest
      throw Throwables.propagate(e);
    }
  }
-  public void testRandomPartitionsSpec() throws Exception{
-  {
-    final HadoopDruidIndexerConfig cfg;
-    try {
-      cfg = jsonReadWriteRead(
-          "{"
-          + "\"partitionsSpec\":{"
-          + " \"targetPartitionSize\":100,"
-          + " \"type\":\"random\""
-          + " }"
-          + "}",
-          HadoopDruidIndexerConfig.class
-      );
-    }
-    catch (Exception e) {
-      throw Throwables.propagate(e);
-    }
-    final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec();
-    Assert.assertEquals(
-        "isDeterminingPartitions",
-        partitionsSpec.isDeterminingPartitions(),
-        true
-    );
-    Assert.assertEquals(
-        "getTargetPartitionSize",
-        partitionsSpec.getTargetPartitionSize(),
-        100
-    );
-    Assert.assertEquals(
-        "getMaxPartitionSize",
-        partitionsSpec.getMaxPartitionSize(),
-        150
-    );
-    Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof RandomPartitionsSpec);
-  }
}

View File

@ -54,7 +54,7 @@ public class ArbitraryGranularityTest
            new Interval("2012-01-08T00Z/2012-01-11T00Z"),
            new Interval("2012-02-01T00Z/2012-03-01T00Z")
        ),
-        Lists.newArrayList(spec.bucketIntervals().get())
+        Lists.newArrayList(spec.bucketIntervals())
    );
    Assert.assertEquals(

View File

@ -57,7 +57,7 @@ public class UniformGranularityTest
            new Interval("2012-01-09T00Z/P1D"),
            new Interval("2012-01-10T00Z/P1D")
        ),
-        Lists.newArrayList(spec.bucketIntervals().get())
+        Lists.newArrayList(spec.bucketIntervals())
    );
    Assert.assertEquals(

View File

@ -24,23 +24,25 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.api.client.util.Lists;
+import com.google.common.base.Function;
import com.google.common.base.Joiner;
-import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Multimaps;
import com.metamx.common.logger.Logger;
import io.druid.common.utils.JodaUtils;
-import io.druid.indexer.HadoopDruidDetermineConfigurationJob;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.HadoopDruidIndexerConfigBuilder;
import io.druid.indexer.HadoopDruidIndexerJob;
import io.druid.indexer.HadoopDruidIndexerSchema;
-import io.druid.indexer.Jobby;
import io.druid.indexing.common.TaskLock;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.TaskToolbox;
-import io.druid.indexing.common.actions.LockAcquireAction;
import io.druid.indexing.common.actions.LockTryAcquireAction;
+import io.druid.indexing.common.actions.SegmentInsertAction;
import io.druid.indexing.common.actions.TaskActionClient;
import io.druid.initialization.Initialization;
import io.druid.server.initialization.ExtensionsConfig;
@ -49,26 +51,30 @@ import io.tesla.aether.internal.DefaultTeslaAether;
import org.joda.time.DateTime;
import org.joda.time.Interval;
-import javax.annotation.Nullable;
import java.io.File;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;
+import java.util.Collection;
import java.util.List;
-import java.util.SortedSet;
+import java.util.Map;
-public class HadoopIndexTask extends AbstractTask
+public class HadoopIndexTask extends AbstractFixedIntervalTask
{
  private static final Logger log = new Logger(HadoopIndexTask.class);
-  private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3";
  private static final ExtensionsConfig extensionsConfig;
  static {
    extensionsConfig = Initialization.makeStartupInjector().getInstance(ExtensionsConfig.class);
  }
+  private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3";
  @JsonIgnore
  private final HadoopDruidIndexerSchema schema;
  @JsonIgnore
  private final String hadoopCoordinates;
@ -91,7 +97,13 @@ public class HadoopIndexTask extends AbstractTask
  {
    super(
        id != null ? id : String.format("index_hadoop_%s_%s", schema.getDataSource(), new DateTime()),
-        schema.getDataSource()
+        schema.getDataSource(),
+        JodaUtils.umbrellaInterval(
+            JodaUtils.condenseIntervals(
+                schema.getGranularitySpec()
+                      .bucketIntervals()
+            )
+        )
    );
    // Some HadoopDruidIndexerSchema stuff doesn't make sense in the context of the indexing service
@ -109,22 +121,6 @@ public class HadoopIndexTask extends AbstractTask
    return "index_hadoop";
  }
-  @Override
-  public boolean isReady(TaskActionClient taskActionClient) throws Exception
-  {
-    Optional<SortedSet<Interval>> intervals = schema.getGranularitySpec().bucketIntervals();
-    if (intervals.isPresent()) {
-      Interval interval = JodaUtils.umbrellaInterval(
-          JodaUtils.condenseIntervals(
-              intervals.get()
-          )
-      );
-      return taskActionClient.submit(new LockTryAcquireAction(interval)).isPresent();
-    } else {
-      return true;
-    }
-  }
  @JsonProperty("config")
  public HadoopDruidIndexerSchema getSchema()
  {
@ -171,60 +167,29 @@ public class HadoopIndexTask extends AbstractTask
    jobUrls.addAll(extensionURLs);
    System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
-    boolean determineIntervals = !schema.getGranularitySpec().bucketIntervals().isPresent();
-    final Class<?> determineConfigurationMainClass = loader.loadClass(HadoopDetermineConfigInnerProcessing.class.getName());
-    final Method determineConfigurationMainMethod = determineConfigurationMainClass.getMethod(
-        "runTask",
-        String[].class
-    );
-    String[] determineConfigArgs = new String[]{
+    final Class<?> mainClass = loader.loadClass(HadoopIndexTaskInnerProcessing.class.getName());
+    final Method mainMethod = mainClass.getMethod("runTask", String[].class);
+    // We should have a lock from before we started running
+    final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));
+    log.info("Setting version to: %s", myLock.getVersion());
+    String[] args = new String[]{
        toolbox.getObjectMapper().writeValueAsString(schema),
+        myLock.getVersion(),
        toolbox.getConfig().getHadoopWorkingPath(),
-        toolbox.getSegmentPusher().getPathForHadoop(getDataSource())
+        toolbox.getSegmentPusher().getPathForHadoop(getDataSource()),
    };
-    String config = (String) determineConfigurationMainMethod.invoke(null, new Object[]{determineConfigArgs});
-    HadoopDruidIndexerSchema indexerSchema = toolbox.getObjectMapper()
-                                                    .readValue(config, HadoopDruidIndexerSchema.class);
-    // We should have a lock from before we started running only if interval was specified
-    final String version;
-    if (determineIntervals) {
-      Interval interval = JodaUtils.umbrellaInterval(
-          JodaUtils.condenseIntervals(
-              indexerSchema.getGranularitySpec().bucketIntervals().get()
-          )
-      );
-      TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
-      version = lock.getVersion();
-    } else {
-      Iterable<TaskLock> locks = getTaskLocks(toolbox);
-      final TaskLock myLock = Iterables.getOnlyElement(locks);
-      version = myLock.getVersion();
-    }
-    log.info("Setting version to: %s", version);
-    final Class<?> indexGeneratorMainClass = loader.loadClass(HadoopIndexGeneratorInnerProcessing.class.getName());
-    final Method indexGeneratorMainMethod = indexGeneratorMainClass.getMethod("runTask", String[].class);
-    String[] indexGeneratorArgs = new String[]{
-        toolbox.getObjectMapper().writeValueAsString(indexerSchema),
-        version
-    };
-    String segments = (String) indexGeneratorMainMethod.invoke(null, new Object[]{indexGeneratorArgs});
+    String segments = (String) mainMethod.invoke(null, new Object[]{args});
    if (segments != null) {
      List<DataSegment> publishedSegments = toolbox.getObjectMapper().readValue(
          segments,
-          new TypeReference<List<DataSegment>>()
-          {
-          }
+          new TypeReference<List<DataSegment>>() {}
      );
      toolbox.pushSegments(publishedSegments);
      return TaskStatus.success(getId());
    } else {
@ -232,12 +197,14 @@ public class HadoopIndexTask extends AbstractTask
    }
  }
-  public static class HadoopIndexGeneratorInnerProcessing
+  public static class HadoopIndexTaskInnerProcessing
  {
    public static String runTask(String[] args) throws Exception
    {
      final String schema = args[0];
-      String version = args[1];
+      final String version = args[1];
+      final String workingPath = args[2];
+      final String segmentOutputPath = args[3];
      final HadoopDruidIndexerSchema theSchema = HadoopDruidIndexerConfig.jsonMapper
          .readValue(
@ -247,6 +214,12 @@ public class HadoopIndexTask extends AbstractTask
      final HadoopDruidIndexerConfig config =
          new HadoopDruidIndexerConfigBuilder().withSchema(theSchema)
                                               .withVersion(version)
+                                               .withWorkingPath(
+                                                   workingPath
+                                               )
+                                               .withSegmentOutputPath(
+                                                   segmentOutputPath
+                                               )
                                               .build();
      HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);
@ -259,34 +232,4 @@ public class HadoopIndexTask extends AbstractTask
      return null;
    }
  }
-  public static class HadoopDetermineConfigInnerProcessing
-  {
-    public static String runTask(String[] args) throws Exception
-    {
-      final String schema = args[0];
-      final String workingPath = args[1];
-      final String segmentOutputPath = args[2];
-      final HadoopDruidIndexerSchema theSchema = HadoopDruidIndexerConfig.jsonMapper
-          .readValue(
-              schema,
-              HadoopDruidIndexerSchema.class
-          );
-      final HadoopDruidIndexerConfig config =
-          new HadoopDruidIndexerConfigBuilder().withSchema(theSchema)
-                                               .withWorkingPath(workingPath)
-                                               .withSegmentOutputPath(segmentOutputPath)
-                                               .build();
-      Jobby job = new HadoopDruidDetermineConfigurationJob(config);
-      log.info("Starting a hadoop index generator job...");
-      if (job.run()) {
-        return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(HadoopDruidIndexerConfigBuilder.toSchema(config));
-      }
-      return null;
-    }
-  }
}
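
The single-stage flow restored above hands work to the isolated classloader through a positional String[] rather than typed arguments, so the packing in run() must stay in lock-step with the unpacking in HadoopIndexTaskInnerProcessing.runTask(). A sketch of that contract (names here are placeholders, not the actual fields):

class InnerProcessingArgsSketch
{
  // Mirrors the order produced in run(): schema JSON, lock version, working path, segment output path.
  // runTask() reads them back as args[0]..args[3] in the same order.
  static String[] pack(String schemaJson, String lockVersion, String workingPath, String segmentOutputPath)
  {
    return new String[]{schemaJson, lockVersion, workingPath, segmentOutputPath};
  }
}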

View File

@ -109,8 +109,8 @@ public class IndexTask extends AbstractFixedIntervalTask
        id != null ? id : String.format("index_%s_%s", dataSource, new DateTime().toString()),
        dataSource,
        new Interval(
-            granularitySpec.bucketIntervals().get().first().getStart(),
-            granularitySpec.bucketIntervals().get().last().getEnd()
+            granularitySpec.bucketIntervals().first().getStart(),
+            granularitySpec.bucketIntervals().last().getEnd()
        )
    );
@ -137,7 +137,7 @@ public class IndexTask extends AbstractFixedIntervalTask
    final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));
    final Set<DataSegment> segments = Sets.newHashSet();
-    final Set<Interval> validIntervals = Sets.intersection(granularitySpec.bucketIntervals().get(), getDataIntervals());
+    final Set<Interval> validIntervals = Sets.intersection(granularitySpec.bucketIntervals(), getDataIntervals());
    if (validIntervals.isEmpty()) {
      throw new ISE("No valid data intervals found. Check your configs!");
    }

View File

@ -382,9 +382,11 @@ public class TaskSerdeTest
    final HadoopIndexTask task2 = (HadoopIndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
+    Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
+    Assert.assertEquals(task.getInterval(), task2.getInterval());
  }
}

View File

@ -30,7 +30,6 @@ import io.druid.guice.annotations.Self;
import io.druid.initialization.DruidModule;
import io.druid.server.DruidNode;
import io.druid.server.initialization.ZkPathsConfig;
-import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.LinearShardSpec;
import io.druid.timeline.partition.NumberedShardSpec;
import io.druid.timeline.partition.SingleDimensionShardSpec;
@ -64,8 +63,7 @@ public class ServerModule implements DruidModule
        .registerSubtypes(
            new NamedType(SingleDimensionShardSpec.class, "single"),
            new NamedType(LinearShardSpec.class, "linear"),
-            new NamedType(NumberedShardSpec.class, "numbered"),
-            new NamedType(HashBasedNumberedShardSpec.class, "hashed")
+            new NamedType(NumberedShardSpec.class, "numbered")
        )
    );
  }

View File

@ -1,77 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.timeline.partition;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import java.util.List;
public class HashBasedNumberedShardSpec extends NumberedShardSpec
{
private static final HashFunction hashFunction = Hashing.murmur3_32();
@JacksonInject
private ObjectMapper jsonMapper;
@JsonCreator
public HashBasedNumberedShardSpec(
@JsonProperty("partitionNum") int partitionNum,
@JsonProperty("partitions") int partitions
)
{
super(partitionNum, partitions);
}
@Override
public boolean isInChunk(InputRow inputRow)
{
return Math.abs(hash(inputRow)) % getPartitions() == getPartitionNum();
}
private int hash(InputRow inputRow)
{
final List<Object> groupKey = Rows.toGroupKey(inputRow.getTimestampFromEpoch(), inputRow);
try {
return hashFunction.hashBytes(jsonMapper.writeValueAsBytes(groupKey)).asInt();
}
catch (JsonProcessingException e) {
throw Throwables.propagate(e);
}
}
@Override
public String toString()
{
return "HashBasedNumberedShardSpec{" +
"partitionNum=" + getPartitionNum() +
", partitions=" + getPartitions() +
'}';
}
}
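
The removed shard spec above assigns each row to exactly one of N numbered shards by hashing its (timestamp, dimensions) group key and taking the value modulo the partition count. A stripped-down sketch of that assignment rule (illustrative hash values; the real class hashes the JSON-serialized group key with murmur3_32):

class HashPartitionAssignmentSketch
{
  // Same check as isInChunk(): a row lands in the shard whose partitionNum equals this value.
  static int assignedPartition(int rowHash, int partitions)
  {
    return Math.abs(rowHash) % partitions;
  }

  public static void main(String[] args)
  {
    System.out.println(assignedPartition(7, 3));   // 1
    System.out.println(assignedPartition(-8, 3));  // 2
    System.out.println(assignedPartition(36, 3));  // 0
  }
}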

View File

@ -19,12 +19,8 @@
package io.druid;
-import com.fasterxml.jackson.databind.BeanProperty;
-import com.fasterxml.jackson.databind.DeserializationContext;
-import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
-import com.metamx.common.ISE;
import io.druid.guice.ServerModule;
import io.druid.jackson.DefaultObjectMapper;
@ -41,20 +37,5 @@ public class TestUtil
    for (Module module : list) {
      MAPPER.registerModule(module);
    }
-    MAPPER.setInjectableValues(
-        new InjectableValues()
-        {
-          @Override
-          public Object findInjectableValue(
-              Object valueId, DeserializationContext ctxt, BeanProperty forProperty, Object beanInstance
-          )
-          {
-            if (valueId.equals("com.fasterxml.jackson.databind.ObjectMapper")) {
-              return TestUtil.MAPPER;
-            }
-            throw new ISE("No Injectable value found");
-          }
-        }
-    );
  }
}
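
The InjectableValues block removed above existed to satisfy @JacksonInject ObjectMapper fields during tests, such as the jsonMapper field in the removed HashBasedNumberedShardSpec. A roughly equivalent, shorter form using Jackson's stock InjectableValues.Std is sketched below (illustrative only; the anonymous mapper stands in for whatever mapper a test actually wires up):

import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.ObjectMapper;

class InjectableMapperSketch
{
  public static void main(String[] args)
  {
    ObjectMapper mapper = new ObjectMapper();

    // Registers the mapper under its class name, which is the id an un-keyed
    // @JacksonInject ObjectMapper field resolves against.
    mapper.setInjectableValues(
        new InjectableValues.Std().addValue(ObjectMapper.class, mapper)
    );
  }
}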

View File

@ -1,110 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.server.shard;
import com.fasterxml.jackson.databind.BeanProperty;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.InjectableValues;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.metamx.common.ISE;
import io.druid.TestUtil;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.PartitionChunk;
import io.druid.timeline.partition.ShardSpec;
import junit.framework.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.List;
public class HashBasedNumberedShardSpecTest
{
@Test
public void testSerdeRoundTrip() throws Exception
{
final ShardSpec spec = TestUtil.MAPPER.readValue(
TestUtil.MAPPER.writeValueAsBytes(new HashBasedNumberedShardSpec(1, 2)),
ShardSpec.class
);
Assert.assertEquals(1, spec.getPartitionNum());
Assert.assertEquals(2, ((HashBasedNumberedShardSpec) spec).getPartitions());
}
@Test
public void testSerdeBackwardsCompat() throws Exception
{
final ShardSpec spec = TestUtil.MAPPER.readValue(
"{\"type\": \"hashed\", \"partitions\": 2, \"partitionNum\": 1}",
ShardSpec.class
);
Assert.assertEquals(1, spec.getPartitionNum());
Assert.assertEquals(2, ((HashBasedNumberedShardSpec) spec).getPartitions());
}
@Test
public void testPartitionChunks()
{
final List<ShardSpec> specs = ImmutableList.<ShardSpec>of(
new HashBasedNumberedShardSpec(0, 3),
new HashBasedNumberedShardSpec(1, 3),
new HashBasedNumberedShardSpec(2, 3)
);
final List<PartitionChunk<String>> chunks = Lists.transform(
specs,
new Function<ShardSpec, PartitionChunk<String>>()
{
@Override
public PartitionChunk<String> apply(ShardSpec shardSpec)
{
return shardSpec.createChunk("rofl");
}
}
);
Assert.assertEquals(0, chunks.get(0).getChunkNumber());
Assert.assertEquals(1, chunks.get(1).getChunkNumber());
Assert.assertEquals(2, chunks.get(2).getChunkNumber());
Assert.assertTrue(chunks.get(0).isStart());
Assert.assertFalse(chunks.get(1).isStart());
Assert.assertFalse(chunks.get(2).isStart());
Assert.assertFalse(chunks.get(0).isEnd());
Assert.assertFalse(chunks.get(1).isEnd());
Assert.assertTrue(chunks.get(2).isEnd());
Assert.assertTrue(chunks.get(0).abuts(chunks.get(1)));
Assert.assertTrue(chunks.get(1).abuts(chunks.get(2)));
Assert.assertFalse(chunks.get(0).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(0).abuts(chunks.get(2)));
Assert.assertFalse(chunks.get(1).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(1).abuts(chunks.get(1)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(1)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(2)));
}
}