Revert "Merge branch 'determine-partitions-improvements'"

This reverts commit 7ad228ceb5, reversing changes made to 9c55e2b779.
fjy 2014-02-14 12:47:34 -08:00
parent 5607edb813
commit 189b3e2b9b
29 changed files with 380 additions and 1280 deletions

View File

@ -82,7 +82,6 @@ The interval is the [ISO8601 interval](http://en.wikipedia.org/wiki/ISO_8601#Tim
"segmentOutputPath": "s3n:\/\/billy-bucket\/the\/segments\/go\/here",
"leaveIntermediate": "false",
"partitionsSpec": {
"type": "random"
"targetPartitionSize": 5000000
},
"updaterJobSpec": {
@ -146,20 +145,12 @@ The indexing process has the ability to roll data up as it processes the incoming
### Partitioning specification
Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way depending on partition type.
Druid supports two types of partitionsSpec: singleDimension and random.
With the singleDimension partition type, data is partitioned based on the values in a single dimension.
For example, data for a day may be split by the dimension "last\_name" into two segments: one with all values from A-M and one with all values from N-Z.
With the random partition type, the number of partitions is determined from the targetPartitionSize and the cardinality of the input set, and the data is partitioned based on the hashcode of the row.
The random partition type is more efficient and gives a better distribution of data.
Segments are always partitioned based on timestamp (according to the granularitySpec) and may be further partitioned in some other way. For example, data for a day may be split by the dimension "last\_name" into two segments: one with all values from A-M and one with all values from N-Z.
To use this option, the indexer must be given a target partition size. It can then find a good set of partition ranges on its own.
|property|description|required?|
|--------|-----------|---------|
|type|type of partitionsSpec to be used|no, default: singleDimension|
|targetPartitionSize|target number of rows to include in a partition, should be a number that targets segments of 700MB\~1GB.|yes|
|partitionDimension|the dimension to partition on. Leave blank to select a dimension automatically.|no|
|assumeGrouped|assume input data has already been grouped on time and dimensions. This is faster, but can choose suboptimal partitions if the assumption is violated.|no|
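
For illustration, a partitionsSpec combining the properties in the table above might look like the snippet below; the targetPartitionSize mirrors the example earlier on this page, and "last\_name" is a purely hypothetical partition dimension:

    "partitionsSpec": {
      "targetPartitionSize": 5000000,
      "partitionDimension": "last_name",
      "assumeGrouped": false
    }

If partitionDimension is left out, the indexer selects a suitable dimension automatically, as noted in the table.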

View File

@ -97,11 +97,6 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.clearspring.analytics</groupId>
<artifactId>stream</artifactId>
<version>2.5.2</version>
</dependency>
</dependencies>
<build>

View File

@ -1,357 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import com.clearspring.analytics.stream.cardinality.HyperLogLog;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.google.common.io.Closeables;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import io.druid.granularity.QueryGranularity;
import io.druid.indexer.granularity.UniformGranularitySpec;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.NoneShardSpec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Determines appropriate ShardSpecs for a job by determining approximate cardinality of data set using HyperLogLog
*/
public class DetermineHashedPartitionsJob implements Jobby
{
private static final int MAX_SHARDS = 128;
private static final Logger log = new Logger(DetermineHashedPartitionsJob.class);
private static final int HYPER_LOG_LOG_BIT_SIZE = 20;
private final HadoopDruidIndexerConfig config;
public DetermineHashedPartitionsJob(
HadoopDruidIndexerConfig config
)
{
this.config = config;
}
public boolean run()
{
try {
/*
* Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
* in the final segment.
*/
long startTime = System.currentTimeMillis();
final Job groupByJob = new Job(
new Configuration(),
String.format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals())
);
JobHelper.injectSystemProperties(groupByJob);
groupByJob.setInputFormatClass(TextInputFormat.class);
groupByJob.setMapperClass(DetermineCardinalityMapper.class);
groupByJob.setMapOutputKeyClass(LongWritable.class);
groupByJob.setMapOutputValueClass(BytesWritable.class);
groupByJob.setReducerClass(DetermineCardinalityReducer.class);
groupByJob.setOutputKeyClass(NullWritable.class);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
groupByJob.setNumReduceTasks(1);
JobHelper.setupClasspath(config, groupByJob);
config.addInputPaths(groupByJob);
config.intoConfiguration(groupByJob);
FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
groupByJob.submit();
log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
if (!groupByJob.waitForCompletion(true)) {
log.error("Job failed: %s", groupByJob.getJobID());
return false;
}
/*
* Load partitions and intervals determined by the previous job.
*/
log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
FileSystem fileSystem = null;
if (!config.getSegmentGranularIntervals().isPresent()) {
final Path intervalInfoPath = config.makeIntervalInfoPath();
fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
if (!fileSystem.exists(intervalInfoPath)) {
throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
}
List<Interval> intervals = config.jsonMapper.readValue(
Utils.openInputStream(groupByJob, intervalInfoPath), new TypeReference<List<Interval>>()
{
}
);
config.setGranularitySpec(new UniformGranularitySpec(config.getGranularitySpec().getGranularity(), intervals));
log.info("Determined Intervals for Job [%s]" + config.getSegmentGranularIntervals());
}
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
DateTime bucket = segmentGranularity.getStart();
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
if (fileSystem == null) {
fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
}
if (fileSystem.exists(partitionInfoPath)) {
Long cardinality = config.jsonMapper.readValue(
Utils.openInputStream(groupByJob, partitionInfoPath), new TypeReference<Long>()
{
}
);
int numberOfShards = (int) Math.ceil((double) cardinality / config.getTargetPartitionSize());
if (numberOfShards > MAX_SHARDS) {
throw new ISE(
"Number of shards [%d] exceed the maximum limit of [%d], either targetPartitionSize is too low or data volume is too high",
numberOfShards,
MAX_SHARDS
);
}
List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
if (numberOfShards == 1) {
actualSpecs.add(new HadoopyShardSpec(new NoneShardSpec(), shardCount++));
} else {
for (int i = 0; i < numberOfShards; ++i) {
actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards), shardCount++));
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
}
}
shardSpecs.put(bucket, actualSpecs);
} else {
log.info("Path[%s] didn't exist!?", partitionInfoPath);
}
}
config.setShardSpecs(shardSpecs);
log.info(
"DetermineHashedPartitionsJob took %d millis",
(System.currentTimeMillis() - startTime)
);
return true;
}
catch (Exception e) {
throw Throwables.propagate(e);
}
}
public static class DetermineCardinalityMapper extends HadoopDruidIndexerMapper<LongWritable, BytesWritable>
{
private static HashFunction hashFunction = Hashing.murmur3_128();
private QueryGranularity rollupGranularity = null;
private Map<Interval, HyperLogLog> hyperLogLogs;
private HadoopDruidIndexerConfig config;
private boolean determineIntervals;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
super.setup(context);
rollupGranularity = getConfig().getRollupSpec().getRollupGranularity();
config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
Optional<Set<Interval>> intervals = config.getSegmentGranularIntervals();
if (intervals.isPresent()) {
determineIntervals = false;
final ImmutableMap.Builder<Interval, HyperLogLog> builder = ImmutableMap.builder();
for (final Interval bucketInterval : intervals.get()) {
builder.put(bucketInterval, new HyperLogLog(HYPER_LOG_LOG_BIT_SIZE));
}
hyperLogLogs = builder.build();
} else {
determineIntervals = true;
hyperLogLogs = Maps.newHashMap();
}
}
@Override
protected void innerMap(
InputRow inputRow,
Text text,
Context context
) throws IOException, InterruptedException
{
final List<Object> groupKey = Rows.toGroupKey(
rollupGranularity.truncate(inputRow.getTimestampFromEpoch()),
inputRow
);
Interval interval;
if (determineIntervals) {
interval = config.getGranularitySpec().getGranularity().bucket(new DateTime(inputRow.getTimestampFromEpoch()));
if (!hyperLogLogs.containsKey(interval)) {
hyperLogLogs.put(interval, new HyperLogLog(HYPER_LOG_LOG_BIT_SIZE));
}
} else {
final Optional<Interval> maybeInterval = config.getGranularitySpec()
.bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()));
if (!maybeInterval.isPresent()) {
throw new ISE("WTF?! No bucket found for timestamp: %s", inputRow.getTimestampFromEpoch());
}
interval = maybeInterval.get();
}
hyperLogLogs.get(interval)
.offerHashed(
hashFunction.hashBytes(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey))
.asLong()
);
}
@Override
public void run(Context context) throws IOException, InterruptedException
{
setup(context);
while (context.nextKeyValue()) {
map(context.getCurrentKey(), context.getCurrentValue(), context);
}
for (Map.Entry<Interval, HyperLogLog> entry : hyperLogLogs.entrySet()) {
context.write(
new LongWritable(entry.getKey().getStartMillis()),
new BytesWritable(entry.getValue().getBytes())
);
}
cleanup(context);
}
}
public static class DetermineCardinalityReducer
extends Reducer<LongWritable, BytesWritable, NullWritable, NullWritable>
{
private final List<Interval> intervals = Lists.newArrayList();
protected HadoopDruidIndexerConfig config = null;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
}
@Override
protected void reduce(
LongWritable key,
Iterable<BytesWritable> values,
Context context
) throws IOException, InterruptedException
{
HyperLogLog aggregate = new HyperLogLog(HYPER_LOG_LOG_BIT_SIZE);
for (BytesWritable value : values) {
HyperLogLog logValue = HyperLogLog.Builder.build(value.getBytes());
try {
aggregate.addAll(logValue);
}
catch (CardinalityMergeException e) {
e.printStackTrace(); // TODO: check for better handling
}
}
Interval interval = config.getGranularitySpec().getGranularity().bucket(new DateTime(key.get()));
intervals.add(interval);
final Path outPath = config.makeSegmentPartitionInfoPath(interval);
final OutputStream out = Utils.makePathAndOutputStream(
context, outPath, config.isOverwriteFiles()
);
try {
HadoopDruidIndexerConfig.jsonMapper.writerWithType(
new TypeReference<Long>()
{
}
).writeValue(
out,
aggregate.cardinality()
);
}
finally {
Closeables.close(out, false);
}
}
@Override
public void run(Context context)
throws IOException, InterruptedException
{
super.run(context);
if (!config.getSegmentGranularIntervals().isPresent()) {
final Path outPath = config.makeIntervalInfoPath();
final OutputStream out = Utils.makePathAndOutputStream(
context, outPath, config.isOverwriteFiles()
);
try {
HadoopDruidIndexerConfig.jsonMapper.writerWithType(
new TypeReference<List<Interval>>()
{
}
).writeValue(
out,
intervals
);
}
finally {
Closeables.close(out, false);
}
}
}
}
}

View File

@ -26,7 +26,9 @@ import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
@ -38,9 +40,7 @@ import com.metamx.common.guava.nary.BinaryFn;
import com.metamx.common.logger.Logger;
import io.druid.collections.CombiningIterable;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import io.druid.granularity.QueryGranularity;
import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.ShardSpec;
import io.druid.timeline.partition.SingleDimensionShardSpec;
@ -76,6 +76,7 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Determines appropriate ShardSpecs for a job by determining whether or not partitioning is necessary, and if so,
@ -106,6 +107,16 @@ public class DeterminePartitionsJob implements Jobby
this.config = config;
}
public static void injectSystemProperties(Job job)
{
final Configuration conf = job.getConfiguration();
for (String propName : System.getProperties().stringPropertyNames()) {
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
}
public boolean run()
{
try {
@ -114,17 +125,13 @@ public class DeterminePartitionsJob implements Jobby
* in the final segment.
*/
if(!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)){
throw new ISE("DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]", config.getPartitionsSpec());
}
if (!config.getPartitionsSpec().isAssumeGrouped()) {
final Job groupByJob = new Job(
new Configuration(),
String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())
);
JobHelper.injectSystemProperties(groupByJob);
injectSystemProperties(groupByJob);
groupByJob.setInputFormatClass(TextInputFormat.class);
groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
groupByJob.setMapOutputKeyClass(BytesWritable.class);
@ -161,7 +168,7 @@ public class DeterminePartitionsJob implements Jobby
dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
JobHelper.injectSystemProperties(dimSelectionJob);
injectSystemProperties(dimSelectionJob);
if (!config.getPartitionsSpec().isAssumeGrouped()) {
// Read grouped data from the groupByJob.
@ -183,7 +190,7 @@ public class DeterminePartitionsJob implements Jobby
dimSelectionJob.setOutputValueClass(Text.class);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().size());
JobHelper.setupClasspath(config, dimSelectionJob);
config.intoConfiguration(dimSelectionJob);
@ -209,8 +216,10 @@ public class DeterminePartitionsJob implements Jobby
FileSystem fileSystem = null;
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
DateTime bucket = segmentGranularity.getStart();
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
if (fileSystem == null) {
fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
}
@ -224,10 +233,10 @@ public class DeterminePartitionsJob implements Jobby
List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
for (int i = 0; i < specs.size(); ++i) {
actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
}
shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
shardSpecs.put(bucket, actualSpecs);
} else {
log.info("Path[%s] didn't exist!?", partitionInfoPath);
}
@ -260,9 +269,17 @@ public class DeterminePartitionsJob implements Jobby
Context context
) throws IOException, InterruptedException
{
final List<Object> groupKey = Rows.toGroupKey(
// Create group key, there are probably more efficient ways of doing this
final Map<String, Set<String>> dims = Maps.newTreeMap();
for (final String dim : inputRow.getDimensions()) {
final Set<String> dimValues = ImmutableSortedSet.copyOf(inputRow.getDimension(dim));
if (dimValues.size() > 0) {
dims.put(dim, dimValues);
}
}
final List<Object> groupKey = ImmutableList.of(
rollupGranularity.truncate(inputRow.getTimestampFromEpoch()),
inputRow
dims
);
context.write(
new BytesWritable(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey)),
@ -298,8 +315,8 @@ public class DeterminePartitionsJob implements Jobby
throws IOException, InterruptedException
{
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
SingleDimensionPartitionsSpec spec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, spec.getPartitionDimension());
final String partitionDimension = config.getPartitionDimension();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension);
}
@Override
@ -330,8 +347,8 @@ public class DeterminePartitionsJob implements Jobby
{
super.setup(context);
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfigBuilder.fromConfiguration(context.getConfiguration());
final SingleDimensionPartitionsSpec spec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, spec.getPartitionDimension());
final String partitionDimension = config.getPartitionDimension();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension);
}
@Override
@ -366,7 +383,7 @@ public class DeterminePartitionsJob implements Jobby
final ImmutableMap.Builder<DateTime, Integer> timeIndexBuilder = ImmutableMap.builder();
int idx = 0;
for (final Interval bucketInterval : config.getGranularitySpec().bucketIntervals().get()) {
for (final Interval bucketInterval : config.getGranularitySpec().bucketIntervals()) {
timeIndexBuilder.put(bucketInterval.getStart(), idx);
idx++;
}
@ -724,7 +741,7 @@ public class DeterminePartitionsJob implements Jobby
}
final OutputStream out = Utils.makePathAndOutputStream(
context, config.makeSegmentPartitionInfoPath(config.getGranularitySpec().getGranularity().bucket(bucket)), config.isOverwriteFiles()
context, config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)), config.isOverwriteFiles()
);
final DimPartitions chosenPartitions = maxCardinality > HIGH_CARDINALITY_THRESHOLD

View File

@ -1,74 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import com.metamx.common.logger.Logger;
import io.druid.timeline.partition.NoneShardSpec;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;
import java.util.List;
import java.util.Map;
/**
*/
public class HadoopDruidDetermineConfigurationJob implements Jobby
{
private static final Logger log = new Logger(HadoopDruidDetermineConfigurationJob.class);
private final HadoopDruidIndexerConfig config;
@Inject
public HadoopDruidDetermineConfigurationJob(
HadoopDruidIndexerConfig config
)
{
this.config = config;
}
@Override
public boolean run()
{
List<Jobby> jobs = Lists.newArrayList();
JobHelper.ensurePaths(config);
if (config.isDeterminingPartitions()) {
jobs.add(config.getPartitionsSpec().getPartitionJob(config));
} else {
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
DateTime bucket = segmentGranularity.getStart();
final HadoopyShardSpec spec = new HadoopyShardSpec(new NoneShardSpec(), shardCount++);
shardSpecs.put(bucket, Lists.newArrayList(spec));
log.info("DateTime[%s], spec[%s]", bucket, spec);
}
config.setShardSpecs(shardSpecs);
}
return JobHelper.runJobs(jobs, config);
}
}

View File

@ -50,7 +50,6 @@ import io.druid.guice.annotations.Self;
import io.druid.indexer.granularity.GranularitySpec;
import io.druid.indexer.granularity.UniformGranularitySpec;
import io.druid.indexer.partitions.PartitionsSpec;
import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.indexer.path.PathSpec;
import io.druid.indexer.rollup.DataRollupSpec;
import io.druid.indexer.updater.DbUpdaterJobSpec;
@ -74,7 +73,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
/**
*/
@ -180,7 +178,7 @@ public class HadoopDruidIndexerConfig
this.partitionsSpec = partitionsSpec;
} else {
// Backwards compatibility
this.partitionsSpec = new SingleDimensionPartitionsSpec(partitionDimension, targetPartitionSize, null, false);
this.partitionsSpec = new PartitionsSpec(partitionDimension, targetPartitionSize, null, false);
}
if (granularitySpec != null) {
@ -380,17 +378,17 @@ public class HadoopDruidIndexerConfig
this.ignoreInvalidRows = ignoreInvalidRows;
}
public Optional<List<Interval>> getIntervals()
public List<Interval> getIntervals()
{
Optional<SortedSet<Interval>> setOptional = getGranularitySpec().bucketIntervals();
if (setOptional.isPresent()) {
return Optional.of((List<Interval>) JodaUtils.condenseIntervals(setOptional.get()));
} else {
return Optional.absent();
}
return JodaUtils.condenseIntervals(getGranularitySpec().bucketIntervals());
}
public boolean isDeterminingPartitions()
public String getPartitionDimension()
{
return partitionsSpec.getPartitionDimension();
}
public boolean partitionByDimension()
{
return partitionsSpec.isDeterminingPartitions();
}
@ -485,70 +483,64 @@ public class HadoopDruidIndexerConfig
throw new ISE("row[%s] doesn't fit in any shard[%s]", inputRow, shards);
}
public Optional<Set<Interval>> getSegmentGranularIntervals()
public Set<Interval> getSegmentGranularIntervals()
{
return Optional.fromNullable((Set<Interval>) granularitySpec.bucketIntervals().orNull());
return granularitySpec.bucketIntervals();
}
public Optional<Iterable<Bucket>> getAllBuckets()
public Iterable<Bucket> getAllBuckets()
{
Optional<Set<Interval>> intervals = getSegmentGranularIntervals();
if (intervals.isPresent()) {
return Optional.of(
(Iterable<Bucket>) FunctionalIterable
.create(intervals.get())
.transformCat(
new Function<Interval, Iterable<Bucket>>()
{
@Override
public Iterable<Bucket> apply(Interval input)
{
final DateTime bucketTime = input.getStart();
final List<HadoopyShardSpec> specs = shardSpecs.get(bucketTime);
if (specs == null) {
return ImmutableList.of();
}
return FunctionalIterable
.create(getSegmentGranularIntervals())
.transformCat(
new Function<Interval, Iterable<Bucket>>()
{
@Override
public Iterable<Bucket> apply(Interval input)
{
final DateTime bucketTime = input.getStart();
final List<HadoopyShardSpec> specs = shardSpecs.get(bucketTime);
if (specs == null) {
return ImmutableList.of();
}
return FunctionalIterable
.create(specs)
.transform(
new Function<HadoopyShardSpec, Bucket>()
{
int i = 0;
return FunctionalIterable
.create(specs)
.transform(
new Function<HadoopyShardSpec, Bucket>()
{
int i = 0;
@Override
public Bucket apply(HadoopyShardSpec input)
{
return new Bucket(input.getShardNum(), bucketTime, i++);
}
}
);
}
}
)
);
} else {
return Optional.absent();
}
@Override
public Bucket apply(HadoopyShardSpec input)
{
return new Bucket(input.getShardNum(), bucketTime, i++);
}
}
);
}
}
);
}
/******************************************
Path helper logic
******************************************/
/**
* Make the intermediate path for this job run.
*
* @return the intermediate path for this job run.
*/
/******************************************
Path helper logic
******************************************/
/**
* Make the intermediate path for this job run.
*
* @return the intermediate path for this job run.
*/
public Path makeIntermediatePath()
{
return new Path(String.format("%s/%s/%s", getWorkingPath(), getDataSource(), getVersion().replace(":", "")));
}
public Path makeSegmentPartitionInfoPath(Interval bucketInterval)
public Path makeSegmentPartitionInfoPath(Bucket bucket)
{
final Interval bucketInterval = getGranularitySpec().bucketInterval(bucket.time).get();
return new Path(
String.format(
"%s/%s_%s/partitions.json",
@ -559,16 +551,6 @@ public class HadoopDruidIndexerConfig
);
}
public Path makeIntervalInfoPath()
{
return new Path(
String.format(
"%s/intervals.json",
makeIntermediatePath()
)
);
}
public Path makeDescriptorInfoDir()
{
return new Path(makeIntermediatePath(), "segmentDescriptorInfo");
@ -643,5 +625,8 @@ public class HadoopDruidIndexerConfig
Preconditions.checkNotNull(segmentOutputPath, "segmentOutputPath");
Preconditions.checkNotNull(version, "version");
Preconditions.checkNotNull(rollupSpec, "rollupSpec");
final int nIntervals = getIntervals().size();
Preconditions.checkArgument(nIntervals > 0, "intervals.size()[%s] <= 0", nIntervals);
}
}

View File

@ -46,10 +46,6 @@ public class HadoopDruidIndexerConfigBuilder
return HadoopDruidIndexerConfig.jsonMapper.convertValue(schema, HadoopDruidIndexerConfig.class);
}
public static HadoopDruidIndexerSchema toSchema(HadoopDruidIndexerConfig config){
return HadoopDruidIndexerConfig.jsonMapper.convertValue(config, HadoopDruidIndexerSchema.class);
}
public static HadoopDruidIndexerConfig fromMap(Map<String, Object> argSpec)
{
return HadoopDruidIndexerConfig.jsonMapper.convertValue(argSpec, HadoopDruidIndexerConfig.class);

View File

@ -19,20 +19,34 @@
package io.druid.indexer;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.NoneShardSpec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.Interval;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
*/
public class HadoopDruidIndexerJob implements Jobby
{
private static final Logger log = new Logger(HadoopDruidIndexerJob.class);
private final HadoopDruidIndexerConfig config;
private final DbUpdaterJob dbUpdaterJob;
private IndexGeneratorJob indexJob;
private volatile List<DataSegment> publishedSegments = null;
@ -55,7 +69,23 @@ public class HadoopDruidIndexerJob implements Jobby
public boolean run()
{
List<Jobby> jobs = Lists.newArrayList();
JobHelper.ensurePaths(config);
ensurePaths();
if (config.partitionByDimension()) {
jobs.add(new DeterminePartitionsJob(config));
}
else {
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
DateTime bucket = segmentGranularity.getStart();
final HadoopyShardSpec spec = new HadoopyShardSpec(new NoneShardSpec(), shardCount++);
shardSpecs.put(bucket, Lists.newArrayList(spec));
log.info("DateTime[%s], spec[%s]", bucket, spec);
}
config.setShardSpecs(shardSpecs);
}
indexJob = new IndexGeneratorJob(config);
jobs.add(indexJob);
@ -66,15 +96,65 @@ public class HadoopDruidIndexerJob implements Jobby
log.info("No updaterJobSpec set, not uploading to database");
}
String failedMessage = null;
for (Jobby job : jobs) {
if (failedMessage == null) {
if (!job.run()) {
failedMessage = String.format("Job[%s] failed!", job.getClass());
}
}
}
if (failedMessage == null) {
publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
}
if (!config.isLeaveIntermediate()) {
if (failedMessage == null || config.isCleanupOnFailure()) {
Path workingPath = config.makeIntermediatePath();
log.info("Deleting path[%s]", workingPath);
try {
workingPath.getFileSystem(new Configuration()).delete(workingPath, true);
}
catch (IOException e) {
log.error(e, "Failed to cleanup path[%s]", workingPath);
}
}
}
if (failedMessage != null) {
throw new ISE(failedMessage);
}
JobHelper.runJobs(jobs, config);
publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
return true;
}
public List<DataSegment> getPublishedSegments()
private void ensurePaths()
{
if (publishedSegments == null) {
// config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs to be done before anything else
try {
Job job = new Job(
new Configuration(),
String.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.19");
for (String propName : System.getProperties().stringPropertyNames()) {
Configuration conf = job.getConfiguration();
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
config.addInputPaths(job);
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
public List<DataSegment> getPublishedSegments() {
if(publishedSegments == null) {
throw new IllegalStateException("Job hasn't run yet. No segments have been published yet.");
}
return publishedSegments;

View File

@ -22,7 +22,6 @@ package io.druid.indexer;
import com.metamx.common.RE;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.indexer.granularity.GranularitySpec;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
@ -71,9 +70,8 @@ public abstract class HadoopDruidIndexerMapper<KEYOUT, VALUEOUT> extends Mapper<
throw e;
}
}
GranularitySpec spec = config.getGranularitySpec();
if (!spec.bucketIntervals().isPresent() || spec.bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()))
.isPresent()) {
if(config.getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch())).isPresent()) {
innerMap(inputRow, value, context);
}
}

View File

@ -44,7 +44,6 @@ import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.timeline.DataSegment;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
@ -85,7 +84,9 @@ import java.util.zip.ZipOutputStream;
public class IndexGeneratorJob implements Jobby
{
private static final Logger log = new Logger(IndexGeneratorJob.class);
private final HadoopDruidIndexerConfig config;
private IndexGeneratorStats jobStats;
public IndexGeneratorJob(
@ -96,6 +97,65 @@ public class IndexGeneratorJob implements Jobby
this.jobStats = new IndexGeneratorStats();
}
public IndexGeneratorStats getJobStats()
{
return jobStats;
}
public boolean run()
{
try {
Job job = new Job(
new Configuration(),
String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.23");
for (String propName : System.getProperties().stringPropertyNames()) {
Configuration conf = job.getConfiguration();
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(IndexGeneratorMapper.class);
job.setMapOutputValueClass(Text.class);
SortableBytes.useSortableBytesAsMapOutputKey(job);
job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
job.setPartitionerClass(IndexGeneratorPartitioner.class);
job.setReducerClass(IndexGeneratorReducer.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
config.addInputPaths(job);
config.intoConfiguration(job);
JobHelper.setupClasspath(config, job);
job.submit();
log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
boolean success = job.waitForCompletion(true);
Counter invalidRowCount = job.getCounters()
.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
jobStats.setInvalidRowCount(invalidRowCount.getValue());
return success;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig config)
{
@ -129,60 +189,6 @@ public class IndexGeneratorJob implements Jobby
return publishedSegments;
}
public IndexGeneratorStats getJobStats()
{
return jobStats;
}
public boolean run()
{
try {
Job job = new Job(
new Configuration(),
String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.23");
JobHelper.injectSystemProperties(job);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(IndexGeneratorMapper.class);
job.setMapOutputValueClass(Text.class);
SortableBytes.useSortableBytesAsMapOutputKey(job);
job.setNumReduceTasks(Iterables.size(config.getAllBuckets().get()));
job.setPartitionerClass(IndexGeneratorPartitioner.class);
job.setReducerClass(IndexGeneratorReducer.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
config.addInputPaths(job);
config.intoConfiguration(job);
JobHelper.setupClasspath(config, job);
job.submit();
log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
boolean success = job.waitForCompletion(true);
Counter invalidRowCount = job.getCounters()
.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
jobStats.setInvalidRowCount(invalidRowCount.getValue());
return success;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper<BytesWritable, Text>
{
@ -210,9 +216,8 @@ public class IndexGeneratorJob implements Jobby
}
}
public static class IndexGeneratorPartitioner extends Partitioner<BytesWritable, Text> implements Configurable
public static class IndexGeneratorPartitioner extends Partitioner<BytesWritable, Text>
{
private Configuration config;
@Override
public int getPartition(BytesWritable bytesWritable, Text text, int numPartitions)
@ -220,27 +225,12 @@ public class IndexGeneratorJob implements Jobby
final ByteBuffer bytes = ByteBuffer.wrap(bytesWritable.getBytes());
bytes.position(4); // Skip length added by SortableBytes
int shardNum = bytes.getInt();
if (config.get("mapred.job.tracker").equals("local")) {
return shardNum % numPartitions;
} else {
if (shardNum >= numPartitions) {
throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions);
}
return shardNum;
if (shardNum >= numPartitions) {
throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions);
}
}
@Override
public Configuration getConf()
{
return config;
}
@Override
public void setConf(Configuration config)
{
this.config = config;
return shardNum;
}
}

View File

@ -20,11 +20,9 @@
package io.druid.indexer;
import com.google.api.client.util.Sets;
import com.google.common.base.Throwables;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.common.io.OutputSupplier;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@ -36,7 +34,6 @@ import org.apache.hadoop.mapreduce.Job;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Set;
/**
@ -97,63 +94,4 @@ public class JobHelper
}
}
}
public static void injectSystemProperties(Job job)
{
final Configuration conf = job.getConfiguration();
for (String propName : System.getProperties().stringPropertyNames()) {
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
}
}
public static void ensurePaths(HadoopDruidIndexerConfig config)
{
// config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs to be done before anything else
try {
Job job = new Job(
new Configuration(),
String.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.19");
injectSystemProperties(job);
config.addInputPaths(job);
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config){
String failedMessage = null;
for (Jobby job : jobs) {
if (failedMessage == null) {
if (!job.run()) {
failedMessage = String.format("Job[%s] failed!", job.getClass());
}
}
}
if (!config.isLeaveIntermediate()) {
if (failedMessage == null || config.isCleanupOnFailure()) {
Path workingPath = config.makeIntermediatePath();
log.info("Deleting path[%s]", workingPath);
try {
workingPath.getFileSystem(new Configuration()).delete(workingPath, true);
}
catch (IOException e) {
log.error(e, "Failed to cleanup path[%s]", workingPath);
}
}
}
if (failedMessage != null) {
throw new ISE(failedMessage);
}
return true;
}
}

View File

@ -72,9 +72,9 @@ public class ArbitraryGranularitySpec implements GranularitySpec
@Override
@JsonProperty("intervals")
public Optional<SortedSet<Interval>> bucketIntervals()
public SortedSet<Interval> bucketIntervals()
{
return Optional.of((SortedSet<Interval>) intervals);
return intervals;
}
@Override

View File

@ -40,7 +40,7 @@ import java.util.SortedSet;
public interface GranularitySpec
{
/** Set of all time groups, broken up on segment boundaries. Should be sorted by interval start and non-overlapping.*/
public Optional<SortedSet<Interval>> bucketIntervals();
public SortedSet<Interval> bucketIntervals();
/** Time-grouping interval corresponding to some instant, if any. */
public Optional<Interval> bucketInterval(DateTime dt);

View File

@ -35,7 +35,7 @@ import java.util.SortedSet;
public class UniformGranularitySpec implements GranularitySpec
{
final private Granularity granularity;
final private Iterable<Interval> inputIntervals;
final private List<Interval> inputIntervals;
final private ArbitraryGranularitySpec wrappedSpec;
@JsonCreator
@ -44,28 +44,21 @@ public class UniformGranularitySpec implements GranularitySpec
@JsonProperty("intervals") List<Interval> inputIntervals
)
{
this.granularity = granularity;
if (inputIntervals != null) {
List<Interval> granularIntervals = Lists.newArrayList();
for (Interval inputInterval : inputIntervals) {
Iterables.addAll(granularIntervals, granularity.getIterable(inputInterval));
}
this.inputIntervals = ImmutableList.copyOf(inputIntervals);
this.wrappedSpec = new ArbitraryGranularitySpec(granularIntervals);
} else {
this.inputIntervals = null;
this.wrappedSpec = null;
List<Interval> granularIntervals = Lists.newArrayList();
for (Interval inputInterval : inputIntervals) {
Iterables.addAll(granularIntervals, granularity.getIterable(inputInterval));
}
this.granularity = granularity;
this.inputIntervals = ImmutableList.copyOf(inputIntervals);
this.wrappedSpec = new ArbitraryGranularitySpec(granularIntervals);
}
@Override
public Optional<SortedSet<Interval>> bucketIntervals()
public SortedSet<Interval> bucketIntervals()
{
if (wrappedSpec == null) {
return Optional.absent();
} else {
return wrappedSpec.bucketIntervals();
}
return wrappedSpec.bucketIntervals();
}
@Override
@ -82,8 +75,8 @@ public class UniformGranularitySpec implements GranularitySpec
}
@JsonProperty("intervals")
public Optional<Iterable<Interval>> getIntervals()
public Iterable<Interval> getIntervals()
{
return Optional.fromNullable(inputIntervals);
return inputIntervals;
}
}

View File

@ -1,68 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonProperty;
public abstract class AbstractPartitionsSpec implements PartitionsSpec
{
private static final double DEFAULT_OVERSIZE_THRESHOLD = 1.5;
private final long targetPartitionSize;
private final long maxPartitionSize;
private final boolean assumeGrouped;
public AbstractPartitionsSpec(
Long targetPartitionSize,
Long maxPartitionSize,
Boolean assumeGrouped
)
{
this.targetPartitionSize = targetPartitionSize == null ? -1 : targetPartitionSize;
this.maxPartitionSize = maxPartitionSize == null
? (long) (this.targetPartitionSize * DEFAULT_OVERSIZE_THRESHOLD)
: maxPartitionSize;
this.assumeGrouped = assumeGrouped == null ? false : assumeGrouped;
}
@JsonProperty
public long getTargetPartitionSize()
{
return targetPartitionSize;
}
@JsonProperty
public long getMaxPartitionSize()
{
return maxPartitionSize;
}
@JsonProperty
public boolean isAssumeGrouped()
{
return assumeGrouped;
}
@Override
public boolean isDeterminingPartitions()
{
return targetPartitionSize > 0;
}
}

View File

@ -19,33 +19,69 @@
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.Jobby;
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = SingleDimensionPartitionsSpec.class)
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "dimension", value = SingleDimensionPartitionsSpec.class),
@JsonSubTypes.Type(name = "random", value = RandomPartitionsSpec.class)
})
public interface PartitionsSpec
import javax.annotation.Nullable;
public class PartitionsSpec
{
@JsonIgnore
public Jobby getPartitionJob(HadoopDruidIndexerConfig config);
private static final double DEFAULT_OVERSIZE_THRESHOLD = 1.5;
@JsonProperty
public long getTargetPartitionSize();
@Nullable
private final String partitionDimension;
@JsonProperty
public long getMaxPartitionSize();
private final long targetPartitionSize;
@JsonProperty
public boolean isAssumeGrouped();
private final long maxPartitionSize;
private final boolean assumeGrouped;
@JsonCreator
public PartitionsSpec(
@JsonProperty("partitionDimension") @Nullable String partitionDimension,
@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
@JsonProperty("maxPartitionSize") @Nullable Long maxPartitionSize,
@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
)
{
this.partitionDimension = partitionDimension;
this.targetPartitionSize = targetPartitionSize == null ? -1 : targetPartitionSize;
this.maxPartitionSize = maxPartitionSize == null
? (long) (this.targetPartitionSize * DEFAULT_OVERSIZE_THRESHOLD)
: maxPartitionSize;
this.assumeGrouped = assumeGrouped == null ? false : assumeGrouped;
}
@JsonIgnore
public boolean isDeterminingPartitions();
public boolean isDeterminingPartitions()
{
return targetPartitionSize > 0;
}
@JsonProperty
@Nullable
public String getPartitionDimension()
{
return partitionDimension;
}
@JsonProperty
public long getTargetPartitionSize()
{
return targetPartitionSize;
}
@JsonProperty
public long getMaxPartitionSize()
{
return maxPartitionSize;
}
@JsonProperty
public boolean isAssumeGrouped()
{
return assumeGrouped;
}
}

View File

@ -1,47 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.indexer.DetermineHashedPartitionsJob;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.Jobby;
import javax.annotation.Nullable;
public class RandomPartitionsSpec extends AbstractPartitionsSpec
{
@JsonCreator
public RandomPartitionsSpec(
@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
@JsonProperty("maxPartitionSize") @Nullable Long maxPartitionSize,
@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
)
{
super(targetPartitionSize, maxPartitionSize, assumeGrouped);
}
@Override
public Jobby getPartitionJob(HadoopDruidIndexerConfig config)
{
return new DetermineHashedPartitionsJob(config);
}
}

View File

@ -1,60 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.indexer.partitions;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.indexer.DeterminePartitionsJob;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.Jobby;
import javax.annotation.Nullable;
public class SingleDimensionPartitionsSpec extends AbstractPartitionsSpec
{
@Nullable
private final String partitionDimension;
@JsonCreator
public SingleDimensionPartitionsSpec(
@JsonProperty("partitionDimension") @Nullable String partitionDimension,
@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
@JsonProperty("maxPartitionSize") @Nullable Long maxPartitionSize,
@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
)
{
super(targetPartitionSize, maxPartitionSize, assumeGrouped);
this.partitionDimension = partitionDimension;
}
@JsonProperty
@Nullable
public String getPartitionDimension()
{
return partitionDimension;
}
@Override
public Jobby getPartitionJob(HadoopDruidIndexerConfig config)
{
return new DeterminePartitionsJob(config);
}
}

View File

@ -20,7 +20,6 @@
package io.druid.indexer.path;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Optional;
import com.google.common.collect.Sets;
import com.metamx.common.Granularity;
import com.metamx.common.guava.Comparators;
@ -100,12 +99,9 @@ public class GranularityPathSpec implements PathSpec
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException
{
final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
if (optionalIntervals.isPresent()) {
for (Interval segmentInterval : optionalIntervals.get()) {
for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
intervals.add(dataInterval);
}
for (Interval segmentInterval : config.getSegmentGranularIntervals()) {
for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
intervals.add(dataInterval);
}
}

View File

@ -25,8 +25,6 @@ import com.google.common.collect.Lists;
import io.druid.db.DbConnectorConfig;
import io.druid.indexer.granularity.UniformGranularitySpec;
import io.druid.indexer.partitions.PartitionsSpec;
import io.druid.indexer.partitions.RandomPartitionsSpec;
import io.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import io.druid.indexer.updater.DbUpdaterJobSpec;
import io.druid.jackson.DefaultObjectMapper;
import org.apache.hadoop.fs.LocalFileSystem;
@ -67,7 +65,7 @@ public class HadoopDruidIndexerConfigTest
Assert.assertEquals(
"getIntervals",
Lists.newArrayList(new Interval("2012-01-01/P1D")),
granularitySpec.getIntervals().get()
granularitySpec.getIntervals()
);
Assert.assertEquals(
@ -101,7 +99,7 @@ public class HadoopDruidIndexerConfigTest
Assert.assertEquals(
"getIntervals",
Lists.newArrayList(new Interval("2012-02-01/P1D")),
granularitySpec.getIntervals().get()
granularitySpec.getIntervals()
);
Assert.assertEquals(
@ -169,14 +167,15 @@ public class HadoopDruidIndexerConfigTest
100
);
Assert.assertTrue(
"partitionSpec",
partitionsSpec instanceof SingleDimensionPartitionsSpec
Assert.assertEquals(
"getPartitionDimension",
partitionsSpec.getPartitionDimension(),
null
);
}
@Test
public void testPartitionsSpecSpecificDimensionLegacy()
public void testPartitionsSpecSpecificDimension()
{
final HadoopDruidIndexerConfig cfg;
@ -215,10 +214,9 @@ public class HadoopDruidIndexerConfigTest
150
);
Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
Assert.assertEquals(
"getPartitionDimension",
((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
partitionsSpec.getPartitionDimension(),
"foo"
);
}
@ -261,10 +259,9 @@ public class HadoopDruidIndexerConfigTest
150
);
Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
Assert.assertEquals(
"getPartitionDimension",
((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
partitionsSpec.getPartitionDimension(),
"foo"
);
}
@ -310,10 +307,9 @@ public class HadoopDruidIndexerConfigTest
200
);
Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof SingleDimensionPartitionsSpec);
Assert.assertEquals(
"getPartitionDimension",
((SingleDimensionPartitionsSpec)partitionsSpec).getPartitionDimension(),
partitionsSpec.getPartitionDimension(),
"foo"
);
}
@ -424,6 +420,7 @@ public class HadoopDruidIndexerConfigTest
);
}
@Test
public void shouldMakeHDFSCompliantSegmentOutputPath()
{
@ -500,47 +497,4 @@ public class HadoopDruidIndexerConfigTest
throw Throwables.propagate(e);
}
}
public void testRandomPartitionsSpec() throws Exception{
{
final HadoopDruidIndexerConfig cfg;
try {
cfg = jsonReadWriteRead(
"{"
+ "\"partitionsSpec\":{"
+ " \"targetPartitionSize\":100,"
+ " \"type\":\"random\""
+ " }"
+ "}",
HadoopDruidIndexerConfig.class
);
}
catch (Exception e) {
throw Throwables.propagate(e);
}
final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec();
Assert.assertEquals(
"isDeterminingPartitions",
partitionsSpec.isDeterminingPartitions(),
true
);
Assert.assertEquals(
"getTargetPartitionSize",
partitionsSpec.getTargetPartitionSize(),
100
);
Assert.assertEquals(
"getMaxPartitionSize",
partitionsSpec.getMaxPartitionSize(),
150
);
Assert.assertTrue("partitionsSpec" , partitionsSpec instanceof RandomPartitionsSpec);
}
}
}

View File

@ -54,7 +54,7 @@ public class ArbitraryGranularityTest
new Interval("2012-01-08T00Z/2012-01-11T00Z"),
new Interval("2012-02-01T00Z/2012-03-01T00Z")
),
Lists.newArrayList(spec.bucketIntervals().get())
Lists.newArrayList(spec.bucketIntervals())
);
Assert.assertEquals(

View File

@ -57,7 +57,7 @@ public class UniformGranularityTest
new Interval("2012-01-09T00Z/P1D"),
new Interval("2012-01-10T00Z/P1D")
),
Lists.newArrayList(spec.bucketIntervals().get())
Lists.newArrayList(spec.bucketIntervals())
);
Assert.assertEquals(

View File

@ -24,23 +24,25 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.api.client.util.Lists;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.metamx.common.logger.Logger;
import io.druid.common.utils.JodaUtils;
import io.druid.indexer.HadoopDruidDetermineConfigurationJob;
import io.druid.indexer.HadoopDruidIndexerConfig;
import io.druid.indexer.HadoopDruidIndexerConfigBuilder;
import io.druid.indexer.HadoopDruidIndexerJob;
import io.druid.indexer.HadoopDruidIndexerSchema;
import io.druid.indexer.Jobby;
import io.druid.indexing.common.TaskLock;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.TaskToolbox;
import io.druid.indexing.common.actions.LockAcquireAction;
import io.druid.indexing.common.actions.LockTryAcquireAction;
import io.druid.indexing.common.actions.SegmentInsertAction;
import io.druid.indexing.common.actions.TaskActionClient;
import io.druid.initialization.Initialization;
import io.druid.server.initialization.ExtensionsConfig;
@ -49,26 +51,30 @@ import io.tesla.aether.internal.DefaultTeslaAether;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.File;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.SortedSet;
import java.util.Map;
public class HadoopIndexTask extends AbstractTask
public class HadoopIndexTask extends AbstractFixedIntervalTask
{
private static final Logger log = new Logger(HadoopIndexTask.class);
private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3";
private static final ExtensionsConfig extensionsConfig;
static {
extensionsConfig = Initialization.makeStartupInjector().getInstance(ExtensionsConfig.class);
}
private static String defaultHadoopCoordinates = "org.apache.hadoop:hadoop-core:1.0.3";
@JsonIgnore
private final HadoopDruidIndexerSchema schema;
@JsonIgnore
private final String hadoopCoordinates;
@ -91,7 +97,13 @@ public class HadoopIndexTask extends AbstractTask
{
super(
id != null ? id : String.format("index_hadoop_%s_%s", schema.getDataSource(), new DateTime()),
schema.getDataSource()
schema.getDataSource(),
JodaUtils.umbrellaInterval(
JodaUtils.condenseIntervals(
schema.getGranularitySpec()
.bucketIntervals()
)
)
);
// Some HadoopDruidIndexerSchema stuff doesn't make sense in the context of the indexing service
@ -109,22 +121,6 @@ public class HadoopIndexTask extends AbstractTask
return "index_hadoop";
}
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
Optional<SortedSet<Interval>> intervals = schema.getGranularitySpec().bucketIntervals();
if (intervals.isPresent()) {
Interval interval = JodaUtils.umbrellaInterval(
JodaUtils.condenseIntervals(
intervals.get()
)
);
return taskActionClient.submit(new LockTryAcquireAction(interval)).isPresent();
} else {
return true;
}
}
@JsonProperty("config")
public HadoopDruidIndexerSchema getSchema()
{
@ -171,60 +167,29 @@ public class HadoopIndexTask extends AbstractTask
jobUrls.addAll(extensionURLs);
System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
boolean determineIntervals = !schema.getGranularitySpec().bucketIntervals().isPresent();
final Class<?> determineConfigurationMainClass = loader.loadClass(HadoopDetermineConfigInnerProcessing.class.getName());
final Method determineConfigurationMainMethod = determineConfigurationMainClass.getMethod(
"runTask",
String[].class
);
final Class<?> mainClass = loader.loadClass(HadoopIndexTaskInnerProcessing.class.getName());
final Method mainMethod = mainClass.getMethod("runTask", String[].class);
String[] determineConfigArgs = new String[]{
// We should have a lock from before we started running
final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));
log.info("Setting version to: %s", myLock.getVersion());
String[] args = new String[]{
toolbox.getObjectMapper().writeValueAsString(schema),
myLock.getVersion(),
toolbox.getConfig().getHadoopWorkingPath(),
toolbox.getSegmentPusher().getPathForHadoop(getDataSource())
toolbox.getSegmentPusher().getPathForHadoop(getDataSource()),
};
String config = (String) determineConfigurationMainMethod.invoke(null, new Object[]{determineConfigArgs});
HadoopDruidIndexerSchema indexerSchema = toolbox.getObjectMapper()
.readValue(config, HadoopDruidIndexerSchema.class);
// We should have a lock from before we started running only if interval was specified
final String version;
if (determineIntervals) {
Interval interval = JodaUtils.umbrellaInterval(
JodaUtils.condenseIntervals(
indexerSchema.getGranularitySpec().bucketIntervals().get()
)
);
TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
version = lock.getVersion();
} else {
Iterable<TaskLock> locks = getTaskLocks(toolbox);
final TaskLock myLock = Iterables.getOnlyElement(locks);
version = myLock.getVersion();
}
log.info("Setting version to: %s", version);
final Class<?> indexGeneratorMainClass = loader.loadClass(HadoopIndexGeneratorInnerProcessing.class.getName());
final Method indexGeneratorMainMethod = indexGeneratorMainClass.getMethod("runTask", String[].class);
String[] indexGeneratorArgs = new String[]{
toolbox.getObjectMapper().writeValueAsString(indexerSchema),
version
};
String segments = (String) indexGeneratorMainMethod.invoke(null, new Object[]{indexGeneratorArgs});
String segments = (String) mainMethod.invoke(null, new Object[]{args});
if (segments != null) {
List<DataSegment> publishedSegments = toolbox.getObjectMapper().readValue(
segments,
new TypeReference<List<DataSegment>>()
{
}
new TypeReference<List<DataSegment>>() {}
);
toolbox.pushSegments(publishedSegments);
return TaskStatus.success(getId());
} else {
@ -232,12 +197,14 @@ public class HadoopIndexTask extends AbstractTask
}
}
public static class HadoopIndexGeneratorInnerProcessing
public static class HadoopIndexTaskInnerProcessing
{
public static String runTask(String[] args) throws Exception
{
final String schema = args[0];
String version = args[1];
final String version = args[1];
final String workingPath = args[2];
final String segmentOutputPath = args[3];
final HadoopDruidIndexerSchema theSchema = HadoopDruidIndexerConfig.jsonMapper
.readValue(
@ -247,6 +214,12 @@ public class HadoopIndexTask extends AbstractTask
final HadoopDruidIndexerConfig config =
new HadoopDruidIndexerConfigBuilder().withSchema(theSchema)
.withVersion(version)
.withWorkingPath(
workingPath
)
.withSegmentOutputPath(
segmentOutputPath
)
.build();
HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config);
@ -259,34 +232,4 @@ public class HadoopIndexTask extends AbstractTask
return null;
}
}
public static class HadoopDetermineConfigInnerProcessing
{
public static String runTask(String[] args) throws Exception
{
final String schema = args[0];
final String workingPath = args[1];
final String segmentOutputPath = args[2];
final HadoopDruidIndexerSchema theSchema = HadoopDruidIndexerConfig.jsonMapper
.readValue(
schema,
HadoopDruidIndexerSchema.class
);
final HadoopDruidIndexerConfig config =
new HadoopDruidIndexerConfigBuilder().withSchema(theSchema)
.withWorkingPath(workingPath)
.withSegmentOutputPath(segmentOutputPath)
.build();
Jobby job = new HadoopDruidDetermineConfigurationJob(config);
log.info("Starting a hadoop index generator job...");
if (job.run()) {
return HadoopDruidIndexerConfig.jsonMapper.writeValueAsString(HadoopDruidIndexerConfigBuilder.toSchema(config));
}
return null;
}
}
}
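Editor's note: both before and after this revert, the task loads an inner-processing class and calls its static runTask(String[]) reflectively (see mainMethod.invoke above). The sketch below is a self-contained illustration of that invocation pattern with a hypothetical InnerProcessing class; the detail worth noting is the new Object[]{args} wrapping, which passes the String[] as a single reflective argument instead of letting it expand as varargs.

import java.lang.reflect.Method;

// Illustrative stand-in for the inner-processing classes above; the real task
// resolves them through a URLClassLoader built from the Hadoop and extension jars.
public class ReflectiveRunTaskSketch
{
  public static class InnerProcessing
  {
    public static String runTask(String[] args) throws Exception
    {
      return "ran with " + args.length + " args";
    }
  }

  public static void main(String[] args) throws Exception
  {
    final Class<?> clazz = ReflectiveRunTaskSketch.class.getClassLoader()
        .loadClass(InnerProcessing.class.getName());
    final Method runTask = clazz.getMethod("runTask", String[].class);

    String[] taskArgs = new String[]{"schema-json", "version", "working-path", "segment-output-path"};
    // Wrapping in new Object[]{...} keeps the String[] as one argument; passing
    // taskArgs directly would be spread as varargs and fail with the wrong arity.
    String result = (String) runTask.invoke(null, new Object[]{taskArgs});
    System.out.println(result);
  }
}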

View File

@ -109,8 +109,8 @@ public class IndexTask extends AbstractFixedIntervalTask
id != null ? id : String.format("index_%s_%s", dataSource, new DateTime().toString()),
dataSource,
new Interval(
granularitySpec.bucketIntervals().get().first().getStart(),
granularitySpec.bucketIntervals().get().last().getEnd()
granularitySpec.bucketIntervals().first().getStart(),
granularitySpec.bucketIntervals().last().getEnd()
)
);
@ -137,7 +137,7 @@ public class IndexTask extends AbstractFixedIntervalTask
final TaskLock myLock = Iterables.getOnlyElement(getTaskLocks(toolbox));
final Set<DataSegment> segments = Sets.newHashSet();
final Set<Interval> validIntervals = Sets.intersection(granularitySpec.bucketIntervals().get(), getDataIntervals());
final Set<Interval> validIntervals = Sets.intersection(granularitySpec.bucketIntervals(), getDataIntervals());
if (validIntervals.isEmpty()) {
throw new ISE("No valid data intervals found. Check your configs!");
}
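Editor's note: the recurring bucketIntervals().get() -> bucketIntervals() edits in this commit reflect a return-type change being reverted. On the reverted branch the method returned a Guava Optional (absent when intervals were to be determined from the data); after the revert it returns the interval set directly. A schematic sketch of the two shapes, using plain strings in place of Joda Intervals:

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableSortedSet;

import java.util.SortedSet;

// Schematic only: strings stand in for Intervals, and the two methods stand in
// for the pre- and post-revert signatures of bucketIntervals().
public class BucketIntervalsSketch
{
  // Shape on the reverted branch: absent() meant "determine intervals from the data".
  static Optional<SortedSet<String>> bucketIntervalsOptional(boolean known)
  {
    return known
           ? Optional.<SortedSet<String>>of(ImmutableSortedSet.of("2012-01-01/P1D"))
           : Optional.<SortedSet<String>>absent();
  }

  // Shape after the revert: the intervals are always present.
  static SortedSet<String> bucketIntervalsPlain()
  {
    return ImmutableSortedSet.of("2012-01-01/P1D");
  }

  public static void main(String[] args)
  {
    // Pre-revert callers had to unwrap, e.g. spec.bucketIntervals().get();
    // post-revert callers use the returned set directly.
    System.out.println(bucketIntervalsOptional(true).get());
    System.out.println(bucketIntervalsPlain());
  }
}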

View File

@ -382,9 +382,11 @@ public class TaskSerdeTest
final HadoopIndexTask task2 = (HadoopIndexTask) jsonMapper.readValue(json, Task.class);
Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(new Interval("2010-01-01/P1D"), task.getInterval());
Assert.assertEquals(task.getId(), task2.getId());
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(task.getInterval(), task2.getInterval());
}
}

View File

@ -30,7 +30,6 @@ import io.druid.guice.annotations.Self;
import io.druid.initialization.DruidModule;
import io.druid.server.DruidNode;
import io.druid.server.initialization.ZkPathsConfig;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.LinearShardSpec;
import io.druid.timeline.partition.NumberedShardSpec;
import io.druid.timeline.partition.SingleDimensionShardSpec;
@ -64,8 +63,7 @@ public class ServerModule implements DruidModule
.registerSubtypes(
new NamedType(SingleDimensionShardSpec.class, "single"),
new NamedType(LinearShardSpec.class, "linear"),
new NamedType(NumberedShardSpec.class, "numbered"),
new NamedType(HashBasedNumberedShardSpec.class, "hashed")
new NamedType(NumberedShardSpec.class, "numbered")
)
);
}
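Editor's note: the module registers shard-spec implementations as Jackson subtypes, so the NamedType name becomes the value of the JSON "type" field; dropping the "hashed" entry means that type can no longer be deserialized. Below is a toy example of the same registration mechanics, with made-up ToyShardSpec types standing in for ShardSpec and its implementations.

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.jsontype.NamedType;

public class NamedTypeSketch
{
  // Base type carries the type-id property, just as ShardSpec does.
  @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
  public interface ToyShardSpec {}

  public static class ToyLinearShardSpec implements ToyShardSpec
  {
    @JsonProperty
    public int partitionNum;
  }

  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // Same pattern as the module above: the NamedType name is the JSON "type" value.
    mapper.registerSubtypes(new NamedType(ToyLinearShardSpec.class, "linear"));

    ToyShardSpec spec = mapper.readValue(
        "{\"type\": \"linear\", \"partitionNum\": 3}",
        ToyShardSpec.class
    );
    System.out.println(spec.getClass().getSimpleName()); // ToyLinearShardSpec
  }
}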

View File

@ -1,77 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2014 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.timeline.partition;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.client.repackaged.com.google.common.base.Throwables;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.data.input.InputRow;
import io.druid.data.input.Rows;
import java.util.List;
public class HashBasedNumberedShardSpec extends NumberedShardSpec
{
private static final HashFunction hashFunction = Hashing.murmur3_32();
@JacksonInject
private ObjectMapper jsonMapper;
@JsonCreator
public HashBasedNumberedShardSpec(
@JsonProperty("partitionNum") int partitionNum,
@JsonProperty("partitions") int partitions
)
{
super(partitionNum, partitions);
}
@Override
public boolean isInChunk(InputRow inputRow)
{
return Math.abs(hash(inputRow)) % getPartitions() == getPartitionNum();
}
private int hash(InputRow inputRow)
{
final List<Object> groupKey = Rows.toGroupKey(inputRow.getTimestampFromEpoch(), inputRow);
try {
return hashFunction.hashBytes(jsonMapper.writeValueAsBytes(groupKey)).asInt();
}
catch (JsonProcessingException e) {
throw Throwables.propagate(e);
}
}
@Override
public String toString()
{
return "HashBasedNumberedShardSpec{" +
"partitionNum=" + getPartitionNum() +
", partitions=" + getPartitions() +
'}';
}
}
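Editor's note: the deleted class assigns a row to a shard when abs(hash) % partitions equals the shard's partitionNum, hashing a Jackson-serialized group key with murmur3_32. The sketch below reproduces that rule schematically, with plain strings standing in for serialized group keys.

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

import java.nio.charset.StandardCharsets;

public class HashPartitionSketch
{
  private static final HashFunction hashFunction = Hashing.murmur3_32();

  // Mirrors the deleted isInChunk: a key belongs to the shard whose
  // partitionNum equals abs(hash) % partitions.
  static boolean isInChunk(String groupKey, int partitionNum, int partitions)
  {
    int hash = hashFunction.hashString(groupKey, StandardCharsets.UTF_8).asInt();
    // As in the deleted code, Math.abs(Integer.MIN_VALUE) stays negative, so that
    // one hash value would match no partition; ignored here for brevity.
    return Math.abs(hash) % partitions == partitionNum;
  }

  public static void main(String[] args)
  {
    final int partitions = 3;
    for (String key : new String[]{"2014-02-14T00:00Z|foo", "2014-02-14T00:00Z|bar"}) {
      for (int p = 0; p < partitions; p++) {
        if (isInChunk(key, p, partitions)) {
          // Each key maps to exactly one of the numbered partitions.
          System.out.println(key + " -> partition " + p);
        }
      }
    }
  }
}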

View File

@ -19,12 +19,8 @@
package io.druid;
import com.fasterxml.jackson.databind.BeanProperty;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.metamx.common.ISE;
import io.druid.guice.ServerModule;
import io.druid.jackson.DefaultObjectMapper;
@ -41,20 +37,5 @@ public class TestUtil
for (Module module : list) {
MAPPER.registerModule(module);
}
MAPPER.setInjectableValues(
new InjectableValues()
{
@Override
public Object findInjectableValue(
Object valueId, DeserializationContext ctxt, BeanProperty forProperty, Object beanInstance
)
{
if (valueId.equals("com.fasterxml.jackson.databind.ObjectMapper")) {
return TestUtil.MAPPER;
}
throw new ISE("No Injectable value found");
}
}
);
}
}
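Editor's note: the block removed here supplied Jackson InjectableValues so that @JacksonInject fields, such as the ObjectMapper in the deleted HashBasedNumberedShardSpec, could be populated during deserialization. A toy example of that mechanism, with a made-up NeedsMapper bean:

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.ObjectMapper;

public class InjectableValuesSketch
{
  public static class NeedsMapper
  {
    // With no explicit id, the injectable is looked up by the field type's class name,
    // which is the key that InjectableValues.Std.addValue(Class, Object) registers.
    @JacksonInject
    public ObjectMapper jsonMapper;
  }

  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    mapper.setInjectableValues(
        new InjectableValues.Std().addValue(ObjectMapper.class, mapper)
    );

    NeedsMapper bean = mapper.readValue("{}", NeedsMapper.class);
    // The field was populated by injection, not by the (empty) JSON payload.
    System.out.println(bean.jsonMapper == mapper); // true
  }
}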

View File

@ -1,110 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package io.druid.server.shard;
import com.fasterxml.jackson.databind.BeanProperty;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.InjectableValues;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.metamx.common.ISE;
import io.druid.TestUtil;
import io.druid.timeline.partition.HashBasedNumberedShardSpec;
import io.druid.timeline.partition.PartitionChunk;
import io.druid.timeline.partition.ShardSpec;
import junit.framework.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.List;
public class HashBasedNumberedShardSpecTest
{
@Test
public void testSerdeRoundTrip() throws Exception
{
final ShardSpec spec = TestUtil.MAPPER.readValue(
TestUtil.MAPPER.writeValueAsBytes(new HashBasedNumberedShardSpec(1, 2)),
ShardSpec.class
);
Assert.assertEquals(1, spec.getPartitionNum());
Assert.assertEquals(2, ((HashBasedNumberedShardSpec) spec).getPartitions());
}
@Test
public void testSerdeBackwardsCompat() throws Exception
{
final ShardSpec spec = TestUtil.MAPPER.readValue(
"{\"type\": \"hashed\", \"partitions\": 2, \"partitionNum\": 1}",
ShardSpec.class
);
Assert.assertEquals(1, spec.getPartitionNum());
Assert.assertEquals(2, ((HashBasedNumberedShardSpec) spec).getPartitions());
}
@Test
public void testPartitionChunks()
{
final List<ShardSpec> specs = ImmutableList.<ShardSpec>of(
new HashBasedNumberedShardSpec(0, 3),
new HashBasedNumberedShardSpec(1, 3),
new HashBasedNumberedShardSpec(2, 3)
);
final List<PartitionChunk<String>> chunks = Lists.transform(
specs,
new Function<ShardSpec, PartitionChunk<String>>()
{
@Override
public PartitionChunk<String> apply(ShardSpec shardSpec)
{
return shardSpec.createChunk("rofl");
}
}
);
Assert.assertEquals(0, chunks.get(0).getChunkNumber());
Assert.assertEquals(1, chunks.get(1).getChunkNumber());
Assert.assertEquals(2, chunks.get(2).getChunkNumber());
Assert.assertTrue(chunks.get(0).isStart());
Assert.assertFalse(chunks.get(1).isStart());
Assert.assertFalse(chunks.get(2).isStart());
Assert.assertFalse(chunks.get(0).isEnd());
Assert.assertFalse(chunks.get(1).isEnd());
Assert.assertTrue(chunks.get(2).isEnd());
Assert.assertTrue(chunks.get(0).abuts(chunks.get(1)));
Assert.assertTrue(chunks.get(1).abuts(chunks.get(2)));
Assert.assertFalse(chunks.get(0).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(0).abuts(chunks.get(2)));
Assert.assertFalse(chunks.get(1).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(1).abuts(chunks.get(1)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(1)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(2)));
}
}