Merge branch 'master' into task-stuff

Gian Merlino 2013-01-25 11:34:18 -08:00
commit 553738e1d8
24 changed files with 1636 additions and 799 deletions

View File

@ -28,6 +28,7 @@ import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.metamx.common.ISE;
import com.metamx.common.Pair;
@ -41,7 +42,6 @@ import com.metamx.druid.TimelineObjectHolder;
import com.metamx.druid.VersionedIntervalTimeline;
import com.metamx.druid.aggregation.AggregatorFactory;
import com.metamx.druid.client.cache.Cache;
import com.metamx.druid.client.cache.CacheBroker;
import com.metamx.druid.client.selector.ServerSelector;
import com.metamx.druid.partition.PartitionChunk;
import com.metamx.druid.query.CacheStrategy;
@ -64,6 +64,7 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
/**
@ -74,19 +75,19 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
private final QueryToolChestWarehouse warehouse;
private final ServerView serverView;
private final CacheBroker cacheBroker;
private final Cache cache;
private final ObjectMapper objectMapper;
public CachingClusteredClient(
QueryToolChestWarehouse warehouse,
ServerView serverView,
CacheBroker cacheBroker,
Cache cache,
ObjectMapper objectMapper
)
{
this.warehouse = warehouse;
this.serverView = serverView;
this.cacheBroker = cacheBroker;
this.cache = cache;
this.objectMapper = objectMapper;
serverView.registerSegmentCallback(
@ -98,7 +99,7 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
@Override
public ServerView.CallbackAction segmentRemoved(DruidServer server, DataSegment segment)
{
CachingClusteredClient.this.cacheBroker.provideCache(segment.getIdentifier()).close();
CachingClusteredClient.this.cache.close(segment.getIdentifier());
return ServerView.CallbackAction.CONTINUE;
}
}
@ -111,7 +112,8 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
final CacheStrategy<T, Query<T>> strategy = toolChest.getCacheStrategy(query);
final Map<DruidServer, List<SegmentDescriptor>> segs = Maps.newTreeMap();
final Map<DruidServer, List<SegmentDescriptor>> serverSegments = Maps.newTreeMap();
final List<Pair<DateTime, byte[]>> cachedResults = Lists.newArrayList();
final Map<String, CachePopulator> cachePopulatorMap = Maps.newHashMap();
@ -131,10 +133,8 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
return Sequences.empty();
}
byte[] queryCacheKey = null;
if (strategy != null) {
queryCacheKey = strategy.computeCacheKey(query);
}
// build set of segments to query
Set<Pair<ServerSelector, SegmentDescriptor>> segments = Sets.newLinkedHashSet();
for (Interval interval : rewrittenQuery.getIntervals()) {
List<TimelineObjectHolder<String, ServerSelector>> serversLookup = timeline.lookup(interval);
@ -146,55 +146,67 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
holder.getInterval(), holder.getVersion(), chunk.getChunkNumber()
);
if (queryCacheKey == null) {
final DruidServer server = selector.pick();
List<SegmentDescriptor> descriptors = segs.get(server);
if (descriptors == null) {
descriptors = Lists.newArrayList();
segs.put(server, descriptors);
}
descriptors.add(descriptor);
}
else {
final Interval segmentQueryInterval = holder.getInterval();
final byte[] versionBytes = descriptor.getVersion().getBytes();
final byte[] cacheKey = ByteBuffer
.allocate(16 + versionBytes.length + 4 + queryCacheKey.length)
.putLong(segmentQueryInterval.getStartMillis())
.putLong(segmentQueryInterval.getEndMillis())
.put(versionBytes)
.putInt(descriptor.getPartitionNumber())
.put(queryCacheKey)
.array();
final String segmentIdentifier = selector.getSegment().getIdentifier();
final Cache cache = cacheBroker.provideCache(segmentIdentifier);
final byte[] cachedValue = cache.get(cacheKey);
if (useCache && cachedValue != null) {
cachedResults.add(Pair.of(segmentQueryInterval.getStart(), cachedValue));
} else {
final DruidServer server = selector.pick();
List<SegmentDescriptor> descriptors = segs.get(server);
if (descriptors == null) {
descriptors = Lists.newArrayList();
segs.put(server, descriptors);
}
descriptors.add(descriptor);
cachePopulatorMap.put(
String.format("%s_%s", segmentIdentifier, segmentQueryInterval),
new CachePopulator(cache, objectMapper, cacheKey)
);
}
}
segments.add(Pair.of(selector, descriptor));
}
}
}
final byte[] queryCacheKey;
if(strategy != null) {
queryCacheKey = strategy.computeCacheKey(query);
} else {
queryCacheKey = null;
}
// Pull cached segments from cache and remove from set of segments to query
if(useCache && queryCacheKey != null) {
Map<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> cacheKeys = Maps.newHashMap();
for(Pair<ServerSelector, SegmentDescriptor> e : segments) {
cacheKeys.put(e, computeSegmentCacheKey(e.lhs.getSegment().getIdentifier(), e.rhs, queryCacheKey));
}
Map<Cache.NamedKey, byte[]> cachedValues = cache.getBulk(cacheKeys.values());
for(Map.Entry<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> entry : cacheKeys.entrySet()) {
Pair<ServerSelector, SegmentDescriptor> segment = entry.getKey();
Cache.NamedKey segmentCacheKey = entry.getValue();
final ServerSelector selector = segment.lhs;
final SegmentDescriptor descriptor = segment.rhs;
final Interval segmentQueryInterval = descriptor.getInterval();
final byte[] cachedValue = cachedValues.get(segmentCacheKey);
if (cachedValue != null) {
cachedResults.add(Pair.of(segmentQueryInterval.getStart(), cachedValue));
// remove cached segment from set of segments to query
segments.remove(segment);
}
else {
final String segmentIdentifier = selector.getSegment().getIdentifier();
cachePopulatorMap.put(
String.format("%s_%s", segmentIdentifier, segmentQueryInterval),
new CachePopulator(cache, objectMapper, segmentCacheKey)
);
}
}
}
// Compile list of all segments not pulled from cache
for(Pair<ServerSelector, SegmentDescriptor> segment : segments) {
final DruidServer server = segment.lhs.pick();
List<SegmentDescriptor> descriptors = serverSegments.get(server);
if (descriptors == null) {
descriptors = Lists.newArrayList();
serverSegments.put(server, descriptors);
}
descriptors.add(segment.rhs);
}
return new LazySequence<T>(
new Supplier<Sequence<T>>()
{
@ -264,7 +276,7 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
@SuppressWarnings("unchecked")
private void addSequencesFromServer(ArrayList<Pair<DateTime, Sequence<T>>> listOfSequences)
{
for (Map.Entry<DruidServer, List<SegmentDescriptor>> entry : segs.entrySet()) {
for (Map.Entry<DruidServer, List<SegmentDescriptor>> entry : serverSegments.entrySet()) {
final DruidServer server = entry.getKey();
final List<SegmentDescriptor> descriptors = entry.getValue();
@ -328,13 +340,29 @@ public class CachingClusteredClient<T> implements QueryRunner<T>
);
}
private Cache.NamedKey computeSegmentCacheKey(String segmentIdentifier, SegmentDescriptor descriptor, byte[] queryCacheKey)
{
final Interval segmentQueryInterval = descriptor.getInterval();
final byte[] versionBytes = descriptor.getVersion().getBytes();
return new Cache.NamedKey(
segmentIdentifier, ByteBuffer
.allocate(16 + versionBytes.length + 4 + queryCacheKey.length)
.putLong(segmentQueryInterval.getStartMillis())
.putLong(segmentQueryInterval.getEndMillis())
.put(versionBytes)
.putInt(descriptor.getPartitionNumber())
.put(queryCacheKey).array()
);
}
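// Resulting NamedKey: the namespace is the segment identifier; the key bytes are laid out as
//   [interval start millis: 8][interval end millis: 8][segment version: variable][partition number: 4][query cache key: variable]
// e.g. a two-character version string yields 22 + queryCacheKey.length key bytes.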
private static class CachePopulator
{
private final Cache cache;
private final ObjectMapper mapper;
private final byte[] key;
private final Cache.NamedKey key;
public CachePopulator(Cache cache, ObjectMapper mapper, byte[] key)
public CachePopulator(Cache cache, ObjectMapper mapper, Cache.NamedKey key)
{
this.cache = cache;
this.mapper = mapper;
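Taken together, the rewritten query path is: derive one query-level cache key, build a Cache.NamedKey per segment, fetch them all in a single getBulk, answer the hits from the cached bytes, and group only the remaining segments by the server that will be queried. A condensed, stand-alone sketch of that split follows; the class and method names are made up, and plain strings stand in for the Pair<ServerSelector, SegmentDescriptor> entries used above.

import com.metamx.druid.client.cache.Cache;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

class BulkCacheLookupSketch
{
  // `segments` is the set of segments still to be queried; here a plain string identifies each one.
  static List<byte[]> splitCachedFromUncached(Cache cache, Set<String> segments, byte[] queryCacheKey)
  {
    // One NamedKey per segment, namespaced by the segment identifier.
    final Map<String, Cache.NamedKey> cacheKeys = new HashMap<String, Cache.NamedKey>();
    for (String segment : segments) {
      cacheKeys.put(segment, new Cache.NamedKey(segment, queryCacheKey));
    }

    // A single bulk lookup replaces one cache round trip per segment.
    final Map<Cache.NamedKey, byte[]> cachedValues = cache.getBulk(cacheKeys.values());

    // Hits are answered from cache and removed from the set; the survivors are later
    // grouped per DruidServer, as in serverSegments above.
    final List<byte[]> cachedResults = new ArrayList<byte[]>();
    for (Map.Entry<String, Cache.NamedKey> entry : cacheKeys.entrySet()) {
      final byte[] cachedValue = cachedValues.get(entry.getValue());
      if (cachedValue != null) {
        cachedResults.add(cachedValue);
        segments.remove(entry.getKey());
      }
    }
    return cachedResults;
  }
}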

View File

@ -19,13 +19,63 @@
package com.metamx.druid.client.cache;
import com.google.common.base.Preconditions;
import java.util.Arrays;
import java.util.Map;
/**
* An interface to limit the operations that can be done on a Cache so that it is easier to reason about what
* is actually going to be done.
*/
public interface Cache
{
public byte[] get(byte[] key);
public void put(byte[] key, byte[] value);
public void close();
public byte[] get(NamedKey key);
public void put(NamedKey key, byte[] value);
public Map<NamedKey, byte[]> getBulk(Iterable<NamedKey> keys);
public void close(String namespace);
public CacheStats getStats();
public class NamedKey
{
final public String namespace;
final public byte[] key;
public NamedKey(String namespace, byte[] key) {
Preconditions.checkArgument(namespace != null, "namespace must not be null");
Preconditions.checkArgument(key != null, "key must not be null");
this.namespace = namespace;
this.key = key;
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
NamedKey namedKey = (NamedKey) o;
if (!namespace.equals(namedKey.namespace)) {
return false;
}
if (!Arrays.equals(key, namedKey.key)) {
return false;
}
return true;
}
@Override
public int hashCode()
{
int result = namespace.hashCode();
result = 31 * result + Arrays.hashCode(key);
return result;
}
}
}
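For orientation, a minimal sketch of how a caller exercises the namespaced API defined above; the cache instance is supplied by the caller, and the namespace and key bytes are illustrative.

import com.metamx.druid.client.cache.Cache;

import java.util.Arrays;
import java.util.Map;

class CacheUsageSketch
{
  static void exercise(Cache cache)
  {
    final Cache.NamedKey key = new Cache.NamedKey("segment_2013-01-01", "results".getBytes());

    cache.put(key, new byte[]{1, 2, 3});
    System.out.println(cache.get(key) != null);  // true: a hit returns the stored bytes, a miss returns null

    final Map<Cache.NamedKey, byte[]> bulk = cache.getBulk(Arrays.asList(key));  // many keys, one call
    System.out.println(bulk.size());             // 1

    // Releases the namespace: MapCache evicts its entries, MemcachedCache has nothing to clean up.
    cache.close("segment_2013-01-01");
  }
}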

View File

@ -1,28 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package com.metamx.druid.client.cache;
/**
*/
public interface CacheBroker
{
public CacheStats getStats();
public Cache provideCache(String identifier);
}

View File

@ -27,21 +27,21 @@ import com.metamx.metrics.AbstractMonitor;
*/
public class CacheMonitor extends AbstractMonitor
{
private final CacheBroker cacheBroker;
private final Cache cache;
private volatile CacheStats prevCacheStats = null;
public CacheMonitor(
CacheBroker cacheBroker
Cache cache
)
{
this.cacheBroker = cacheBroker;
this.cache = cache;
}
@Override
public boolean doMonitor(ServiceEmitter emitter)
{
final CacheStats currCacheStats = cacheBroker.getStats();
final CacheStats currCacheStats = cache.getStats();
final CacheStats deltaCacheStats = currCacheStats.delta(prevCacheStats);
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();

View File

@ -0,0 +1,158 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package com.metamx.druid.client.cache;
import com.google.common.collect.Maps;
import com.google.common.primitives.Ints;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/**
*/
public class MapCache implements Cache
{
private final Map<ByteBuffer, byte[]> baseMap;
private final ByteCountingLRUMap byteCountingLRUMap;
private final Map<String, byte[]> namespaceId;
private final AtomicInteger ids;
private final Object clearLock = new Object();
private final AtomicLong hitCount = new AtomicLong(0);
private final AtomicLong missCount = new AtomicLong(0);
public static com.metamx.druid.client.cache.Cache create(final MapCacheConfig config)
{
return new MapCache(
new ByteCountingLRUMap(
config.getInitialSize(),
config.getLogEvictionCount(),
config.getSizeInBytes()
)
);
}
MapCache(
ByteCountingLRUMap byteCountingLRUMap
)
{
this.byteCountingLRUMap = byteCountingLRUMap;
this.baseMap = Collections.synchronizedMap(byteCountingLRUMap);
namespaceId = Maps.newHashMap();
ids = new AtomicInteger();
}
@Override
public CacheStats getStats()
{
return new CacheStats(
hitCount.get(),
missCount.get(),
byteCountingLRUMap.size(),
byteCountingLRUMap.getNumBytes(),
byteCountingLRUMap.getEvictionCount(),
0
);
}
@Override
public byte[] get(NamedKey key)
{
final byte[] retVal = baseMap.get(computeKey(getNamespaceId(key.namespace), key.key));
if (retVal == null) {
missCount.incrementAndGet();
} else {
hitCount.incrementAndGet();
}
return retVal;
}
@Override
public void put(NamedKey key, byte[] value)
{
synchronized (clearLock) {
baseMap.put(computeKey(getNamespaceId(key.namespace), key.key), value);
}
}
@Override
public Map<NamedKey, byte[]> getBulk(Iterable<NamedKey> keys)
{
Map<NamedKey, byte[]> retVal = Maps.newHashMap();
for(NamedKey key : keys) {
retVal.put(key, get(key));
}
return retVal;
}
@Override
public void close(String namespace)
{
byte[] idBytes;
synchronized (namespaceId) {
idBytes = getNamespaceId(namespace);
if(idBytes == null) return;
namespaceId.remove(namespace);
}
synchronized (clearLock) {
Iterator<ByteBuffer> iter = baseMap.keySet().iterator();
while (iter.hasNext()) {
ByteBuffer next = iter.next();
if (next.get(0) == idBytes[0]
&& next.get(1) == idBytes[1]
&& next.get(2) == idBytes[2]
&& next.get(3) == idBytes[3]) {
iter.remove();
}
}
}
}
private byte[] getNamespaceId(final String identifier)
{
synchronized (namespaceId) {
byte[] idBytes = namespaceId.get(identifier);
if (idBytes != null) {
return idBytes;
}
idBytes = Ints.toByteArray(ids.getAndIncrement());
namespaceId.put(identifier, idBytes);
return idBytes;
}
}
private ByteBuffer computeKey(byte[] idBytes, byte[] key)
{
final ByteBuffer retVal = ByteBuffer.allocate(key.length + 4).put(idBytes).put(key);
retVal.rewind();
return retVal;
}
}
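A note on the design above: MapCache assigns each namespace a 4-byte id and prepends it to every stored key, which is what lets close(namespace) evict by looking only at the first four bytes of each entry. A tiny stand-alone illustration of that prefix check; the id value is hypothetical.

import java.nio.ByteBuffer;
import java.util.Arrays;

class NamespacePrefixSketch
{
  public static void main(String[] args)
  {
    final byte[] idBytes = new byte[]{0, 0, 0, 7};  // hypothetical namespace id handed out by the AtomicInteger
    final byte[] key = "someCacheKey".getBytes();

    // Same layout as computeKey(): [4-byte namespace id][key bytes]
    final ByteBuffer stored = ByteBuffer.allocate(key.length + 4).put(idBytes).put(key);
    stored.rewind();

    // close("namespace") walks the map and removes entries whose first four bytes match the id.
    final byte[] prefix = new byte[4];
    stored.get(prefix);
    System.out.println(Arrays.equals(prefix, idBytes));  // true: this entry belongs to the namespace
  }
}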

View File

@ -1,165 +0,0 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package com.metamx.druid.client.cache;
import com.google.common.collect.Maps;
import com.google.common.primitives.Ints;
import com.metamx.common.ISE;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/**
*/
public class MapCacheBroker implements CacheBroker
{
private final Map<ByteBuffer, byte[]> baseMap;
private final ByteCountingLRUMap byteCountingLRUMap;
private final Map<String, Cache> cacheCache;
private final AtomicInteger ids;
private final Object clearLock = new Object();
private final AtomicLong hitCount = new AtomicLong(0);
private final AtomicLong missCount = new AtomicLong(0);
public static CacheBroker create(final MapCacheBrokerConfig config)
{
return new MapCacheBroker(
new ByteCountingLRUMap(
config.getInitialSize(),
config.getLogEvictionCount(),
config.getSizeInBytes()
)
);
}
MapCacheBroker(
ByteCountingLRUMap byteCountingLRUMap
)
{
this.byteCountingLRUMap = byteCountingLRUMap;
this.baseMap = Collections.synchronizedMap(byteCountingLRUMap);
cacheCache = Maps.newHashMap();
ids = new AtomicInteger();
}
@Override
public CacheStats getStats()
{
return new CacheStats(
hitCount.get(),
missCount.get(),
byteCountingLRUMap.size(),
byteCountingLRUMap.getNumBytes(),
byteCountingLRUMap.getEvictionCount(),
0
);
}
@Override
public Cache provideCache(final String identifier)
{
synchronized (cacheCache) {
final Cache cachedCache = cacheCache.get(identifier);
if (cachedCache != null) {
return cachedCache;
}
final byte[] myIdBytes = Ints.toByteArray(ids.getAndIncrement());
final Cache theCache = new Cache()
{
volatile boolean open = true;
@Override
public byte[] get(byte[] key)
{
if (open) {
final byte[] retVal = baseMap.get(computeKey(key));
if (retVal == null) {
missCount.incrementAndGet();
} else {
hitCount.incrementAndGet();
}
return retVal;
}
throw new ISE("Cache for identifier[%s] is closed.", identifier);
}
@Override
public void put(byte[] key, byte[] value)
{
synchronized (clearLock) {
if (open) {
baseMap.put(computeKey(key), value);
return;
}
}
throw new ISE("Cache for identifier[%s] is closed.", identifier);
}
@Override
public void close()
{
synchronized (cacheCache) {
cacheCache.remove(identifier);
}
synchronized (clearLock) {
if (open) {
open = false;
Iterator<ByteBuffer> iter = baseMap.keySet().iterator();
while (iter.hasNext()) {
ByteBuffer next = iter.next();
if (next.get(0) == myIdBytes[0]
&& next.get(1) == myIdBytes[1]
&& next.get(2) == myIdBytes[2]
&& next.get(3) == myIdBytes[3]) {
iter.remove();
}
}
}
}
}
private ByteBuffer computeKey(byte[] key)
{
final ByteBuffer retVal = ByteBuffer.allocate(key.length + 4).put(myIdBytes).put(key);
retVal.rewind();
return retVal;
}
};
cacheCache.put(identifier, theCache);
return theCache;
}
}
}

View File

@ -24,7 +24,7 @@ import org.skife.config.Default;
/**
*/
public abstract class MapCacheBrokerConfig
public abstract class MapCacheConfig
{
@Config("${prefix}.sizeInBytes")
@Default("0")

View File

@ -19,7 +19,9 @@
package com.metamx.druid.client.cache;
import com.google.common.base.Function;
import com.google.common.base.Throwables;
import com.google.common.collect.Maps;
import net.iharder.base64.Base64;
import net.spy.memcached.AddrUtil;
import net.spy.memcached.ConnectionFactoryBuilder;
@ -27,25 +29,28 @@ import net.spy.memcached.DefaultHashAlgorithm;
import net.spy.memcached.FailureMode;
import net.spy.memcached.MemcachedClient;
import net.spy.memcached.MemcachedClientIF;
import net.spy.memcached.internal.BulkFuture;
import net.spy.memcached.transcoders.SerializingTranscoder;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
public class MemcachedCacheBroker implements CacheBroker
public class MemcachedCache implements Cache
{
public static MemcachedCacheBroker create(final MemcachedCacheBrokerConfig config)
public static MemcachedCache create(final MemcachedCacheConfig config)
{
try {
SerializingTranscoder transcoder = new SerializingTranscoder(config.getMaxObjectSize());
// disable compression
transcoder.setCompressionThreshold(Integer.MAX_VALUE);
return new MemcachedCacheBroker(
return new MemcachedCache(
new MemcachedClient(
new ConnectionFactoryBuilder().setProtocol(ConnectionFactoryBuilder.Protocol.BINARY)
.setHashAlg(DefaultHashAlgorithm.FNV1A_64_HASH)
@ -74,7 +79,7 @@ public class MemcachedCacheBroker implements CacheBroker
private final AtomicLong missCount = new AtomicLong(0);
private final AtomicLong timeoutCount = new AtomicLong(0);
MemcachedCacheBroker(MemcachedClientIF client, int timeout, int expiration) {
MemcachedCache(MemcachedClientIF client, int timeout, int expiration) {
this.timeout = timeout;
this.expiration = expiration;
this.client = client;
@ -94,52 +99,94 @@ public class MemcachedCacheBroker implements CacheBroker
}
@Override
public Cache provideCache(final String identifier)
public byte[] get(NamedKey key)
{
return new Cache()
{
@Override
public byte[] get(byte[] key)
{
Future<Object> future = client.asyncGet(computeKey(identifier, key));
try {
byte[] bytes = (byte[]) future.get(timeout, TimeUnit.MILLISECONDS);
if(bytes != null) {
hitCount.incrementAndGet();
}
else {
missCount.incrementAndGet();
}
return bytes;
}
catch(TimeoutException e) {
timeoutCount.incrementAndGet();
future.cancel(false);
return null;
}
catch(InterruptedException e) {
throw Throwables.propagate(e);
}
catch(ExecutionException e) {
throw Throwables.propagate(e);
}
Future<Object> future = client.asyncGet(computeKeyString(key));
try {
byte[] bytes = (byte[]) future.get(timeout, TimeUnit.MILLISECONDS);
if(bytes != null) {
hitCount.incrementAndGet();
}
@Override
public void put(byte[] key, byte[] value)
{
client.set(computeKey(identifier, key), expiration, value);
else {
missCount.incrementAndGet();
}
@Override
public void close()
{
// no resources to cleanup
}
};
return bytes;
}
catch(TimeoutException e) {
timeoutCount.incrementAndGet();
future.cancel(false);
return null;
}
catch(InterruptedException e) {
Thread.currentThread().interrupt();
throw Throwables.propagate(e);
}
catch(ExecutionException e) {
throw Throwables.propagate(e);
}
}
private String computeKey(String identifier, byte[] key) {
return identifier + Base64.encodeBytes(key, Base64.DONT_BREAK_LINES);
@Override
public void put(NamedKey key, byte[] value)
{
client.set(computeKeyString(key), expiration, value);
}
@Override
public Map<NamedKey, byte[]> getBulk(Iterable<NamedKey> keys)
{
Map<String, NamedKey> keyLookup = Maps.uniqueIndex(
keys,
new Function<NamedKey, String>()
{
@Override
public String apply(
@Nullable NamedKey input
)
{
return computeKeyString(input);
}
}
);
BulkFuture<Map<String, Object>> future = client.asyncGetBulk(keyLookup.keySet());
try {
Map<String, Object> some = future.getSome(timeout, TimeUnit.MILLISECONDS);
if(future.isTimeout()) {
future.cancel(false);
timeoutCount.incrementAndGet();
}
missCount.addAndGet(keyLookup.size() - some.size());
hitCount.addAndGet(some.size());
Map<NamedKey, byte[]> results = Maps.newHashMap();
for(Map.Entry<String, Object> entry : some.entrySet()) {
results.put(
keyLookup.get(entry.getKey()),
(byte[])entry.getValue()
);
}
return results;
}
catch(InterruptedException e) {
Thread.currentThread().interrupt();
throw Throwables.propagate(e);
}
catch(ExecutionException e) {
throw Throwables.propagate(e);
}
}
@Override
public void close(String namespace)
{
// no resources to cleanup
}
private static String computeKeyString(NamedKey key) {
return key.namespace + ":" + Base64.encodeBytes(key.key, Base64.DONT_BREAK_LINES);
}
}
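A sketch of wiring the memcached-backed implementation directly, mirroring the constructor arguments used by the tests later in this diff (client, timeout in milliseconds, expiration in seconds). It assumes a memcached instance is reachable at localhost:11211 and sits in the cache package so the package-private constructor is visible.

package com.metamx.druid.client.cache;

import net.spy.memcached.AddrUtil;
import net.spy.memcached.MemcachedClient;

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.concurrent.TimeUnit;

class MemcachedCacheSketch
{
  public static void main(String[] args) throws IOException
  {
    final MemcachedClient client = new MemcachedClient(AddrUtil.getAddresses("localhost:11211"));
    final MemcachedCache cache = new MemcachedCache(client, 500, 3600);

    final Cache.NamedKey k1 = new Cache.NamedKey("segment_a", "hi".getBytes());
    final Cache.NamedKey k2 = new Cache.NamedKey("segment_a", "ho".getBytes());
    cache.put(k1, new byte[]{1});

    // spymemcached sets are asynchronous; drain the write queue before reading back,
    // as the benchmark later in this diff does.
    client.waitForQueues(1, TimeUnit.HOURS);

    // getBulk() addresses memcached with "namespace:base64(key)" strings and returns
    // whatever came back before the timeout; keys that missed are simply absent.
    final Map<Cache.NamedKey, byte[]> results = cache.getBulk(Arrays.asList(k1, k2));
    System.out.println(results.containsKey(k1));  // true  (hit)
    System.out.println(results.containsKey(k2));  // false (miss)

    client.shutdown();
  }
}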

View File

@ -3,7 +3,7 @@ package com.metamx.druid.client.cache;
import org.skife.config.Config;
import org.skife.config.Default;
public abstract class MemcachedCacheBrokerConfig
public abstract class MemcachedCacheConfig
{
@Config("${prefix}.expiration")
@Default("31536000")

View File

@ -34,13 +34,13 @@ import com.metamx.druid.client.BrokerServerView;
import com.metamx.druid.client.CachingClusteredClient;
import com.metamx.druid.client.ClientConfig;
import com.metamx.druid.client.ClientInventoryManager;
import com.metamx.druid.client.cache.CacheBroker;
import com.metamx.druid.client.cache.Cache;
import com.metamx.druid.client.cache.CacheConfig;
import com.metamx.druid.client.cache.CacheMonitor;
import com.metamx.druid.client.cache.MapCacheBroker;
import com.metamx.druid.client.cache.MapCacheBrokerConfig;
import com.metamx.druid.client.cache.MemcachedCacheBroker;
import com.metamx.druid.client.cache.MemcachedCacheBrokerConfig;
import com.metamx.druid.client.cache.MapCache;
import com.metamx.druid.client.cache.MapCacheConfig;
import com.metamx.druid.client.cache.MemcachedCache;
import com.metamx.druid.client.cache.MemcachedCacheConfig;
import com.metamx.druid.initialization.Initialization;
import com.metamx.druid.initialization.ServiceDiscoveryConfig;
import com.metamx.druid.jackson.DefaultObjectMapper;
@ -78,7 +78,7 @@ public class BrokerNode extends QueryableNode<BrokerNode>
private QueryToolChestWarehouse warehouse = null;
private HttpClient brokerHttpClient = null;
private CacheBroker cacheBroker = null;
private Cache cache = null;
private boolean useDiscovery = true;
@ -122,15 +122,15 @@ public class BrokerNode extends QueryableNode<BrokerNode>
return this;
}
public CacheBroker getCacheBroker()
public Cache getCache()
{
initializeCacheBroker();
return cacheBroker;
return cache;
}
public BrokerNode setCacheBroker(CacheBroker cacheBroker)
public BrokerNode setCache(Cache cache)
{
checkFieldNotSetAndSet("cacheBroker", cacheBroker);
checkFieldNotSetAndSet("cache", cache);
return this;
}
@ -185,7 +185,7 @@ public class BrokerNode extends QueryableNode<BrokerNode>
final Lifecycle lifecycle = getLifecycle();
final List<Monitor> monitors = getMonitors();
monitors.add(new CacheMonitor(cacheBroker));
monitors.add(new CacheMonitor(cache));
startMonitoring(monitors);
final BrokerServerView view = new BrokerServerView(warehouse, getSmileMapper(), brokerHttpClient);
@ -194,7 +194,7 @@ public class BrokerNode extends QueryableNode<BrokerNode>
);
lifecycle.addManagedInstance(clientInventoryManager);
final CachingClusteredClient baseClient = new CachingClusteredClient(warehouse, view, cacheBroker, getSmileMapper());
final CachingClusteredClient baseClient = new CachingClusteredClient(warehouse, view, cache, getSmileMapper());
lifecycle.addManagedInstance(baseClient);
@ -239,25 +239,25 @@ public class BrokerNode extends QueryableNode<BrokerNode>
private void initializeCacheBroker()
{
if (cacheBroker == null) {
if (cache == null) {
String cacheType = getConfigFactory()
.build(CacheConfig.class)
.getType();
if (cacheType.equals(CACHE_TYPE_LOCAL)) {
setCacheBroker(
MapCacheBroker.create(
setCache(
MapCache.create(
getConfigFactory().buildWithReplacements(
MapCacheBrokerConfig.class,
MapCacheConfig.class,
ImmutableMap.of("prefix", CACHE_PROPERTY_PREFIX)
)
)
);
} else if (cacheType.equals(CACHE_TYPE_MEMCACHED)) {
setCacheBroker(
MemcachedCacheBroker.create(
setCache(
MemcachedCache.create(
getConfigFactory().buildWithReplacements(
MemcachedCacheBrokerConfig.class,
MemcachedCacheConfig.class,
ImmutableMap.of("prefix", CACHE_PROPERTY_PREFIX)
)
)

View File

@ -31,56 +31,53 @@ public class MapCacheBrokerTest
private static final byte[] HI = "hi".getBytes();
private static final byte[] HO = "ho".getBytes();
private ByteCountingLRUMap baseMap;
private MapCacheBroker broker;
private MapCache cache;
@Before
public void setUp() throws Exception
{
baseMap = new ByteCountingLRUMap(1024 * 1024);
broker = new MapCacheBroker(baseMap);
cache = new MapCache(baseMap);
}
@Test
public void testSanity() throws Exception
{
Cache aCache = broker.provideCache("a");
Cache theCache = broker.provideCache("the");
Assert.assertNull(aCache.get(HI));
Assert.assertNull(cache.get(new Cache.NamedKey("a", HI)));
Assert.assertEquals(0, baseMap.size());
put(aCache, HI, 1);
put(cache, "a", HI, 1);
Assert.assertEquals(1, baseMap.size());
Assert.assertEquals(1, get(aCache, HI));
Assert.assertNull(theCache.get(HI));
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertNull(cache.get(new Cache.NamedKey("the", HI)));
put(theCache, HI, 2);
put(cache, "the", HI, 2);
Assert.assertEquals(2, baseMap.size());
Assert.assertEquals(1, get(aCache, HI));
Assert.assertEquals(2, get(theCache, HI));
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertEquals(2, get(cache, "the", HI));
put(theCache, HO, 10);
put(cache, "the", HO, 10);
Assert.assertEquals(3, baseMap.size());
Assert.assertEquals(1, get(aCache, HI));
Assert.assertNull(aCache.get(HO));
Assert.assertEquals(2, get(theCache, HI));
Assert.assertEquals(10, get(theCache, HO));
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertNull(cache.get(new Cache.NamedKey("a", HO)));
Assert.assertEquals(2, get(cache, "the", HI));
Assert.assertEquals(10, get(cache, "the", HO));
theCache.close();
cache.close("the");
Assert.assertEquals(1, baseMap.size());
Assert.assertEquals(1, get(aCache, HI));
Assert.assertNull(aCache.get(HO));
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertNull(cache.get(new Cache.NamedKey("a", HO)));
aCache.close();
cache.close("a");
Assert.assertEquals(0, baseMap.size());
}
public void put(Cache cache, byte[] key, Integer value)
public void put(Cache cache, String namespace, byte[] key, Integer value)
{
cache.put(key, Ints.toByteArray(value));
cache.put(new Cache.NamedKey(namespace, key), Ints.toByteArray(value));
}
public int get(Cache cache, byte[] key)
public int get(Cache cache, String namespace, byte[] key)
{
return Ints.fromByteArray(cache.get(key));
return Ints.fromByteArray(cache.get(new Cache.NamedKey(namespace, key)));
}
}

View File

@ -3,6 +3,7 @@ package com.metamx.druid.client.cache;
import com.google.caliper.Param;
import com.google.caliper.Runner;
import com.google.caliper.SimpleBenchmark;
import com.google.common.collect.Lists;
import net.spy.memcached.AddrUtil;
import net.spy.memcached.ConnectionFactoryBuilder;
import net.spy.memcached.DefaultHashAlgorithm;
@ -11,17 +12,19 @@ import net.spy.memcached.MemcachedClient;
import net.spy.memcached.MemcachedClientIF;
import net.spy.memcached.transcoders.SerializingTranscoder;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark
{
private static final String BASE_KEY = "test_2012-11-26T00:00:00.000Z_2012-11-27T00:00:00.000Z_2012-11-27T04:11:25.979Z_";
public static final String NAMESPACE = "default";
private MemcachedCacheBroker broker;
private MemcachedCache cache;
private MemcachedClientIF client;
private Cache cache;
private static byte[] randBytes;
@Param({"localhost:11211"}) String hosts;
@ -39,8 +42,6 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark
// disable compression
transcoder.setCompressionThreshold(Integer.MAX_VALUE);
System.out.println(String.format("Using memcached hosts [%s]", hosts));
client = new MemcachedClient(
new ConnectionFactoryBuilder().setProtocol(ConnectionFactoryBuilder.Protocol.BINARY)
.setHashAlg(DefaultHashAlgorithm.FNV1A_64_HASH)
@ -53,14 +54,12 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark
AddrUtil.getAddresses(hosts)
);
broker = new MemcachedCacheBroker(
cache = new MemcachedCache(
client,
500, // 500 milliseconds
30000, // 30 seconds
3600 // 1 hour
);
cache = broker.provideCache("default");
randBytes = new byte[objectSize * 1024];
new Random(0).nextBytes(randBytes);
@ -69,15 +68,14 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark
@Override
protected void tearDown() throws Exception
{
client.flush();
client.shutdown();
client.shutdown(1, TimeUnit.MINUTES);
}
public void timePutObjects(int reps) {
for(int i = 0; i < reps; ++i) {
for(int k = 0; k < objectCount; ++k) {
String key = BASE_KEY + i;
cache.put(key.getBytes(), randBytes);
String key = BASE_KEY + k;
cache.put(new Cache.NamedKey(NAMESPACE, key.getBytes()), randBytes);
}
// make sure the write queue is empty
client.waitForQueues(1, TimeUnit.HOURS);
@ -89,8 +87,25 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark
long count = 0;
for (int i = 0; i < reps; i++) {
for(int k = 0; k < objectCount; ++k) {
String key = BASE_KEY + i;
bytes = cache.get(key.getBytes());
String key = BASE_KEY + k;
bytes = cache.get(new Cache.NamedKey(NAMESPACE, key.getBytes()));
count += bytes.length;
}
}
return count;
}
public long timeBulkGetObjects(int reps) {
long count = 0;
for (int i = 0; i < reps; i++) {
List<Cache.NamedKey> keys = Lists.newArrayList();
for(int k = 0; k < objectCount; ++k) {
String key = BASE_KEY + k;
keys.add(new Cache.NamedKey(NAMESPACE, key.getBytes()));
}
Map<Cache.NamedKey, byte[]> results = cache.getBulk(keys);
for(Cache.NamedKey key : keys) {
byte[] bytes = results.get(key);
count += bytes.length;
}
}

View File

@ -19,6 +19,8 @@
package com.metamx.druid.client.cache;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.primitives.Ints;
import net.spy.memcached.CASResponse;
import net.spy.memcached.CASValue;
@ -27,6 +29,7 @@ import net.spy.memcached.ConnectionObserver;
import net.spy.memcached.MemcachedClientIF;
import net.spy.memcached.NodeLocator;
import net.spy.memcached.internal.BulkFuture;
import net.spy.memcached.ops.OperationStatus;
import net.spy.memcached.transcoders.SerializingTranscoder;
import net.spy.memcached.transcoders.Transcoder;
import org.junit.Assert;
@ -51,51 +54,70 @@ public class MemcachedCacheBrokerTest
{
private static final byte[] HI = "hi".getBytes();
private static final byte[] HO = "ho".getBytes();
private MemcachedCacheBroker broker;
private MemcachedCache cache;
@Before
public void setUp() throws Exception
{
MemcachedClientIF client = new MockMemcachedClient();
broker = new MemcachedCacheBroker(client, 500, 3600);
cache = new MemcachedCache(client, 500, 3600);
}
@Test
public void testSanity() throws Exception
{
Cache aCache = broker.provideCache("a");
Cache theCache = broker.provideCache("the");
Assert.assertNull(cache.get(new Cache.NamedKey("a", HI)));
put(cache, "a", HI, 1);
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertNull(cache.get(new Cache.NamedKey("the", HI)));
Assert.assertNull(aCache.get(HI));
put(aCache, HI, 1);
Assert.assertEquals(1, get(aCache, HI));
Assert.assertNull(theCache.get(HI));
put(cache, "the", HI, 2);
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertEquals(2, get(cache, "the", HI));
put(theCache, HI, 2);
Assert.assertEquals(1, get(aCache, HI));
Assert.assertEquals(2, get(theCache, HI));
put(cache, "the", HO, 10);
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertNull(cache.get(new Cache.NamedKey("a", HO)));
Assert.assertEquals(2, get(cache, "the", HI));
Assert.assertEquals(10, get(cache, "the", HO));
put(theCache, HO, 10);
Assert.assertEquals(1, get(aCache, HI));
Assert.assertNull(aCache.get(HO));
Assert.assertEquals(2, get(theCache, HI));
Assert.assertEquals(10, get(theCache, HO));
cache.close("the");
Assert.assertEquals(1, get(cache, "a", HI));
Assert.assertNull(cache.get(new Cache.NamedKey("a", HO)));
theCache.close();
Assert.assertEquals(1, get(aCache, HI));
Assert.assertNull(aCache.get(HO));
aCache.close();
cache.close("a");
}
public void put(Cache cache, byte[] key, Integer value)
@Test
public void testGetBulk() throws Exception
{
cache.put(key, Ints.toByteArray(value));
Assert.assertNull(cache.get(new Cache.NamedKey("the", HI)));
put(cache, "the", HI, 2);
put(cache, "the", HO, 10);
Cache.NamedKey key1 = new Cache.NamedKey("the", HI);
Cache.NamedKey key2 = new Cache.NamedKey("the", HO);
Map<Cache.NamedKey, byte[]> result = cache.getBulk(
Lists.newArrayList(
key1,
key2
)
);
Assert.assertEquals(2, Ints.fromByteArray(result.get(key1)));
Assert.assertEquals(10, Ints.fromByteArray(result.get(key2)));
}
public int get(Cache cache, byte[] key)
public void put(Cache cache, String namespace, byte[] key, Integer value)
{
return Ints.fromByteArray(cache.get(key));
cache.put(new Cache.NamedKey(namespace, key), Ints.toByteArray(value));
}
public int get(Cache cache, String namespace, byte[] key)
{
return Ints.fromByteArray(cache.get(new Cache.NamedKey(namespace, key)));
}
}
@ -365,9 +387,67 @@ class MockMemcachedClient implements MemcachedClientIF
}
@Override
public <T> BulkFuture<Map<String, T>> asyncGetBulk(Iterator<String> keys, Transcoder<T> tc)
public <T> BulkFuture<Map<String, T>> asyncGetBulk(final Iterator<String> keys, final Transcoder<T> tc)
{
throw new UnsupportedOperationException("not implemented");
return new BulkFuture<Map<String, T>>()
{
@Override
public boolean isTimeout()
{
return false;
}
@Override
public Map<String, T> getSome(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException
{
return get();
}
@Override
public OperationStatus getStatus()
{
return null;
}
@Override
public boolean cancel(boolean b)
{
return false;
}
@Override
public boolean isCancelled()
{
return false;
}
@Override
public boolean isDone()
{
return true;
}
@Override
public Map<String, T> get() throws InterruptedException, ExecutionException
{
Map<String, T> retVal = Maps.newHashMap();
while(keys.hasNext()) {
String key = keys.next();
CachedData data = theMap.get(key);
retVal.put(key, data != null ? tc.decode(data) : null);
}
return retVal;
}
@Override
public Map<String, T> get(long l, TimeUnit timeUnit)
throws InterruptedException, ExecutionException, TimeoutException
{
return get();
}
};
}
@Override
@ -383,9 +463,9 @@ class MockMemcachedClient implements MemcachedClientIF
}
@Override
public BulkFuture<Map<String, Object>> asyncGetBulk(Collection<String> keys)
public BulkFuture<Map<String, Object>> asyncGetBulk(final Collection<String> keys)
{
throw new UnsupportedOperationException("not implemented");
return asyncGetBulk(keys.iterator(), transcoder);
}
@Override

View File

@ -19,6 +19,7 @@
package com.metamx.druid.indexer.data;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.metamx.common.exception.FormattedException;
@ -56,7 +57,18 @@ public class StringInputRowParser
this.dimensionExclusions = Sets.newHashSet();
if (dimensionExclusions != null) {
this.dimensionExclusions.addAll(dimensionExclusions);
this.dimensionExclusions.addAll(
Lists.transform(
dimensionExclusions, new Function<String, String>()
{
@Override
public String apply(String s)
{
return s.toLowerCase();
}
}
)
);
}
this.dimensionExclusions.add(timestampSpec.getTimestampColumn());

View File

@ -23,21 +23,23 @@ import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.PeekingIterator;
import com.google.common.io.Closeables;
import com.metamx.common.IAE;
import com.metamx.common.Pair;
import com.metamx.common.ISE;
import com.metamx.common.guava.nary.BinaryFn;
import com.metamx.common.logger.Logger;
import com.metamx.common.parsers.Parser;
import com.metamx.common.parsers.ParserUtils;
import com.metamx.druid.CombiningIterable;
import com.metamx.druid.QueryGranularity;
import com.metamx.druid.input.InputRow;
import com.metamx.druid.shard.NoneShardSpec;
import com.metamx.druid.shard.ShardSpec;
import com.metamx.druid.shard.SingleDimensionShardSpec;
@ -45,7 +47,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InvalidJobConfException;
@ -56,8 +58,11 @@ import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.codehaus.jackson.type.TypeReference;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
@ -65,20 +70,26 @@ import org.joda.time.Interval;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Determines appropriate ShardSpecs for a job by determining whether or not partitioning is necessary, and if so,
* choosing the highest cardinality dimension that satisfies the criteria:
*
* <ul>
* <li>Must have exactly one value per row.</li>
* <li>Must not generate oversized partitions. A dimension with N rows having the same value will necessarily
* put all those rows in the same partition, and that partition may be much larger than the target size.</li>
* </ul>
*/
public class DeterminePartitionsJob implements Jobby
{
private static final Logger log = new Logger(DeterminePartitionsJob.class);
private static final Joiner keyJoiner = Joiner.on(",");
private static final Splitter keySplitter = Splitter.on(",");
private static final Joiner tabJoiner = HadoopDruidIndexerConfig.tabJoiner;
private static final Splitter tabSplitter = HadoopDruidIndexerConfig.tabSplitter;
@ -91,146 +102,314 @@ public class DeterminePartitionsJob implements Jobby
this.config = config;
}
public boolean run()
public static void injectSystemProperties(Job job)
{
try {
Job job = new Job(
new Configuration(),
String.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
);
job.getConfiguration().set("io.sort.record.percent", "0.19");
for (String propName : System.getProperties().stringPropertyNames()) {
Configuration conf = job.getConfiguration();
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
final Configuration conf = job.getConfiguration();
for (String propName : System.getProperties().stringPropertyNames()) {
if (propName.startsWith("hadoop.")) {
conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
}
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(DeterminePartitionsMapper.class);
job.setMapOutputValueClass(Text.class);
SortableBytes.useSortableBytesAsKey(job);
job.setCombinerClass(DeterminePartitionsCombiner.class);
job.setReducerClass(DeterminePartitionsReducer.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(DeterminePartitionsJob.DeterminePartitionsOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
config.addInputPaths(job);
config.intoConfiguration(job);
job.setJarByClass(DeterminePartitionsJob.class);
job.submit();
log.info("Job submitted, status available at %s", job.getTrackingURL());
final boolean retVal = job.waitForCompletion(true);
if (retVal) {
log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
FileSystem fileSystem = null;
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
DateTime bucket = segmentGranularity.getStart();
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
if (fileSystem == null) {
fileSystem = partitionInfoPath.getFileSystem(job.getConfiguration());
}
if (fileSystem.exists(partitionInfoPath)) {
List<ShardSpec> specs = config.jsonMapper.readValue(
Utils.openInputStream(job, partitionInfoPath), new TypeReference<List<ShardSpec>>()
{
}
);
List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
for (int i = 0; i < specs.size(); ++i) {
actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
}
shardSpecs.put(bucket, actualSpecs);
}
else {
log.info("Path[%s] didn't exist!?", partitionInfoPath);
}
}
config.setShardSpecs(shardSpecs);
}
else {
log.info("Job completed unsuccessfully.");
}
return retVal;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
public static class DeterminePartitionsMapper extends Mapper<LongWritable, Text, BytesWritable, Text>
public boolean run()
{
private HadoopDruidIndexerConfig config;
private String partitionDimension;
private Parser parser;
private Function<String, DateTime> timestampConverter;
try {
/*
* Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
* in the final segment.
*/
if(!config.getPartitionsSpec().isAssumeGrouped()) {
final Job groupByJob = new Job(
new Configuration(),
String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())
);
injectSystemProperties(groupByJob);
groupByJob.setInputFormatClass(TextInputFormat.class);
groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
groupByJob.setMapOutputKeyClass(BytesWritable.class);
groupByJob.setMapOutputValueClass(NullWritable.class);
groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setOutputKeyClass(BytesWritable.class);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
groupByJob.setJarByClass(DeterminePartitionsJob.class);
config.addInputPaths(groupByJob);
config.intoConfiguration(groupByJob);
FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
groupByJob.submit();
log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
if(!groupByJob.waitForCompletion(true)) {
log.error("Job failed: %s", groupByJob.getJobID().toString());
return false;
}
} else {
log.info("Skipping group-by job.");
}
/*
* Read grouped data and determine appropriate partitions.
*/
final Job dimSelectionJob = new Job(
new Configuration(),
String.format("%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals())
);
dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
injectSystemProperties(dimSelectionJob);
if(!config.getPartitionsSpec().isAssumeGrouped()) {
// Read grouped data from the groupByJob.
dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
} else {
// Directly read the source data, since we assume it's already grouped.
dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
dimSelectionJob.setInputFormatClass(TextInputFormat.class);
config.addInputPaths(dimSelectionJob);
}
SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
dimSelectionJob.setMapOutputValueClass(Text.class);
dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
dimSelectionJob.setOutputKeyClass(BytesWritable.class);
dimSelectionJob.setOutputValueClass(Text.class);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);
config.intoConfiguration(dimSelectionJob);
FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());
dimSelectionJob.submit();
log.info(
"Job %s submitted, status available at: %s",
dimSelectionJob.getJobName(),
dimSelectionJob.getTrackingURL()
);
if(!dimSelectionJob.waitForCompletion(true)) {
log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
return false;
}
/*
* Load partitions determined by the previous job.
*/
log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
FileSystem fileSystem = null;
Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
int shardCount = 0;
for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
DateTime bucket = segmentGranularity.getStart();
final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
if (fileSystem == null) {
fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
}
if (fileSystem.exists(partitionInfoPath)) {
List<ShardSpec> specs = config.jsonMapper.readValue(
Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference<List<ShardSpec>>()
{
}
);
List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
for (int i = 0; i < specs.size(); ++i) {
actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
}
shardSpecs.put(bucket, actualSpecs);
}
else {
log.info("Path[%s] didn't exist!?", partitionInfoPath);
}
}
config.setShardSpecs(shardSpecs);
return true;
} catch(Exception e) {
throw Throwables.propagate(e);
}
}
public static class DeterminePartitionsGroupByMapper extends HadoopDruidIndexerMapper<BytesWritable, NullWritable>
{
private QueryGranularity rollupGranularity = null;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
partitionDimension = config.getPartitionDimension();
parser = config.getDataSpec().getParser();
timestampConverter = ParserUtils.createTimestampParser(config.getTimestampFormat());
super.setup(context);
rollupGranularity = getConfig().getRollupSpec().getRollupGranularity();
}
@Override
protected void innerMap(
InputRow inputRow,
Text text,
Context context
) throws IOException, InterruptedException
{
// Create group key
// TODO -- There are more efficient ways to do this
final Map<String, Set<String>> dims = Maps.newTreeMap();
for(final String dim : inputRow.getDimensions()) {
final Set<String> dimValues = ImmutableSortedSet.copyOf(inputRow.getDimension(dim));
if(dimValues.size() > 0) {
dims.put(dim, dimValues);
}
}
final List<Object> groupKey = ImmutableList.of(
rollupGranularity.truncate(inputRow.getTimestampFromEpoch()),
dims
);
context.write(
new BytesWritable(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey)),
NullWritable.get()
);
}
}
public static class DeterminePartitionsGroupByReducer
extends Reducer<BytesWritable, NullWritable, BytesWritable, NullWritable>
{
@Override
protected void reduce(
BytesWritable key,
Iterable<NullWritable> values,
Context context
) throws IOException, InterruptedException
{
context.write(key, NullWritable.get());
}
}
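The point of this group-by pass, in miniature: rows are keyed by the rollup-truncated timestamp plus their sorted dimension values, so duplicates collapse and the later per-dimension counts reflect rows as they will appear in the final segment. A small stand-alone illustration of that keying, using plain collections in place of the JSON-serialized key the mapper actually writes and a made-up hourly rollup granularity.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

class GroupKeySketch
{
  // Hypothetical rollup granularity: truncate timestamps to the hour.
  static long truncateToHour(long timestampMillis)
  {
    return timestampMillis - (timestampMillis % 3600000L);
  }

  static List<Object> groupKey(long timestampMillis, Map<String, Set<String>> dims)
  {
    return Arrays.<Object>asList(truncateToHour(timestampMillis), dims);
  }

  public static void main(String[] args)
  {
    final Map<String, Set<String>> dims = new TreeMap<String, Set<String>>();
    dims.put("page", new TreeSet<String>(Arrays.asList("Home")));

    // Two raw rows, ten minutes apart, identical dimensions: one grouped row after this job.
    final List<Object> key1 = groupKey(1359072000000L, dims);
    final List<Object> key2 = groupKey(1359072600000L, dims);
    System.out.println(key1.equals(key2));  // true: the reducer emits this key exactly once
  }
}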
/**
* This DimSelection mapper runs on data generated by our GroupBy job.
*/
public static class DeterminePartitionsDimSelectionPostGroupByMapper
extends Mapper<BytesWritable, NullWritable, BytesWritable, Text>
{
private DeterminePartitionsDimSelectionMapperHelper helper;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
final String partitionDimension = config.getPartitionDimension();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension);
}
@Override
protected void map(
LongWritable key, Text value, Context context
BytesWritable key, NullWritable value, Context context
) throws IOException, InterruptedException
{
Map<String, Object> values = parser.parse(value.toString());
final DateTime timestamp;
final String tsStr = (String) values.get(config.getTimestampColumnName());
try {
timestamp = timestampConverter.apply(tsStr);
}
catch(IllegalArgumentException e) {
if(config.isIgnoreInvalidRows()) {
context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1);
return; // we're ignoring this invalid row
}
else {
throw e;
}
}
final List<Object> timeAndDims = HadoopDruidIndexerConfig.jsonMapper.readValue(key.getBytes(), List.class);
final DateTime timestamp = new DateTime(timeAndDims.get(0));
final Map<String, Iterable<String>> dims = (Map<String, Iterable<String>>) timeAndDims.get(1);
helper.emitDimValueCounts(context, timestamp, dims);
}
}
/**
* This DimSelection mapper runs on raw input data that we assume has already been grouped.
*/
public static class DeterminePartitionsDimSelectionAssumeGroupedMapper
extends HadoopDruidIndexerMapper<BytesWritable, Text>
{
private DeterminePartitionsDimSelectionMapperHelper helper;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
super.setup(context);
final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
final String partitionDimension = config.getPartitionDimension();
helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension);
}
@Override
protected void innerMap(
InputRow inputRow,
Text text,
Context context
) throws IOException, InterruptedException
{
final Map<String, Iterable<String>> dims = Maps.newHashMap();
for(final String dim : inputRow.getDimensions()) {
dims.put(dim, inputRow.getDimension(dim));
}
helper.emitDimValueCounts(context, new DateTime(inputRow.getTimestampFromEpoch()), dims);
}
}
/**
* Since we have two slightly different DimSelectionMappers, this class encapsulates the shared logic for
* emitting dimension value counts.
*/
public static class DeterminePartitionsDimSelectionMapperHelper
{
private final HadoopDruidIndexerConfig config;
private final String partitionDimension;
public DeterminePartitionsDimSelectionMapperHelper(HadoopDruidIndexerConfig config, String partitionDimension)
{
this.config = config;
this.partitionDimension = partitionDimension;
}
public void emitDimValueCounts(
TaskInputOutputContext<? extends Writable, ? extends Writable, BytesWritable, Text> context,
DateTime timestamp,
Map<String, Iterable<String>> dims
) throws IOException, InterruptedException
{
final Optional<Interval> maybeInterval = config.getGranularitySpec().bucketInterval(timestamp);
if(maybeInterval.isPresent()) {
final DateTime bucket = maybeInterval.get().getStart();
final String outKey = keyJoiner.join(bucket.toString(), partitionDimension);
final Object dimValue = values.get(partitionDimension);
if (! (dimValue instanceof String)) {
throw new IAE("Cannot partition on a tag-style dimension[%s], line was[%s]", partitionDimension, value);
if(!maybeInterval.isPresent()) {
throw new ISE("WTF?! No bucket found for timestamp: %s", timestamp);
}
final Interval interval = maybeInterval.get();
final byte[] groupKey = interval.getStart().toString().getBytes(Charsets.UTF_8);
for(final Map.Entry<String, Iterable<String>> dimAndValues : dims.entrySet()) {
final String dim = dimAndValues.getKey();
if(partitionDimension == null || partitionDimension.equals(dim)) {
final Iterable<String> dimValues = dimAndValues.getValue();
if(Iterables.size(dimValues) == 1) {
// Emit this value.
write(context, groupKey, new DimValueCount(dim, Iterables.getOnlyElement(dimValues), 1));
} else {
// This dimension is unsuitable for partitioning. Poison it by emitting a negative value.
write(context, groupKey, new DimValueCount(dim, "", -1));
}
}
final byte[] groupKey = outKey.getBytes(Charsets.UTF_8);
write(context, groupKey, "", 1);
write(context, groupKey, (String) dimValue, 1);
}
}
}
private static abstract class DeterminePartitionsBaseReducer extends Reducer<BytesWritable, Text, BytesWritable, Text>
private static abstract class DeterminePartitionsDimSelectionBaseReducer
extends Reducer<BytesWritable, Text, BytesWritable, Text>
{
protected static volatile HadoopDruidIndexerConfig config = null;
@ -240,7 +419,7 @@ public class DeterminePartitionsJob implements Jobby
throws IOException, InterruptedException
{
if (config == null) {
synchronized (DeterminePartitionsBaseReducer.class) {
synchronized (DeterminePartitionsDimSelectionBaseReducer.class) {
if (config == null) {
config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
}
@ -255,166 +434,275 @@ public class DeterminePartitionsJob implements Jobby
{
SortableBytes keyBytes = SortableBytes.fromBytesWritable(key);
final Iterable<Pair<String, Long>> combinedIterable = combineRows(values);
final Iterable<DimValueCount> combinedIterable = combineRows(values);
innerReduce(context, keyBytes, combinedIterable);
}
protected abstract void innerReduce(
Context context, SortableBytes keyBytes, Iterable<Pair<String, Long>> combinedIterable
Context context, SortableBytes keyBytes, Iterable<DimValueCount> combinedIterable
) throws IOException, InterruptedException;
private Iterable<Pair<String, Long>> combineRows(Iterable<Text> input)
private Iterable<DimValueCount> combineRows(Iterable<Text> input)
{
return new CombiningIterable<Pair<String, Long>>(
return new CombiningIterable<DimValueCount>(
Iterables.transform(
input,
new Function<Text, Pair<String, Long>>()
new Function<Text, DimValueCount>()
{
@Override
public Pair<String, Long> apply(Text input)
public DimValueCount apply(Text input)
{
Iterator<String> splits = tabSplitter.split(input.toString()).iterator();
return new Pair<String, Long>(splits.next(), Long.parseLong(splits.next()));
return DimValueCount.fromText(input);
}
}
),
new Comparator<Pair<String, Long>>()
new Comparator<DimValueCount>()
{
@Override
public int compare(Pair<String, Long> o1, Pair<String, Long> o2)
public int compare(DimValueCount o1, DimValueCount o2)
{
return o1.lhs.compareTo(o2.lhs);
return ComparisonChain.start().compare(o1.dim, o2.dim).compare(o1.value, o2.value).result();
}
},
new BinaryFn<Pair<String, Long>, Pair<String, Long>, Pair<String, Long>>()
new BinaryFn<DimValueCount, DimValueCount, DimValueCount>()
{
@Override
public Pair<String, Long> apply(Pair<String, Long> arg1, Pair<String, Long> arg2)
public DimValueCount apply(DimValueCount arg1, DimValueCount arg2)
{
if (arg2 == null) {
return arg1;
}
return new Pair<String, Long>(arg1.lhs, arg1.rhs + arg2.rhs);
// Respect "poisoning" (negative values mean we can't use this dimension)
final int newNumRows = (arg1.numRows >= 0 && arg2.numRows >= 0 ? arg1.numRows + arg2.numRows : -1);
return new DimValueCount(arg1.dim, arg1.value, newNumRows);
}
}
);
}
}
public static class DeterminePartitionsCombiner extends DeterminePartitionsBaseReducer
public static class DeterminePartitionsDimSelectionCombiner extends DeterminePartitionsDimSelectionBaseReducer
{
@Override
protected void innerReduce(
Context context, SortableBytes keyBytes, Iterable<Pair<String, Long>> combinedIterable
Context context, SortableBytes keyBytes, Iterable<DimValueCount> combinedIterable
) throws IOException, InterruptedException
{
for (Pair<String, Long> pair : combinedIterable) {
write(context, keyBytes.getGroupKey(), pair.lhs, pair.rhs);
for (DimValueCount dvc : combinedIterable) {
write(context, keyBytes.getGroupKey(), dvc);
}
}
}
public static class DeterminePartitionsReducer extends DeterminePartitionsBaseReducer
public static class DeterminePartitionsDimSelectionReducer extends DeterminePartitionsDimSelectionBaseReducer
{
String previousBoundary;
long runningTotal;
private static final double SHARD_COMBINE_THRESHOLD = 0.25;
private static final double SHARD_OVERSIZE_THRESHOLD = 1.5;
@Override
protected void innerReduce(
Context context, SortableBytes keyBytes, Iterable<Pair<String, Long>> combinedIterable
Context context, SortableBytes keyBytes, Iterable<DimValueCount> combinedIterable
) throws IOException, InterruptedException
{
PeekingIterator<Pair<String, Long>> iterator = Iterators.peekingIterator(combinedIterable.iterator());
Pair<String, Long> totalPair = iterator.next();
PeekingIterator<DimValueCount> iterator = Iterators.peekingIterator(combinedIterable.iterator());
Preconditions.checkState(totalPair.lhs.equals(""), "Total pair value was[%s]!?", totalPair.lhs);
long totalRows = totalPair.rhs;
// "iterator" will take us over many candidate dimensions
DimPartitions currentDimPartitions = null;
DimPartition currentDimPartition = null;
String currentDimPartitionStart = null;
boolean currentDimSkip = false;
long numPartitions = Math.max(totalRows / config.getTargetPartitionSize(), 1);
long expectedRowsPerPartition = totalRows / numPartitions;
// We'll store possible partitions in here
final Map<String, DimPartitions> dimPartitionss = Maps.newHashMap();
class PartitionsList extends ArrayList<ShardSpec>
{
}
List<ShardSpec> partitions = new PartitionsList();
while(iterator.hasNext()) {
final DimValueCount dvc = iterator.next();
runningTotal = 0;
Pair<String, Long> prev = null;
previousBoundary = null;
while (iterator.hasNext()) {
Pair<String, Long> curr = iterator.next();
if (runningTotal > expectedRowsPerPartition) {
Preconditions.checkNotNull(
prev, "Prev[null] while runningTotal[%s] was > expectedRows[%s]!?", runningTotal, expectedRowsPerPartition
);
addPartition(partitions, curr.lhs);
if(currentDimPartitions == null || !currentDimPartitions.dim.equals(dvc.dim)) {
// Starting a new dimension! Exciting!
currentDimPartitions = new DimPartitions(dvc.dim);
currentDimPartition = new DimPartition();
currentDimPartitionStart = null;
currentDimSkip = false;
}
runningTotal += curr.rhs;
prev = curr;
// Respect poisoning
if(!currentDimSkip && dvc.numRows < 0) {
log.info("Cannot partition on multi-valued dimension: %s", dvc.dim);
currentDimSkip = true;
}
if(currentDimSkip) {
continue;
}
// See if we need to cut a new partition ending immediately before this dimension value
if(currentDimPartition.rows > 0 && currentDimPartition.rows + dvc.numRows >= config.getTargetPartitionSize()) {
final ShardSpec shardSpec = new SingleDimensionShardSpec(
currentDimPartitions.dim,
currentDimPartitionStart,
dvc.value,
currentDimPartitions.partitions.size()
);
log.info(
"Adding possible shard with %,d rows and %,d unique values: %s",
currentDimPartition.rows,
currentDimPartition.cardinality,
shardSpec
);
currentDimPartition.shardSpec = shardSpec;
currentDimPartitions.partitions.add(currentDimPartition);
currentDimPartition = new DimPartition();
currentDimPartitionStart = dvc.value;
}
// Update counters
currentDimPartition.cardinality ++;
currentDimPartition.rows += dvc.numRows;
if(!iterator.hasNext() || !currentDimPartitions.dim.equals(iterator.peek().dim)) {
// Finalize the current dimension
if(currentDimPartition.rows > 0) {
// One more shard to go
final ShardSpec shardSpec;
if (currentDimPartitions.partitions.isEmpty()) {
shardSpec = new NoneShardSpec();
} else {
if(currentDimPartition.rows < config.getTargetPartitionSize() * SHARD_COMBINE_THRESHOLD) {
// Combine with previous shard
final DimPartition previousDimPartition = currentDimPartitions.partitions.remove(
currentDimPartitions.partitions.size() - 1
);
final SingleDimensionShardSpec previousShardSpec = (SingleDimensionShardSpec) previousDimPartition.shardSpec;
shardSpec = new SingleDimensionShardSpec(
currentDimPartitions.dim,
previousShardSpec.getStart(),
null,
previousShardSpec.getPartitionNum()
);
log.info("Removing possible shard: %s", previousShardSpec);
currentDimPartition.rows += previousDimPartition.rows;
currentDimPartition.cardinality += previousDimPartition.cardinality;
} else {
// Create new shard
shardSpec = new SingleDimensionShardSpec(
currentDimPartitions.dim,
currentDimPartitionStart,
null,
currentDimPartitions.partitions.size()
);
}
}
log.info(
"Adding possible shard with %,d rows and %,d unique values: %s",
currentDimPartition.rows,
currentDimPartition.cardinality,
shardSpec
);
currentDimPartition.shardSpec = shardSpec;
currentDimPartitions.partitions.add(currentDimPartition);
}
log.info(
"Completed dimension[%s]: %,d possible shards with %,d unique values",
currentDimPartitions.dim,
currentDimPartitions.partitions.size(),
currentDimPartitions.getCardinality()
);
// Add ourselves to the partitions map
dimPartitionss.put(currentDimPartitions.dim, currentDimPartitions);
}
}
if (partitions.isEmpty()) {
partitions.add(new NoneShardSpec());
} else if (((double) runningTotal / (double) expectedRowsPerPartition) < 0.25) {
final SingleDimensionShardSpec lastSpec = (SingleDimensionShardSpec) partitions.remove(partitions.size() - 1);
partitions.add(
new SingleDimensionShardSpec(
config.getPartitionDimension(),
lastSpec.getStart(),
null,
lastSpec.getPartitionNum()
)
);
} else {
partitions.add(
new SingleDimensionShardSpec(
config.getPartitionDimension(),
previousBoundary,
null,
partitions.size()
)
);
// Choose best dimension
if(dimPartitionss.isEmpty()) {
throw new ISE("No suitable partitioning dimension found!");
}
DateTime bucket = new DateTime(
Iterables.get(keySplitter.split(new String(keyBytes.getGroupKey(), Charsets.UTF_8)), 0)
);
OutputStream out = Utils.makePathAndOutputStream(
final int totalRows = dimPartitionss.values().iterator().next().getRows();
int maxCardinality = -1;
DimPartitions maxCardinalityPartitions = null;
for(final DimPartitions dimPartitions : dimPartitionss.values()) {
if(dimPartitions.getRows() != totalRows) {
throw new ISE(
"WTF?! Dimension[%s] row count %,d != expected row count %,d",
dimPartitions.dim,
dimPartitions.getRows(),
totalRows
);
}
// Make sure none of these shards are oversized
boolean oversized = false;
for(final DimPartition partition : dimPartitions.partitions) {
if(partition.rows > config.getTargetPartitionSize() * SHARD_OVERSIZE_THRESHOLD) {
log.info("Dimension[%s] has an oversized shard: %s", dimPartitions.dim, partition.shardSpec);
oversized = true;
}
}
if(oversized) {
continue;
}
if(dimPartitions.getCardinality() > maxCardinality) {
maxCardinality = dimPartitions.getCardinality();
maxCardinalityPartitions = dimPartitions;
}
}
if(maxCardinalityPartitions == null) {
throw new ISE("No suitable partitioning dimension found!");
}
final DateTime bucket = new DateTime(new String(keyBytes.getGroupKey(), Charsets.UTF_8));
final OutputStream out = Utils.makePathAndOutputStream(
context, config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)), config.isOverwriteFiles()
);
for (ShardSpec partition : partitions) {
log.info("%s", partition);
final List<ShardSpec> chosenShardSpecs = Lists.transform(
maxCardinalityPartitions.partitions, new Function<DimPartition, ShardSpec>()
{
@Override
public ShardSpec apply(DimPartition dimPartition)
{
return dimPartition.shardSpec;
}
}
);
log.info("Chosen partitions:");
for (ShardSpec shardSpec : chosenShardSpecs) {
log.info(" %s", shardSpec);
}
try {
config.jsonMapper.writeValue(out, partitions);
HadoopDruidIndexerConfig.jsonMapper.writerWithType(new TypeReference<List<ShardSpec>>() {}).writeValue(
out,
chosenShardSpecs
);
}
finally {
Closeables.close(out, false);
}
}
private void addPartition(List<ShardSpec> partitions, String boundary)
{
partitions.add(
new SingleDimensionShardSpec(
config.getPartitionDimension(),
previousBoundary,
boundary,
partitions.size()
)
);
previousBoundary = boundary;
runningTotal = 0;
}
}
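Viewed on its own, the shard-sizing heuristic above reduces to: cut a shard whenever the running row count would reach the target, and fold an undersized tail (below SHARD_COMBINE_THRESHOLD of the target) into the previous shard. A rough standalone sketch, ignoring cardinality tracking and ShardSpec construction:
import java.util.ArrayList;
import java.util.List;
public class ShardSizingSketch
{
  // "counts" is the sorted stream of per-value row counts for one candidate dimension.
  public static List<Integer> shardSizes(List<Integer> counts, long targetPartitionSize)
  {
    final List<Integer> shards = new ArrayList<Integer>();
    int current = 0;
    for (final int count : counts) {
      if (current > 0 && current + count >= targetPartitionSize) {
        shards.add(current); // cut a shard ending just before this value
        current = 0;
      }
      current += count;
    }
    if (current > 0) {
      if (!shards.isEmpty() && current < targetPartitionSize * 0.25) {
        // Undersized tail: merge into the previous shard (SHARD_COMBINE_THRESHOLD).
        shards.add(shards.remove(shards.size() - 1) + current);
      } else {
        shards.add(current);
      }
    }
    return shards;
  }
}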
public static class DeterminePartitionsOutputFormat extends FileOutputFormat
public static class DeterminePartitionsDimSelectionOutputFormat extends FileOutputFormat
{
@Override
public RecordWriter getRecordWriter(final TaskAttemptContext job) throws IOException, InterruptedException
@ -444,17 +732,81 @@ public class DeterminePartitionsJob implements Jobby
}
}
private static class DimPartitions
{
public final String dim;
public final List<DimPartition> partitions = Lists.newArrayList();
private DimPartitions(String dim)
{
this.dim = dim;
}
public int getCardinality()
{
int sum = 0;
for(final DimPartition dimPartition : partitions) {
sum += dimPartition.cardinality;
}
return sum;
}
public int getRows()
{
int sum = 0;
for(final DimPartition dimPartition : partitions) {
sum += dimPartition.rows;
}
return sum;
}
}
private static class DimPartition
{
public ShardSpec shardSpec = null;
public int cardinality = 0;
public int rows = 0;
}
private static class DimValueCount
{
public final String dim;
public final String value;
public final int numRows;
private DimValueCount(String dim, String value, int numRows)
{
this.dim = dim;
this.value = value;
this.numRows = numRows;
}
public Text toText()
{
return new Text(tabJoiner.join(dim, String.valueOf(numRows), value));
}
public static DimValueCount fromText(Text text)
{
final Iterator<String> splits = tabSplitter.limit(3).split(text.toString()).iterator();
final String dim = splits.next();
final int numRows = Integer.parseInt(splits.next());
final String value = splits.next();
return new DimValueCount(dim, value, numRows);
}
}
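The tab-delimited layout puts numRows in the middle so that a value containing tabs still round-trips when re-split with limit(3). A small round-trip sketch, assuming tabJoiner and tabSplitter are Guava's Joiner.on("\t") and Splitter.on("\t"):
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import java.util.Iterator;
public class DimValueCountRoundTrip
{
  public static void main(String[] args)
  {
    final Joiner tabJoiner = Joiner.on("\t");
    final Splitter tabSplitter = Splitter.on("\t");
    // Serialize as dim TAB numRows TAB value; value goes last so embedded tabs survive limit(3).
    final String serialized = tabJoiner.join("host", String.valueOf(3), "web\t01");
    final Iterator<String> splits = tabSplitter.limit(3).split(serialized).iterator();
    final String dim = splits.next();
    final int numRows = Integer.parseInt(splits.next());
    final String value = splits.next();
    System.out.println(dim + " -> " + value + " (" + numRows + " rows)");
  }
}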
private static void write(
TaskInputOutputContext<? extends Writable, ? extends Writable, BytesWritable, Text> context,
final byte[] groupKey,
String value,
long numRows
DimValueCount dimValueCount
)
throws IOException, InterruptedException
{
context.write(
new SortableBytes(groupKey, value.getBytes(HadoopDruidIndexerConfig.javaNativeCharset)).toBytesWritable(),
new Text(tabJoiner.join(value, numRows))
new SortableBytes(groupKey, tabJoiner.join(dimValueCount.dim, dimValueCount.value).getBytes(HadoopDruidIndexerConfig.javaNativeCharset)).toBytesWritable(),
dimValueCount.toText()
);
}
}


@ -34,15 +34,20 @@ import com.metamx.common.MapUtils;
import com.metamx.common.guava.FunctionalIterable;
import com.metamx.common.logger.Logger;
import com.metamx.druid.RegisteringNode;
import com.metamx.druid.aggregation.AggregatorFactory;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.index.v1.serde.Registererer;
import com.metamx.druid.indexer.data.DataSpec;
import com.metamx.druid.indexer.data.StringInputRowParser;
import com.metamx.druid.indexer.data.TimestampSpec;
import com.metamx.druid.indexer.data.ToLowercaseDataSpec;
import com.metamx.druid.indexer.granularity.GranularitySpec;
import com.metamx.druid.indexer.granularity.UniformGranularitySpec;
import com.metamx.druid.indexer.partitions.PartitionsSpec;
import com.metamx.druid.indexer.path.PathSpec;
import com.metamx.druid.indexer.rollup.DataRollupSpec;
import com.metamx.druid.indexer.updater.UpdaterJobSpec;
import com.metamx.druid.input.InputRow;
import com.metamx.druid.jackson.DefaultObjectMapper;
import com.metamx.druid.shard.ShardSpec;
import com.metamx.druid.utils.JodaUtils;
@ -50,6 +55,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.annotate.JsonCreator;
import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;
@ -60,8 +66,6 @@ import org.joda.time.format.ISODateTimeFormat;
import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collections;
@ -162,8 +166,6 @@ public class HadoopDruidIndexerConfig
private static final String CONFIG_PROPERTY = "druid.indexer.config";
@Deprecated
private volatile List<Interval> intervals;
private volatile String dataSource;
private volatile String timestampColumnName;
private volatile String timestampFormat;
@ -175,8 +177,7 @@ public class HadoopDruidIndexerConfig
private volatile String jobOutputDir;
private volatile String segmentOutputDir;
private volatile DateTime version = new DateTime();
private volatile String partitionDimension;
private volatile Long targetPartitionSize;
private volatile PartitionsSpec partitionsSpec;
private volatile boolean leaveIntermediate = false;
private volatile boolean cleanupOnFailure = true;
private volatile Map<DateTime, List<HadoopyShardSpec>> shardSpecs = ImmutableMap.of();
@ -186,22 +187,97 @@ public class HadoopDruidIndexerConfig
private volatile boolean ignoreInvalidRows = false;
private volatile List<String> registererers = Lists.newArrayList();
@JsonCreator
public HadoopDruidIndexerConfig(
final @JsonProperty("intervals") List<Interval> intervals,
final @JsonProperty("dataSource") String dataSource,
final @JsonProperty("timestampColumnName") String timestampColumnName,
final @JsonProperty("timestampFormat") String timestampFormat,
final @JsonProperty("dataSpec") DataSpec dataSpec,
final @JsonProperty("segmentGranularity") Granularity segmentGranularity,
final @JsonProperty("granularitySpec") GranularitySpec granularitySpec,
final @JsonProperty("pathSpec") PathSpec pathSpec,
final @JsonProperty("jobOutputDir") String jobOutputDir,
final @JsonProperty("segmentOutputDir") String segmentOutputDir,
final @JsonProperty("version") DateTime version,
final @JsonProperty("partitionDimension") String partitionDimension,
final @JsonProperty("targetPartitionSize") Long targetPartitionSize,
final @JsonProperty("partitionsSpec") PartitionsSpec partitionsSpec,
final @JsonProperty("leaveIntermediate") boolean leaveIntermediate,
final @JsonProperty("cleanupOnFailure") boolean cleanupOnFailure,
final @JsonProperty("shardSpecs") Map<DateTime, List<HadoopyShardSpec>> shardSpecs,
final @JsonProperty("overwriteFiles") boolean overwriteFiles,
final @JsonProperty("rollupSpec") DataRollupSpec rollupSpec,
final @JsonProperty("updaterJobSpec") UpdaterJobSpec updaterJobSpec,
final @JsonProperty("ignoreInvalidRows") boolean ignoreInvalidRows,
final @JsonProperty("registererers") List<String> registererers
)
{
this.dataSource = dataSource;
this.timestampColumnName = timestampColumnName;
this.timestampFormat = timestampFormat;
this.dataSpec = dataSpec;
this.granularitySpec = granularitySpec;
this.pathSpec = pathSpec;
this.jobOutputDir = jobOutputDir;
this.segmentOutputDir = segmentOutputDir;
this.version = version;
this.partitionsSpec = partitionsSpec;
this.leaveIntermediate = leaveIntermediate;
this.cleanupOnFailure = cleanupOnFailure;
this.shardSpecs = shardSpecs;
this.overwriteFiles = overwriteFiles;
this.rollupSpec = rollupSpec;
this.updaterJobSpec = updaterJobSpec;
this.ignoreInvalidRows = ignoreInvalidRows;
this.registererers = registererers;
if(partitionsSpec != null) {
Preconditions.checkArgument(
partitionDimension == null && targetPartitionSize == null,
"Cannot mix partitionsSpec with partitionDimension/targetPartitionSize"
);
this.partitionsSpec = partitionsSpec;
} else {
// Backwards compatibility
this.partitionsSpec = new PartitionsSpec(partitionDimension, targetPartitionSize, false);
}
if(granularitySpec != null) {
Preconditions.checkArgument(
segmentGranularity == null && intervals == null,
"Cannot mix granularitySpec with segmentGranularity/intervals"
);
} else {
// Backwards compatibility
this.segmentGranularity = segmentGranularity;
if(segmentGranularity != null && intervals != null) {
this.granularitySpec = new UniformGranularitySpec(segmentGranularity, intervals);
}
}
}
/**
* Default constructor does nothing. The caller is expected to use the various setX methods.
*/
public HadoopDruidIndexerConfig()
{
}
public List<Interval> getIntervals()
{
return JodaUtils.condenseIntervals(getGranularitySpec().bucketIntervals());
}
@Deprecated
@JsonProperty
public void setIntervals(List<Interval> intervals)
{
Preconditions.checkState(this.granularitySpec == null, "Use setGranularitySpec");
Preconditions.checkState(this.granularitySpec == null, "Cannot mix setIntervals with granularitySpec");
Preconditions.checkState(this.segmentGranularity != null, "Cannot use setIntervals without segmentGranularity");
// For backwards compatibility
this.intervals = intervals;
if (this.segmentGranularity != null) {
this.granularitySpec = new UniformGranularitySpec(this.segmentGranularity, this.intervals);
}
this.granularitySpec = new UniformGranularitySpec(this.segmentGranularity, intervals);
}
@JsonProperty
@ -237,6 +313,11 @@ public class HadoopDruidIndexerConfig
this.timestampFormat = timestampFormat;
}
public TimestampSpec getTimestampSpec()
{
return new TimestampSpec(timestampColumnName, timestampFormat);
}
@JsonProperty
public DataSpec getDataSpec()
{
@ -248,17 +329,30 @@ public class HadoopDruidIndexerConfig
this.dataSpec = new ToLowercaseDataSpec(dataSpec);
}
@Deprecated
@JsonProperty
public void setSegmentGranularity(Granularity segmentGranularity)
public StringInputRowParser getParser()
{
Preconditions.checkState(this.granularitySpec == null, "Use setGranularitySpec");
final List<String> dimensionExclusions;
// For backwards compatibility
this.segmentGranularity = segmentGranularity;
if (this.intervals != null) {
this.granularitySpec = new UniformGranularitySpec(this.segmentGranularity, this.intervals);
if(getDataSpec().hasCustomDimensions()) {
dimensionExclusions = null;
} else {
dimensionExclusions = Lists.newArrayList();
dimensionExclusions.add(getTimestampColumnName());
dimensionExclusions.addAll(
Lists.transform(
getRollupSpec().getAggs(), new Function<AggregatorFactory, String>()
{
@Override
public String apply(AggregatorFactory aggregatorFactory)
{
return aggregatorFactory.getName();
}
}
)
);
}
return new StringInputRowParser(getTimestampSpec(), getDataSpec(), dimensionExclusions);
}
@JsonProperty
@ -269,15 +363,20 @@ public class HadoopDruidIndexerConfig
public void setGranularitySpec(GranularitySpec granularitySpec)
{
Preconditions.checkState(this.intervals == null, "Use setGranularitySpec instead of setIntervals");
Preconditions.checkState(
this.segmentGranularity == null,
"Use setGranularitySpec instead of setSegmentGranularity"
);
this.granularitySpec = granularitySpec;
}
@JsonProperty
public PartitionsSpec getPartitionsSpec()
{
return partitionsSpec;
}
public void setPartitionsSpec(PartitionsSpec partitionsSpec)
{
this.partitionsSpec = partitionsSpec;
}
@JsonProperty
public PathSpec getPathSpec()
{
@ -322,31 +421,19 @@ public class HadoopDruidIndexerConfig
this.version = version;
}
@JsonProperty
public String getPartitionDimension()
{
return partitionDimension;
}
public void setPartitionDimension(String partitionDimension)
{
this.partitionDimension = (partitionDimension == null) ? partitionDimension : partitionDimension;
return partitionsSpec.getPartitionDimension();
}
public boolean partitionByDimension()
{
return partitionDimension != null;
return partitionsSpec.isDeterminingPartitions();
}
@JsonProperty
public Long getTargetPartitionSize()
{
return targetPartitionSize;
}
public void setTargetPartitionSize(Long targetPartitionSize)
{
this.targetPartitionSize = targetPartitionSize;
return partitionsSpec.getTargetPartitionSize();
}
public boolean isUpdaterJobSpecSet()
@ -447,21 +534,15 @@ public class HadoopDruidIndexerConfig
********************************************/
/**
* Get the proper bucket for this "row"
* Get the proper bucket for some input row.
*
* @param theMap a Map that represents a "row", keys are column names, values are, well, values
* @param inputRow an InputRow
*
* @return the Bucket that this row belongs to
*/
public Optional<Bucket> getBucket(Map<String, String> theMap)
public Optional<Bucket> getBucket(InputRow inputRow)
{
final Optional<Interval> timeBucket = getGranularitySpec().bucketInterval(
new DateTime(
theMap.get(
getTimestampColumnName()
)
)
);
final Optional<Interval> timeBucket = getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()));
if (!timeBucket.isPresent()) {
return Optional.absent();
}
@ -473,7 +554,7 @@ public class HadoopDruidIndexerConfig
for (final HadoopyShardSpec hadoopyShardSpec : shards) {
final ShardSpec actualSpec = hadoopyShardSpec.getActualSpec();
if (actualSpec.isInChunk(theMap)) {
if (actualSpec.isInChunk(inputRow)) {
return Optional.of(
new Bucket(
hadoopyShardSpec.getShardNum(),
@ -484,7 +565,7 @@ public class HadoopDruidIndexerConfig
}
}
throw new ISE("row[%s] doesn't fit in any shard[%s]", theMap, shards);
throw new ISE("row[%s] doesn't fit in any shard[%s]", inputRow, shards);
}
public Set<Interval> getSegmentGranularIntervals()
@ -566,6 +647,11 @@ public class HadoopDruidIndexerConfig
return new Path(makeIntermediatePath(), "segmentDescriptorInfo");
}
public Path makeGroupedDataDir()
{
return new Path(makeIntermediatePath(), "groupedData");
}
public Path makeDescriptorInfoPath(DataSegment segment)
{
return new Path(makeDescriptorInfoDir(), String.format("%s.json", segment.getIdentifier().replace(":", "")));
@ -626,10 +712,5 @@ public class HadoopDruidIndexerConfig
final int nIntervals = getIntervals().size();
Preconditions.checkArgument(nIntervals > 0, "intervals.size()[%s] <= 0", nIntervals);
if (partitionByDimension()) {
Preconditions.checkNotNull(partitionDimension);
Preconditions.checkNotNull(targetPartitionSize);
}
}
}


@ -0,0 +1,66 @@
package com.metamx.druid.indexer;
import com.metamx.common.RE;
import com.metamx.druid.indexer.data.StringInputRowParser;
import com.metamx.druid.input.InputRow;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.joda.time.DateTime;
import java.io.IOException;
public abstract class HadoopDruidIndexerMapper<KEYOUT, VALUEOUT> extends Mapper<LongWritable, Text, KEYOUT, VALUEOUT>
{
private HadoopDruidIndexerConfig config;
private StringInputRowParser parser;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
parser = config.getParser();
}
public HadoopDruidIndexerConfig getConfig()
{
return config;
}
public StringInputRowParser getParser()
{
return parser;
}
@Override
protected void map(
LongWritable key, Text value, Context context
) throws IOException, InterruptedException
{
try {
final InputRow inputRow;
try {
inputRow = parser.parse(value.toString());
}
catch (IllegalArgumentException e) {
if (config.isIgnoreInvalidRows()) {
context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1);
return; // we're ignoring this invalid row
} else {
throw e;
}
}
if(config.getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch())).isPresent()) {
innerMap(inputRow, value, context);
}
}
catch (RuntimeException e) {
throw new RE(e, "Failure on row[%s]", value);
}
}
abstract protected void innerMap(InputRow inputRow, Text text, Context context)
throws IOException, InterruptedException;
}
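Usage note: subclasses implement only innerMap and receive rows that parsed successfully and fall inside a configured bucket interval; invalid rows are either counted and skipped or rethrown, per isIgnoreInvalidRows. A hypothetical minimal subclass (RowCountingMapper is illustrative, not part of this patch):
import com.metamx.druid.indexer.HadoopDruidIndexerMapper;
import com.metamx.druid.input.InputRow;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
public class RowCountingMapper extends HadoopDruidIndexerMapper<NullWritable, NullWritable>
{
  @Override
  protected void innerMap(InputRow inputRow, Text text, Context context)
      throws IOException, InterruptedException
  {
    // Rows reaching this point are valid and in-interval; just count them.
    context.getCounter("druid", "validRows").increment(1);
  }
}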


@ -19,31 +19,25 @@
package com.metamx.druid.indexer;
import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.primitives.Longs;
import com.metamx.common.ISE;
import com.metamx.common.RE;
import com.metamx.common.guava.FunctionalIterable;
import com.metamx.common.logger.Logger;
import com.metamx.common.parsers.Parser;
import com.metamx.common.parsers.ParserUtils;
import com.metamx.druid.aggregation.AggregatorFactory;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.index.QueryableIndex;
import com.metamx.druid.index.v1.IncrementalIndex;
import com.metamx.druid.index.v1.IndexIO;
import com.metamx.druid.index.v1.IndexMerger;
import com.metamx.druid.indexer.data.StringInputRowParser;
import com.metamx.druid.indexer.rollup.DataRollupSpec;
import com.metamx.druid.input.MapBasedInputRow;
import com.metamx.druid.input.InputRow;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -53,13 +47,11 @@ import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3native.NativeS3FileSystem;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
@ -68,7 +60,6 @@ import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
@ -78,7 +69,6 @@ import java.net.URI;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@ -127,7 +117,7 @@ public class IndexGeneratorJob implements Jobby
job.setMapperClass(IndexGeneratorMapper.class);
job.setMapOutputValueClass(Text.class);
SortableBytes.useSortableBytesAsKey(job);
SortableBytes.useSortableBytesAsMapOutputKey(job);
job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
job.setPartitionerClass(IndexGeneratorPartitioner.class);
@ -144,7 +134,7 @@ public class IndexGeneratorJob implements Jobby
job.setJarByClass(IndexGeneratorJob.class);
job.submit();
log.info("Job submitted, status available at %s", job.getTrackingURL());
log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
boolean success = job.waitForCompletion(true);
@ -159,75 +149,29 @@ public class IndexGeneratorJob implements Jobby
}
}
public static class IndexGeneratorMapper extends Mapper<LongWritable, Text, BytesWritable, Text>
public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper<BytesWritable, Text>
{
private HadoopDruidIndexerConfig config;
private Parser<String, Object> parser;
private Function<String, DateTime> timestampConverter;
@Override
protected void setup(Context context)
throws IOException, InterruptedException
{
config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
parser = config.getDataSpec().getParser();
timestampConverter = ParserUtils.createTimestampParser(config.getTimestampFormat());
}
@Override
protected void map(
LongWritable key, Text value, Context context
protected void innerMap(
InputRow inputRow,
Text text,
Context context
) throws IOException, InterruptedException
{
// Group by bucket, sort by timestamp
final Optional<Bucket> bucket = getConfig().getBucket(inputRow);
try {
final Map<String, Object> values = parser.parse(value.toString());
final String tsStr = (String) values.get(config.getTimestampColumnName());
final DateTime timestamp;
try {
timestamp = timestampConverter.apply(tsStr);
}
catch (IllegalArgumentException e) {
if (config.isIgnoreInvalidRows()) {
context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1);
return; // we're ignoring this invalid row
} else {
throw e;
}
}
Optional<Bucket> bucket = config.getBucket(
Maps.transformEntries(
values,
new Maps.EntryTransformer<String, Object, String>()
{
@Override
public String transformEntry(@Nullable String key, @Nullable Object value)
{
if (key.equalsIgnoreCase(config.getTimestampColumnName())) {
return timestamp.toString();
}
return value.toString();
}
}
)
);
if (bucket.isPresent()) {
// Group by bucket, sort by timestamp
context.write(
new SortableBytes(
bucket.get().toGroupKey(),
Longs.toByteArray(timestamp.getMillis())
).toBytesWritable(),
value
);
}
}
catch (RuntimeException e) {
throw new RE(e, "Failure on row[%s]", value);
if(!bucket.isPresent()) {
throw new ISE("WTF?! No bucket found for row: %s", inputRow);
}
context.write(
new SortableBytes(
bucket.get().toGroupKey(),
Longs.toByteArray(inputRow.getTimestampFromEpoch())
).toBytesWritable(),
text
);
}
}
@ -253,8 +197,7 @@ public class IndexGeneratorJob implements Jobby
{
private HadoopDruidIndexerConfig config;
private List<String> metricNames = Lists.newArrayList();
private Function<String, DateTime> timestampConverter;
private Parser parser;
private StringInputRowParser parser;
@Override
protected void setup(Context context)
@ -265,8 +208,8 @@ public class IndexGeneratorJob implements Jobby
for (AggregatorFactory factory : config.getRollupSpec().getAggs()) {
metricNames.add(factory.getName().toLowerCase());
}
timestampConverter = ParserUtils.createTimestampParser(config.getTimestampFormat());
parser = config.getDataSpec().getParser();
parser = config.getParser();
}
@Override
@ -299,32 +242,10 @@ public class IndexGeneratorJob implements Jobby
for (final Text value : values) {
context.progress();
Map<String, Object> event = parser.parse(value.toString());
final long timestamp = timestampConverter.apply((String) event.get(config.getTimestampColumnName()))
.getMillis();
List<String> dimensionNames =
config.getDataSpec().hasCustomDimensions() ?
config.getDataSpec().getDimensions() :
Lists.newArrayList(
FunctionalIterable.create(event.keySet())
.filter(
new Predicate<String>()
{
@Override
public boolean apply(@Nullable String input)
{
return !(metricNames.contains(input.toLowerCase())
|| config.getTimestampColumnName()
.equalsIgnoreCase(input));
}
}
)
);
allDimensionNames.addAll(dimensionNames);
final InputRow inputRow = parser.parse(value.toString());
allDimensionNames.addAll(inputRow.getDimensions());
int numRows = index.add(
new MapBasedInputRow(timestamp, dimensionNames, event)
);
int numRows = index.add(inputRow);
++lineCount;
if (numRows >= rollupSpec.rowFlushBoundary) {


@ -102,7 +102,7 @@ public class SortableBytes
);
}
public static void useSortableBytesAsKey(Job job)
public static void useSortableBytesAsMapOutputKey(Job job)
{
job.setMapOutputKeyClass(BytesWritable.class);
job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);


@ -20,6 +20,9 @@
package com.metamx.druid.indexer.granularity;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.metamx.common.Granularity;
import com.metamx.common.guava.Comparators;
@ -35,47 +38,47 @@ import java.util.TreeSet;
public class UniformGranularitySpec implements GranularitySpec
{
final private Granularity granularity;
final private List<Interval> intervals;
final private List<Interval> inputIntervals;
final private ArbitraryGranularitySpec wrappedSpec;
@JsonCreator
public UniformGranularitySpec(
@JsonProperty("gran") Granularity granularity,
@JsonProperty("intervals") List<Interval> intervals
@JsonProperty("intervals") List<Interval> inputIntervals
)
{
List<Interval> granularIntervals = Lists.newArrayList();
for (Interval inputInterval : inputIntervals) {
Iterables.addAll(granularIntervals, granularity.getIterable(inputInterval));
}
this.granularity = granularity;
this.intervals = intervals;
this.inputIntervals = ImmutableList.copyOf(inputIntervals);
this.wrappedSpec = new ArbitraryGranularitySpec(granularIntervals);
}
@Override
public SortedSet<Interval> bucketIntervals()
{
final TreeSet<Interval> retVal = Sets.newTreeSet(Comparators.intervals());
for (Interval interval : intervals) {
for (Interval segmentInterval : granularity.getIterable(interval)) {
retVal.add(segmentInterval);
}
}
return retVal;
return wrappedSpec.bucketIntervals();
}
@Override
public Optional<Interval> bucketInterval(DateTime dt)
{
return Optional.of(granularity.bucket(dt));
return wrappedSpec.bucketInterval(dt);
}
@JsonProperty
@JsonProperty("gran")
public Granularity getGranularity()
{
return granularity;
}
@JsonProperty
@JsonProperty("intervals")
public Iterable<Interval> getIntervals()
{
return intervals;
return inputIntervals;
}
}
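A small usage sketch (mirroring the assertions added to UniformGranularityTest below): bucket intervals are now precomputed from the input intervals via the wrapped ArbitraryGranularitySpec, so a timestamp outside those intervals comes back absent instead of being bucketed blindly. Granularity.DAY and the ISO interval strings are assumed here for illustration.
import com.metamx.common.Granularity;
import com.metamx.druid.indexer.granularity.UniformGranularitySpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import java.util.Arrays;
public class UniformGranularitySpecExample
{
  public static void main(String[] args)
  {
    final UniformGranularitySpec spec = new UniformGranularitySpec(
        Granularity.DAY,
        Arrays.asList(new Interval("2012-01-01T00Z/2012-01-03T00Z"))
    );
    System.out.println(spec.bucketIntervals());                              // the two day-sized buckets
    System.out.println(spec.bucketInterval(new DateTime("2012-01-02T03Z"))); // present
    System.out.println(spec.bucketInterval(new DateTime("2012-02-01T00Z"))); // absent
  }
}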


@ -0,0 +1,52 @@
package com.metamx.druid.indexer.partitions;
import org.codehaus.jackson.annotate.JsonIgnore;
import org.codehaus.jackson.annotate.JsonProperty;
import javax.annotation.Nullable;
public class PartitionsSpec
{
@Nullable
private final String partitionDimension;
private final long targetPartitionSize;
private final boolean assumeGrouped;
public PartitionsSpec(
@JsonProperty("partitionDimension") @Nullable String partitionDimension,
@JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize,
@JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped
)
{
this.partitionDimension = partitionDimension;
this.targetPartitionSize = targetPartitionSize == null ? -1 : targetPartitionSize;
this.assumeGrouped = assumeGrouped == null ? false : assumeGrouped;
}
@JsonIgnore
public boolean isDeterminingPartitions()
{
return targetPartitionSize > 0;
}
@JsonProperty
@Nullable
public String getPartitionDimension()
{
return partitionDimension;
}
@JsonProperty
public long getTargetPartitionSize()
{
return targetPartitionSize;
}
@JsonProperty
public boolean isAssumeGrouped()
{
return assumeGrouped;
}
}
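A quick sketch of the defaulting behavior above: leaving targetPartitionSize unset yields -1, which disables partition determination entirely, while a positive target (with or without an explicit dimension) enables it.
import com.metamx.druid.indexer.partitions.PartitionsSpec;
public class PartitionsSpecExample
{
  public static void main(String[] args)
  {
    final PartitionsSpec none = new PartitionsSpec(null, null, null);
    final PartitionsSpec byDim = new PartitionsSpec("foo", 100L, false);
    System.out.println(none.isDeterminingPartitions());  // false: target defaults to -1
    System.out.println(byDim.isDeterminingPartitions()); // true
    System.out.println(byDim.getPartitionDimension());   // foo
    System.out.println(byDim.isAssumeGrouped());         // false
  }
}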
}

View File

@ -22,6 +22,7 @@ package com.metamx.druid.indexer;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.metamx.druid.indexer.granularity.UniformGranularitySpec;
import com.metamx.druid.indexer.partitions.PartitionsSpec;
import com.metamx.druid.jackson.DefaultObjectMapper;
import org.codehaus.jackson.map.ObjectMapper;
import org.joda.time.Interval;
@ -67,7 +68,7 @@ public class HadoopDruidIndexerConfigTest
}
@Test
public void testIntervalsAndSegmentGranularity() {
public void testGranularitySpecLegacy() {
// Deprecated and replaced by granularitySpec, but still supported
final HadoopDruidIndexerConfig cfg;
@ -98,9 +99,8 @@ public class HadoopDruidIndexerConfigTest
);
}
@Test
public void testCmdlineAndSegmentGranularity() {
public void testGranularitySpecPostConstructorIntervals() {
// Deprecated and replaced by granularitySpec, but still supported
final HadoopDruidIndexerConfig cfg;
@ -133,7 +133,7 @@ public class HadoopDruidIndexerConfigTest
}
@Test
public void testInvalidCombination() {
public void testInvalidGranularityCombination() {
boolean thrown = false;
try {
final HadoopDruidIndexerConfig cfg = jsonMapper.readValue(
@ -154,4 +154,160 @@ public class HadoopDruidIndexerConfigTest
Assert.assertTrue("Exception thrown", thrown);
}
@Test
public void testPartitionsSpecNoPartitioning() {
final HadoopDruidIndexerConfig cfg;
try {
cfg = jsonMapper.readValue(
"{}",
HadoopDruidIndexerConfig.class
);
} catch(Exception e) {
throw Throwables.propagate(e);
}
final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec();
Assert.assertEquals(
"isDeterminingPartitions",
partitionsSpec.isDeterminingPartitions(),
false
);
}
@Test
public void testPartitionsSpecAutoDimension() {
final HadoopDruidIndexerConfig cfg;
try {
cfg = jsonMapper.readValue(
"{"
+ "\"partitionsSpec\":{"
+ " \"targetPartitionSize\":100"
+ " }"
+ "}",
HadoopDruidIndexerConfig.class
);
} catch(Exception e) {
throw Throwables.propagate(e);
}
final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec();
Assert.assertEquals(
"isDeterminingPartitions",
partitionsSpec.isDeterminingPartitions(),
true
);
Assert.assertEquals(
"getTargetPartitionSize",
partitionsSpec.getTargetPartitionSize(),
100
);
Assert.assertEquals(
"getPartitionDimension",
partitionsSpec.getPartitionDimension(),
null
);
}
@Test
public void testPartitionsSpecSpecificDimension() {
final HadoopDruidIndexerConfig cfg;
try {
cfg = jsonMapper.readValue(
"{"
+ "\"partitionsSpec\":{"
+ " \"targetPartitionSize\":100,"
+ " \"partitionDimension\":\"foo\""
+ " }"
+ "}",
HadoopDruidIndexerConfig.class
);
} catch(Exception e) {
throw Throwables.propagate(e);
}
final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec();
Assert.assertEquals(
"isDeterminingPartitions",
partitionsSpec.isDeterminingPartitions(),
true
);
Assert.assertEquals(
"getTargetPartitionSize",
partitionsSpec.getTargetPartitionSize(),
100
);
Assert.assertEquals(
"getPartitionDimension",
partitionsSpec.getPartitionDimension(),
"foo"
);
}
@Test
public void testPartitionsSpecLegacy() {
final HadoopDruidIndexerConfig cfg;
try {
cfg = jsonMapper.readValue(
"{"
+ "\"targetPartitionSize\":100,"
+ "\"partitionDimension\":\"foo\""
+ "}",
HadoopDruidIndexerConfig.class
);
} catch(Exception e) {
throw Throwables.propagate(e);
}
final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec();
Assert.assertEquals(
"isDeterminingPartitions",
partitionsSpec.isDeterminingPartitions(),
true
);
Assert.assertEquals(
"getTargetPartitionSize",
partitionsSpec.getTargetPartitionSize(),
100
);
Assert.assertEquals(
"getPartitionDimension",
partitionsSpec.getPartitionDimension(),
"foo"
);
}
@Test
public void testInvalidPartitionsCombination() {
boolean thrown = false;
try {
final HadoopDruidIndexerConfig cfg = jsonMapper.readValue(
"{"
+ "\"targetPartitionSize\":100,"
+ "\"partitionsSpec\":{"
+ " \"targetPartitionSize\":100"
+ " }"
+ "}",
HadoopDruidIndexerConfig.class
);
} catch(Exception e) {
thrown = true;
}
Assert.assertTrue("Exception thrown", thrown);
}
}


@ -69,6 +69,12 @@ public class ArbitraryGranularityTest
spec.bucketInterval(new DateTime("2012-01-03T01Z"))
);
Assert.assertEquals(
"2012-01-04T01Z",
Optional.<Interval>absent(),
spec.bucketInterval(new DateTime("2012-01-04T01Z"))
);
Assert.assertEquals(
"2012-01-07T23:59:59.999Z",
Optional.of(new Interval("2012-01-07T00Z/2012-01-08T00Z")),


@ -72,6 +72,12 @@ public class UniformGranularityTest
spec.bucketInterval(new DateTime("2012-01-03T01Z"))
);
Assert.assertEquals(
"2012-01-04T01Z",
Optional.<Interval>absent(),
spec.bucketInterval(new DateTime("2012-01-04T01Z"))
);
Assert.assertEquals(
"2012-01-07T23:59:59.999Z",
Optional.of(new Interval("2012-01-07T00Z/2012-01-08T00Z")),