diff --git a/client/pom.xml b/client/pom.xml index 55358d59bab..f5025092b8f 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/client/src/main/java/com/metamx/druid/BaseQuery.java b/client/src/main/java/com/metamx/druid/BaseQuery.java index 4538467c167..76448ed9f17 100644 --- a/client/src/main/java/com/metamx/druid/BaseQuery.java +++ b/client/src/main/java/com/metamx/druid/BaseQuery.java @@ -22,6 +22,7 @@ package com.metamx.druid; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.metamx.common.guava.Sequence; +import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.segment.QuerySegmentSpec; import com.metamx.druid.query.segment.QuerySegmentWalker; import org.codehaus.jackson.annotate.JsonProperty; @@ -72,7 +73,12 @@ public abstract class BaseQuery implements Query @Override public Sequence run(QuerySegmentWalker walker) { - return querySegmentSpec.lookup(this, walker).run(this); + return run(querySegmentSpec.lookup(this, walker)); + } + + public Sequence run(QueryRunner runner) + { + return runner.run(this); } @Override diff --git a/client/src/main/java/com/metamx/druid/Query.java b/client/src/main/java/com/metamx/druid/Query.java index bd1dc49702a..4c4e7f715b4 100644 --- a/client/src/main/java/com/metamx/druid/Query.java +++ b/client/src/main/java/com/metamx/druid/Query.java @@ -20,6 +20,7 @@ package com.metamx.druid; import com.metamx.common.guava.Sequence; +import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.group.GroupByQuery; import com.metamx.druid.query.metadata.SegmentMetadataQuery; import com.metamx.druid.query.search.SearchQuery; @@ -57,6 +58,8 @@ public interface Query public Sequence run(QuerySegmentWalker walker); + public Sequence run(QueryRunner runner); + public List getIntervals(); public Duration getDuration(); diff --git a/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java b/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java index 47376dd2e68..163f1986a53 100644 --- a/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java +++ b/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java @@ -28,6 +28,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Ordering; +import com.google.common.collect.Sets; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.metamx.common.ISE; import com.metamx.common.Pair; @@ -41,7 +42,6 @@ import com.metamx.druid.TimelineObjectHolder; import com.metamx.druid.VersionedIntervalTimeline; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.cache.Cache; -import com.metamx.druid.client.cache.CacheBroker; import com.metamx.druid.client.selector.ServerSelector; import com.metamx.druid.partition.PartitionChunk; import com.metamx.druid.query.CacheStrategy; @@ -54,6 +54,7 @@ import com.metamx.druid.query.segment.SegmentDescriptor; import com.metamx.druid.result.BySegmentResultValueClass; import com.metamx.druid.result.Result; import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.type.TypeReference; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -64,6 +65,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.Executors; 
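The BaseQuery/Query change above adds a second entry point, run(QueryRunner), so a query can be handed directly to any QueryRunner instead of only to a QuerySegmentWalker; the walker-based overload now simply resolves a runner and delegates to it. A minimal sketch of the two call paths after this patch, assuming the generic Query<T>/QueryRunner<T> signatures; the helper class and method names below are illustrative and not part of the patch:

import com.metamx.common.guava.Sequence;
import com.metamx.druid.Query;
import com.metamx.druid.query.QueryRunner;
import com.metamx.druid.query.segment.QuerySegmentWalker;

public class QueryRunSketch
{
  // Existing path: the walker looks up a runner covering the query's intervals,
  // then BaseQuery delegates to run(runner).
  public static <T> Sequence<T> runWithWalker(Query<T> query, QuerySegmentWalker walker)
  {
    return query.run(walker);
  }

  // New path: execute against an explicit runner, e.g. a CachingClusteredClient or the
  // FinalizeResultsQueryRunner built by the new DirectClientQuerySegmentWalker.
  public static <T> Sequence<T> runWithRunner(Query<T> query, QueryRunner<T> runner)
  {
    return query.run(runner); // equivalent to runner.run(query)
  }
}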
/** @@ -74,19 +76,19 @@ public class CachingClusteredClient implements QueryRunner private final QueryToolChestWarehouse warehouse; private final ServerView serverView; - private final CacheBroker cacheBroker; + private final Cache cache; private final ObjectMapper objectMapper; public CachingClusteredClient( QueryToolChestWarehouse warehouse, ServerView serverView, - CacheBroker cacheBroker, + Cache cache, ObjectMapper objectMapper ) { this.warehouse = warehouse; this.serverView = serverView; - this.cacheBroker = cacheBroker; + this.cache = cache; this.objectMapper = objectMapper; serverView.registerSegmentCallback( @@ -98,7 +100,7 @@ public class CachingClusteredClient implements QueryRunner @Override public ServerView.CallbackAction segmentRemoved(DruidServer server, DataSegment segment) { - CachingClusteredClient.this.cacheBroker.provideCache(segment.getIdentifier()).close(); + CachingClusteredClient.this.cache.close(segment.getIdentifier()); return ServerView.CallbackAction.CONTINUE; } } @@ -109,9 +111,10 @@ public class CachingClusteredClient implements QueryRunner public Sequence run(final Query query) { final QueryToolChest> toolChest = warehouse.getToolChest(query); - final CacheStrategy> strategy = toolChest.getCacheStrategy(query); + final CacheStrategy> strategy = toolChest.getCacheStrategy(query); + + final Map> serverSegments = Maps.newTreeMap(); - final Map> segs = Maps.newTreeMap(); final List> cachedResults = Lists.newArrayList(); final Map cachePopulatorMap = Maps.newHashMap(); @@ -131,10 +134,8 @@ public class CachingClusteredClient implements QueryRunner return Sequences.empty(); } - byte[] queryCacheKey = null; - if (strategy != null) { - queryCacheKey = strategy.computeCacheKey(query); - } + // build set of segments to query + Set> segments = Sets.newLinkedHashSet(); for (Interval interval : rewrittenQuery.getIntervals()) { List> serversLookup = timeline.lookup(interval); @@ -146,55 +147,67 @@ public class CachingClusteredClient implements QueryRunner holder.getInterval(), holder.getVersion(), chunk.getChunkNumber() ); - if (queryCacheKey == null) { - final DruidServer server = selector.pick(); - List descriptors = segs.get(server); - - if (descriptors == null) { - descriptors = Lists.newArrayList(); - segs.put(server, descriptors); - } - - descriptors.add(descriptor); - } - else { - final Interval segmentQueryInterval = holder.getInterval(); - final byte[] versionBytes = descriptor.getVersion().getBytes(); - - final byte[] cacheKey = ByteBuffer - .allocate(16 + versionBytes.length + 4 + queryCacheKey.length) - .putLong(segmentQueryInterval.getStartMillis()) - .putLong(segmentQueryInterval.getEndMillis()) - .put(versionBytes) - .putInt(descriptor.getPartitionNumber()) - .put(queryCacheKey) - .array(); - final String segmentIdentifier = selector.getSegment().getIdentifier(); - final Cache cache = cacheBroker.provideCache(segmentIdentifier); - final byte[] cachedValue = cache.get(cacheKey); - - if (useCache && cachedValue != null) { - cachedResults.add(Pair.of(segmentQueryInterval.getStart(), cachedValue)); - } else { - final DruidServer server = selector.pick(); - List descriptors = segs.get(server); - - if (descriptors == null) { - descriptors = Lists.newArrayList(); - segs.put(server, descriptors); - } - - descriptors.add(descriptor); - cachePopulatorMap.put( - String.format("%s_%s", segmentIdentifier, segmentQueryInterval), - new CachePopulator(cache, objectMapper, cacheKey) - ); - } - } + segments.add(Pair.of(selector, descriptor)); } } } + final byte[] 
queryCacheKey; + if(strategy != null) { + queryCacheKey = strategy.computeCacheKey(query); + } else { + queryCacheKey = null; + } + + // Pull cached segments from cache and remove from set of segments to query + if(useCache && queryCacheKey != null) { + Map, Cache.NamedKey> cacheKeys = Maps.newHashMap(); + for(Pair e : segments) { + cacheKeys.put(e, computeSegmentCacheKey(e.lhs.getSegment().getIdentifier(), e.rhs, queryCacheKey)); + } + + Map cachedValues = cache.getBulk(cacheKeys.values()); + + for(Map.Entry, Cache.NamedKey> entry : cacheKeys.entrySet()) { + Pair segment = entry.getKey(); + Cache.NamedKey segmentCacheKey = entry.getValue(); + + final ServerSelector selector = segment.lhs; + final SegmentDescriptor descriptor = segment.rhs; + final Interval segmentQueryInterval = descriptor.getInterval(); + + final byte[] cachedValue = cachedValues.get(segmentCacheKey); + + if (cachedValue != null) { + cachedResults.add(Pair.of(segmentQueryInterval.getStart(), cachedValue)); + + // remove cached segment from set of segments to query + segments.remove(segment); + } + else { + final String segmentIdentifier = selector.getSegment().getIdentifier(); + cachePopulatorMap.put( + String.format("%s_%s", segmentIdentifier, segmentQueryInterval), + new CachePopulator(cache, objectMapper, segmentCacheKey) + ); + } + } + } + + // Compile list of all segments not pulled from cache + for(Pair segment : segments) { + final DruidServer server = segment.lhs.pick(); + List descriptors = serverSegments.get(server); + + if (descriptors == null) { + descriptors = Lists.newArrayList(); + serverSegments.put(server, descriptors); + } + + descriptors.add(segment.rhs); + } + + return new LazySequence( new Supplier>() { @@ -229,6 +242,7 @@ public class CachingClusteredClient implements QueryRunner } final Function pullFromCacheFunction = strategy.pullFromCache(); + final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz(); for (Pair cachedResultPair : cachedResults) { final byte[] cachedResult = cachedResultPair.rhs; Sequence cachedSequence = new BaseSequence>( @@ -243,7 +257,8 @@ public class CachingClusteredClient implements QueryRunner } return objectMapper.readValues( - objectMapper.getJsonFactory().createJsonParser(cachedResult), Object.class + objectMapper.getJsonFactory().createJsonParser(cachedResult), + cacheObjectClazz ); } catch (IOException e) { @@ -264,7 +279,7 @@ public class CachingClusteredClient implements QueryRunner @SuppressWarnings("unchecked") private void addSequencesFromServer(ArrayList>> listOfSequences) { - for (Map.Entry> entry : segs.entrySet()) { + for (Map.Entry> entry : serverSegments.entrySet()) { final DruidServer server = entry.getKey(); final List descriptors = entry.getValue(); @@ -328,13 +343,29 @@ public class CachingClusteredClient implements QueryRunner ); } + private Cache.NamedKey computeSegmentCacheKey(String segmentIdentifier, SegmentDescriptor descriptor, byte[] queryCacheKey) + { + final Interval segmentQueryInterval = descriptor.getInterval(); + final byte[] versionBytes = descriptor.getVersion().getBytes(); + + return new Cache.NamedKey( + segmentIdentifier, ByteBuffer + .allocate(16 + versionBytes.length + 4 + queryCacheKey.length) + .putLong(segmentQueryInterval.getStartMillis()) + .putLong(segmentQueryInterval.getEndMillis()) + .put(versionBytes) + .putInt(descriptor.getPartitionNumber()) + .put(queryCacheKey).array() + ); + } + private static class CachePopulator { private final Cache cache; private final ObjectMapper mapper; - private final byte[] key; 
+ private final Cache.NamedKey key; - public CachePopulator(Cache cache, ObjectMapper mapper, byte[] key) + public CachePopulator(Cache cache, ObjectMapper mapper, Cache.NamedKey key) { this.cache = cache; this.mapper = mapper; diff --git a/client/src/main/java/com/metamx/druid/client/DataSegment.java b/client/src/main/java/com/metamx/druid/client/DataSegment.java index 1f7fcc42d19..9b29f3507ee 100644 --- a/client/src/main/java/com/metamx/druid/client/DataSegment.java +++ b/client/src/main/java/com/metamx/druid/client/DataSegment.java @@ -48,6 +48,8 @@ import java.util.Map; public class DataSegment implements Comparable { public static String delimiter = "_"; + private final Integer binaryVersion; + public static String makeDataSegmentIdentifier( String dataSource, DateTime start, @@ -89,6 +91,7 @@ public class DataSegment implements Comparable @JsonProperty("dimensions") @JsonDeserialize(using = CommaListJoinDeserializer.class) List dimensions, @JsonProperty("metrics") @JsonDeserialize(using = CommaListJoinDeserializer.class) List metrics, @JsonProperty("shardSpec") ShardSpec shardSpec, + @JsonProperty("binaryVersion") Integer binaryVersion, @JsonProperty("size") long size ) { @@ -112,6 +115,7 @@ public class DataSegment implements Comparable ? ImmutableList.of() : ImmutableList.copyOf(Iterables.filter(metrics, nonEmpty)); this.shardSpec = (shardSpec == null) ? new NoneShardSpec() : shardSpec; + this.binaryVersion = binaryVersion; this.size = size; this.identifier = makeDataSegmentIdentifier( @@ -172,6 +176,12 @@ public class DataSegment implements Comparable return shardSpec; } + @JsonProperty + public Integer getBinaryVersion() + { + return binaryVersion; + } + @JsonProperty public long getSize() { @@ -209,6 +219,11 @@ public class DataSegment implements Comparable return builder(this).version(version).build(); } + public DataSegment withBinaryVersion(int binaryVersion) + { + return builder(this).binaryVersion(binaryVersion).build(); + } + @Override public int compareTo(DataSegment dataSegment) { @@ -287,6 +302,7 @@ public class DataSegment implements Comparable private List dimensions; private List metrics; private ShardSpec shardSpec; + private Integer binaryVersion; private long size; public Builder() @@ -307,6 +323,7 @@ public class DataSegment implements Comparable this.dimensions = segment.getDimensions(); this.metrics = segment.getMetrics(); this.shardSpec = segment.getShardSpec(); + this.binaryVersion = segment.getBinaryVersion(); this.size = segment.getSize(); } @@ -352,6 +369,12 @@ public class DataSegment implements Comparable return this; } + public Builder binaryVersion(Integer binaryVersion) + { + this.binaryVersion = binaryVersion; + return this; + } + public Builder size(long size) { this.size = size; @@ -374,6 +397,7 @@ public class DataSegment implements Comparable dimensions, metrics, shardSpec, + binaryVersion, size ); } diff --git a/client/src/main/java/com/metamx/druid/client/cache/Cache.java b/client/src/main/java/com/metamx/druid/client/cache/Cache.java index e7907c9548f..6e9463deb56 100644 --- a/client/src/main/java/com/metamx/druid/client/cache/Cache.java +++ b/client/src/main/java/com/metamx/druid/client/cache/Cache.java @@ -19,13 +19,74 @@ package com.metamx.druid.client.cache; +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Ints; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Map; + /** - * An interface to limit the operations that can be done on a Cache 
so that it is easier to reason about what - * is actually going to be done. */ public interface Cache { - public byte[] get(byte[] key); - public void put(byte[] key, byte[] value); - public void close(); + public byte[] get(NamedKey key); + public void put(NamedKey key, byte[] value); + public Map getBulk(Iterable keys); + + public void close(String namespace); + + public CacheStats getStats(); + + public class NamedKey + { + final public String namespace; + final public byte[] key; + + public NamedKey(String namespace, byte[] key) { + Preconditions.checkArgument(namespace != null, "namespace must not be null"); + Preconditions.checkArgument(key != null, "key must not be null"); + this.namespace = namespace; + this.key = key; + } + + public byte[] toByteArray() { + final byte[] nsBytes = this.namespace.getBytes(Charsets.UTF_8); + return ByteBuffer.allocate(Ints.BYTES + nsBytes.length + this.key.length) + .putInt(nsBytes.length) + .put(nsBytes) + .put(this.key).array(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + NamedKey namedKey = (NamedKey) o; + + if (!namespace.equals(namedKey.namespace)) { + return false; + } + if (!Arrays.equals(key, namedKey.key)) { + return false; + } + + return true; + } + + @Override + public int hashCode() + { + int result = namespace.hashCode(); + result = 31 * result + Arrays.hashCode(key); + return result; + } + } } diff --git a/client/src/main/java/com/metamx/druid/client/cache/CacheMonitor.java b/client/src/main/java/com/metamx/druid/client/cache/CacheMonitor.java index d1337163ba6..b0c36629e89 100644 --- a/client/src/main/java/com/metamx/druid/client/cache/CacheMonitor.java +++ b/client/src/main/java/com/metamx/druid/client/cache/CacheMonitor.java @@ -27,21 +27,21 @@ import com.metamx.metrics.AbstractMonitor; */ public class CacheMonitor extends AbstractMonitor { - private final CacheBroker cacheBroker; + private final Cache cache; private volatile CacheStats prevCacheStats = null; public CacheMonitor( - CacheBroker cacheBroker + Cache cache ) { - this.cacheBroker = cacheBroker; + this.cache = cache; } @Override public boolean doMonitor(ServiceEmitter emitter) { - final CacheStats currCacheStats = cacheBroker.getStats(); + final CacheStats currCacheStats = cache.getStats(); final CacheStats deltaCacheStats = currCacheStats.delta(prevCacheStats); final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); diff --git a/client/src/main/java/com/metamx/druid/client/cache/MapCache.java b/client/src/main/java/com/metamx/druid/client/cache/MapCache.java new file mode 100644 index 00000000000..53e1e20280a --- /dev/null +++ b/client/src/main/java/com/metamx/druid/client/cache/MapCache.java @@ -0,0 +1,158 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.client.cache; + +import com.google.common.collect.Maps; +import com.google.common.primitives.Ints; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +/** + */ +public class MapCache implements Cache +{ + private final Map baseMap; + private final ByteCountingLRUMap byteCountingLRUMap; + + private final Map namespaceId; + private final AtomicInteger ids; + + private final Object clearLock = new Object(); + + private final AtomicLong hitCount = new AtomicLong(0); + private final AtomicLong missCount = new AtomicLong(0); + + public static com.metamx.druid.client.cache.Cache create(final MapCacheConfig config) + { + return new MapCache( + new ByteCountingLRUMap( + config.getInitialSize(), + config.getLogEvictionCount(), + config.getSizeInBytes() + ) + ); + } + + MapCache( + ByteCountingLRUMap byteCountingLRUMap + ) + { + this.byteCountingLRUMap = byteCountingLRUMap; + + this.baseMap = Collections.synchronizedMap(byteCountingLRUMap); + + namespaceId = Maps.newHashMap(); + ids = new AtomicInteger(); + } + + @Override + public CacheStats getStats() + { + return new CacheStats( + hitCount.get(), + missCount.get(), + byteCountingLRUMap.size(), + byteCountingLRUMap.getNumBytes(), + byteCountingLRUMap.getEvictionCount(), + 0 + ); + } + + @Override + public byte[] get(NamedKey key) + { + final byte[] retVal = baseMap.get(computeKey(getNamespaceId(key.namespace), key.key)); + if (retVal == null) { + missCount.incrementAndGet(); + } else { + hitCount.incrementAndGet(); + } + return retVal; + } + + @Override + public void put(NamedKey key, byte[] value) + { + synchronized (clearLock) { + baseMap.put(computeKey(getNamespaceId(key.namespace), key.key), value); + } + } + + @Override + public Map getBulk(Iterable keys) + { + Map retVal = Maps.newHashMap(); + for(NamedKey key : keys) { + retVal.put(key, get(key)); + } + return retVal; + } + + @Override + public void close(String namespace) + { + byte[] idBytes; + synchronized (namespaceId) { + idBytes = getNamespaceId(namespace); + if(idBytes == null) return; + + namespaceId.remove(namespace); + } + synchronized (clearLock) { + Iterator iter = baseMap.keySet().iterator(); + while (iter.hasNext()) { + ByteBuffer next = iter.next(); + + if (next.get(0) == idBytes[0] + && next.get(1) == idBytes[1] + && next.get(2) == idBytes[2] + && next.get(3) == idBytes[3]) { + iter.remove(); + } + } + } + } + + private byte[] getNamespaceId(final String identifier) + { + synchronized (namespaceId) { + byte[] idBytes = namespaceId.get(identifier); + if (idBytes != null) { + return idBytes; + } + + idBytes = Ints.toByteArray(ids.getAndIncrement()); + namespaceId.put(identifier, idBytes); + return idBytes; + } + } + + private ByteBuffer computeKey(byte[] idBytes, byte[] key) + { + final ByteBuffer retVal = ByteBuffer.allocate(key.length + 4).put(idBytes).put(key); + retVal.rewind(); + return retVal; + } +} diff --git a/client/src/main/java/com/metamx/druid/client/cache/MapCacheBroker.java b/client/src/main/java/com/metamx/druid/client/cache/MapCacheBroker.java deleted file mode 100644 index d8ec202021a..00000000000 --- 
a/client/src/main/java/com/metamx/druid/client/cache/MapCacheBroker.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.client.cache; - -import com.google.common.collect.Maps; -import com.google.common.primitives.Ints; -import com.metamx.common.ISE; - -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.Iterator; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -/** - */ -public class MapCacheBroker implements CacheBroker -{ - private final Map baseMap; - private final ByteCountingLRUMap byteCountingLRUMap; - - private final Map cacheCache; - private final AtomicInteger ids; - - private final Object clearLock = new Object(); - - private final AtomicLong hitCount = new AtomicLong(0); - private final AtomicLong missCount = new AtomicLong(0); - - public static CacheBroker create(final MapCacheBrokerConfig config) - { - return new MapCacheBroker( - new ByteCountingLRUMap( - config.getInitialSize(), - config.getLogEvictionCount(), - config.getSizeInBytes() - ) - ); - } - - MapCacheBroker( - ByteCountingLRUMap byteCountingLRUMap - ) - { - this.byteCountingLRUMap = byteCountingLRUMap; - - this.baseMap = Collections.synchronizedMap(byteCountingLRUMap); - - cacheCache = Maps.newHashMap(); - ids = new AtomicInteger(); - } - - - @Override - public CacheStats getStats() - { - return new CacheStats( - hitCount.get(), - missCount.get(), - byteCountingLRUMap.size(), - byteCountingLRUMap.getNumBytes(), - byteCountingLRUMap.getEvictionCount(), - 0 - ); - } - - @Override - public Cache provideCache(final String identifier) - { - synchronized (cacheCache) { - final Cache cachedCache = cacheCache.get(identifier); - if (cachedCache != null) { - return cachedCache; - } - - final byte[] myIdBytes = Ints.toByteArray(ids.getAndIncrement()); - - final Cache theCache = new Cache() - { - volatile boolean open = true; - - @Override - public byte[] get(byte[] key) - { - if (open) { - final byte[] retVal = baseMap.get(computeKey(key)); - if (retVal == null) { - missCount.incrementAndGet(); - } else { - hitCount.incrementAndGet(); - } - return retVal; - } - throw new ISE("Cache for identifier[%s] is closed.", identifier); - } - - @Override - public void put(byte[] key, byte[] value) - { - synchronized (clearLock) { - if (open) { - baseMap.put(computeKey(key), value); - return; - } - } - throw new ISE("Cache for identifier[%s] is closed.", identifier); - } - - @Override - public void close() - { - synchronized (cacheCache) { - cacheCache.remove(identifier); - } - synchronized (clearLock) { - if (open) { - open = false; - - Iterator iter = baseMap.keySet().iterator(); - while (iter.hasNext()) { - 
ByteBuffer next = iter.next(); - - if (next.get(0) == myIdBytes[0] - && next.get(1) == myIdBytes[1] - && next.get(2) == myIdBytes[2] - && next.get(3) == myIdBytes[3]) { - iter.remove(); - } - } - } - } - } - - private ByteBuffer computeKey(byte[] key) - { - final ByteBuffer retVal = ByteBuffer.allocate(key.length + 4).put(myIdBytes).put(key); - retVal.rewind(); - return retVal; - } - }; - - cacheCache.put(identifier, theCache); - - return theCache; - } - } -} diff --git a/client/src/main/java/com/metamx/druid/client/cache/MapCacheBrokerConfig.java b/client/src/main/java/com/metamx/druid/client/cache/MapCacheConfig.java similarity index 96% rename from client/src/main/java/com/metamx/druid/client/cache/MapCacheBrokerConfig.java rename to client/src/main/java/com/metamx/druid/client/cache/MapCacheConfig.java index 6ff7a778236..2bc468bf899 100644 --- a/client/src/main/java/com/metamx/druid/client/cache/MapCacheBrokerConfig.java +++ b/client/src/main/java/com/metamx/druid/client/cache/MapCacheConfig.java @@ -24,7 +24,7 @@ import org.skife.config.Default; /** */ -public abstract class MapCacheBrokerConfig +public abstract class MapCacheConfig { @Config("${prefix}.sizeInBytes") @Default("0") diff --git a/client/src/main/java/com/metamx/druid/client/cache/MemcachedCache.java b/client/src/main/java/com/metamx/druid/client/cache/MemcachedCache.java new file mode 100644 index 00000000000..ed7f5292e8a --- /dev/null +++ b/client/src/main/java/com/metamx/druid/client/cache/MemcachedCache.java @@ -0,0 +1,236 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.client.cache; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; +import com.google.common.collect.Maps; +import com.google.common.primitives.Ints; +import net.spy.memcached.AddrUtil; +import net.spy.memcached.ConnectionFactoryBuilder; +import net.spy.memcached.DefaultHashAlgorithm; +import net.spy.memcached.FailureMode; +import net.spy.memcached.MemcachedClient; +import net.spy.memcached.MemcachedClientIF; +import net.spy.memcached.internal.BulkFuture; +import net.spy.memcached.transcoders.SerializingTranscoder; +import org.apache.commons.codec.digest.DigestUtils; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicLong; + +public class MemcachedCache implements Cache +{ + public static MemcachedCache create(final MemcachedCacheConfig config) + { + try { + SerializingTranscoder transcoder = new SerializingTranscoder(config.getMaxObjectSize()); + // disable compression + transcoder.setCompressionThreshold(Integer.MAX_VALUE); + + return new MemcachedCache( + new MemcachedClient( + new ConnectionFactoryBuilder().setProtocol(ConnectionFactoryBuilder.Protocol.BINARY) + .setHashAlg(DefaultHashAlgorithm.FNV1A_64_HASH) + .setLocatorType(ConnectionFactoryBuilder.Locator.CONSISTENT) + .setDaemon(true) + .setFailureMode(FailureMode.Retry) + .setTranscoder(transcoder) + .setShouldOptimize(true) + .build(), + AddrUtil.getAddresses(config.getHosts()) + ), + config.getMemcachedPrefix(), + config.getTimeout(), + config.getExpiration() + ); + } catch(IOException e) { + throw Throwables.propagate(e); + } + } + + private final int timeout; + private final int expiration; + private final String memcachedPrefix; + + private final MemcachedClientIF client; + + private final AtomicLong hitCount = new AtomicLong(0); + private final AtomicLong missCount = new AtomicLong(0); + private final AtomicLong timeoutCount = new AtomicLong(0); + + MemcachedCache(MemcachedClientIF client, String memcachedPrefix, int timeout, int expiration) { + Preconditions.checkArgument(memcachedPrefix.length() <= MAX_PREFIX_LENGTH, + "memcachedPrefix length [%d] exceeds maximum length [%d]", + memcachedPrefix.length(), + MAX_PREFIX_LENGTH); + this.timeout = timeout; + this.expiration = expiration; + this.client = client; + this.memcachedPrefix = memcachedPrefix; + } + + @Override + public CacheStats getStats() + { + return new CacheStats( + hitCount.get(), + missCount.get(), + 0, + 0, + 0, + timeoutCount.get() + ); + } + + @Override + public byte[] get(NamedKey key) + { + Future future = client.asyncGet(computeKeyHash(memcachedPrefix, key)); + try { + byte[] bytes = (byte[]) future.get(timeout, TimeUnit.MILLISECONDS); + if(bytes != null) { + hitCount.incrementAndGet(); + } + else { + missCount.incrementAndGet(); + } + return bytes == null ? 
null : deserializeValue(key, bytes); + } + catch(TimeoutException e) { + timeoutCount.incrementAndGet(); + future.cancel(false); + return null; + } + catch(InterruptedException e) { + Thread.currentThread().interrupt(); + throw Throwables.propagate(e); + } + catch(ExecutionException e) { + throw Throwables.propagate(e); + } + } + + @Override + public void put(NamedKey key, byte[] value) + { + client.set(computeKeyHash(memcachedPrefix, key), expiration, serializeValue(key, value)); + } + + private static byte[] serializeValue(NamedKey key, byte[] value) { + byte[] keyBytes = key.toByteArray(); + return ByteBuffer.allocate(Ints.BYTES + keyBytes.length + value.length) + .putInt(keyBytes.length) + .put(keyBytes) + .put(value) + .array(); + } + + private static byte[] deserializeValue(NamedKey key, byte[] bytes) { + ByteBuffer buf = ByteBuffer.wrap(bytes); + + final int keyLength = buf.getInt(); + byte[] keyBytes = new byte[keyLength]; + buf.get(keyBytes); + byte[] value = new byte[buf.remaining()]; + buf.get(value); + + Preconditions.checkState(Arrays.equals(keyBytes, key.toByteArray()), + "Keys do not match, possible hash collision?"); + return value; + } + + @Override + public Map getBulk(Iterable keys) + { + Map keyLookup = Maps.uniqueIndex( + keys, + new Function() + { + @Override + public String apply( + @Nullable NamedKey input + ) + { + return computeKeyHash(memcachedPrefix, input); + } + } + ); + + BulkFuture> future = client.asyncGetBulk(keyLookup.keySet()); + + try { + Map some = future.getSome(timeout, TimeUnit.MILLISECONDS); + + if(future.isTimeout()) { + future.cancel(false); + timeoutCount.incrementAndGet(); + } + missCount.addAndGet(keyLookup.size() - some.size()); + hitCount.addAndGet(some.size()); + + Map results = Maps.newHashMap(); + for(Map.Entry entry : some.entrySet()) { + final NamedKey key = keyLookup.get(entry.getKey()); + final byte[] value = (byte[]) entry.getValue(); + results.put( + key, + value == null ? null : deserializeValue(key, value) + ); + } + + return results; + } + catch(InterruptedException e) { + Thread.currentThread().interrupt(); + throw Throwables.propagate(e); + } + catch(ExecutionException e) { + throw Throwables.propagate(e); + } + } + + @Override + public void close(String namespace) + { + // no resources to cleanup + } + + public static final int MAX_PREFIX_LENGTH = + MemcachedClientIF.MAX_KEY_LENGTH + - 40 // length of namespace hash + - 40 // length of key hash + - 2 // length of separators + ; + + private static String computeKeyHash(String memcachedPrefix, NamedKey key) { + // hash keys to keep things under 250 characters for memcached + return memcachedPrefix + ":" + DigestUtils.sha1Hex(key.namespace) + ":" + DigestUtils.sha1Hex(key.key); + } +} diff --git a/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheBroker.java b/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheBroker.java deleted file mode 100644 index 2f1af877d8c..00000000000 --- a/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheBroker.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.client.cache; - -import com.google.common.base.Throwables; -import net.iharder.base64.Base64; -import net.spy.memcached.AddrUtil; -import net.spy.memcached.ConnectionFactoryBuilder; -import net.spy.memcached.DefaultHashAlgorithm; -import net.spy.memcached.FailureMode; -import net.spy.memcached.MemcachedClient; -import net.spy.memcached.MemcachedClientIF; -import net.spy.memcached.transcoders.SerializingTranscoder; - -import java.io.IOException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicLong; - -public class MemcachedCacheBroker implements CacheBroker -{ - public static MemcachedCacheBroker create(final MemcachedCacheBrokerConfig config) - { - try { - SerializingTranscoder transcoder = new SerializingTranscoder(config.getMaxObjectSize()); - // disable compression - transcoder.setCompressionThreshold(Integer.MAX_VALUE); - - return new MemcachedCacheBroker( - new MemcachedClient( - new ConnectionFactoryBuilder().setProtocol(ConnectionFactoryBuilder.Protocol.BINARY) - .setHashAlg(DefaultHashAlgorithm.FNV1A_64_HASH) - .setLocatorType(ConnectionFactoryBuilder.Locator.CONSISTENT) - .setDaemon(true) - .setFailureMode(FailureMode.Retry) - .setTranscoder(transcoder) - .setShouldOptimize(true) - .build(), - AddrUtil.getAddresses(config.getHosts()) - ), - config.getTimeout(), - config.getExpiration() - ); - } catch(IOException e) { - throw Throwables.propagate(e); - } - } - - private final int timeout; - private final int expiration; - - private final MemcachedClientIF client; - - private final AtomicLong hitCount = new AtomicLong(0); - private final AtomicLong missCount = new AtomicLong(0); - private final AtomicLong timeoutCount = new AtomicLong(0); - - MemcachedCacheBroker(MemcachedClientIF client, int timeout, int expiration) { - this.timeout = timeout; - this.expiration = expiration; - this.client = client; - } - - @Override - public CacheStats getStats() - { - return new CacheStats( - hitCount.get(), - missCount.get(), - 0, - 0, - 0, - timeoutCount.get() - ); - } - - @Override - public Cache provideCache(final String identifier) - { - return new Cache() - { - @Override - public byte[] get(byte[] key) - { - Future future = client.asyncGet(computeKey(identifier, key)); - try { - byte[] bytes = (byte[]) future.get(timeout, TimeUnit.MILLISECONDS); - if(bytes != null) { - hitCount.incrementAndGet(); - } - else { - missCount.incrementAndGet(); - } - return bytes; - } - catch(TimeoutException e) { - timeoutCount.incrementAndGet(); - future.cancel(false); - return null; - } - catch(InterruptedException e) { - throw Throwables.propagate(e); - } - catch(ExecutionException e) { - throw Throwables.propagate(e); - } - } - - @Override - public void put(byte[] key, byte[] value) - { - client.set(computeKey(identifier, key), expiration, value); - } - - @Override - public void close() - { - // no resources to cleanup - } - }; - } - - private 
String computeKey(String identifier, byte[] key) { - return identifier + Base64.encodeBytes(key, Base64.DONT_BREAK_LINES); - } -} diff --git a/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheBrokerConfig.java b/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheConfig.java similarity index 68% rename from client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheBrokerConfig.java rename to client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheConfig.java index 5799d739bb6..2bd7b84e4cc 100644 --- a/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheBrokerConfig.java +++ b/client/src/main/java/com/metamx/druid/client/cache/MemcachedCacheConfig.java @@ -3,10 +3,10 @@ package com.metamx.druid.client.cache; import org.skife.config.Config; import org.skife.config.Default; -public abstract class MemcachedCacheBrokerConfig +public abstract class MemcachedCacheConfig { @Config("${prefix}.expiration") - @Default("31536000") + @Default("2592000") public abstract int getExpiration(); @Config("${prefix}.timeout") @@ -17,5 +17,10 @@ public abstract class MemcachedCacheBrokerConfig public abstract String getHosts(); @Config("${prefix}.maxObjectSize") + @Default("52428800") public abstract int getMaxObjectSize(); + + @Config("${prefix}.memcachedPrefix") + @Default("druid") + public abstract String getMemcachedPrefix(); } diff --git a/client/src/main/java/com/metamx/druid/http/BrokerNode.java b/client/src/main/java/com/metamx/druid/http/BrokerNode.java index aa14b440104..2a94a00d76d 100644 --- a/client/src/main/java/com/metamx/druid/http/BrokerNode.java +++ b/client/src/main/java/com/metamx/druid/http/BrokerNode.java @@ -34,13 +34,13 @@ import com.metamx.druid.client.BrokerServerView; import com.metamx.druid.client.CachingClusteredClient; import com.metamx.druid.client.ClientConfig; import com.metamx.druid.client.ClientInventoryManager; -import com.metamx.druid.client.cache.CacheBroker; +import com.metamx.druid.client.cache.Cache; import com.metamx.druid.client.cache.CacheConfig; import com.metamx.druid.client.cache.CacheMonitor; -import com.metamx.druid.client.cache.MapCacheBroker; -import com.metamx.druid.client.cache.MapCacheBrokerConfig; -import com.metamx.druid.client.cache.MemcachedCacheBroker; -import com.metamx.druid.client.cache.MemcachedCacheBrokerConfig; +import com.metamx.druid.client.cache.MapCache; +import com.metamx.druid.client.cache.MapCacheConfig; +import com.metamx.druid.client.cache.MemcachedCache; +import com.metamx.druid.client.cache.MemcachedCacheConfig; import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServiceDiscoveryConfig; import com.metamx.druid.jackson.DefaultObjectMapper; @@ -78,7 +78,7 @@ public class BrokerNode extends QueryableNode private QueryToolChestWarehouse warehouse = null; private HttpClient brokerHttpClient = null; - private CacheBroker cacheBroker = null; + private Cache cache = null; private boolean useDiscovery = true; @@ -122,15 +122,15 @@ public class BrokerNode extends QueryableNode return this; } - public CacheBroker getCacheBroker() + public Cache getCache() { initializeCacheBroker(); - return cacheBroker; + return cache; } - public BrokerNode setCacheBroker(CacheBroker cacheBroker) + public BrokerNode setCache(Cache cache) { - checkFieldNotSetAndSet("cacheBroker", cacheBroker); + checkFieldNotSetAndSet("cache", cache); return this; } @@ -185,7 +185,7 @@ public class BrokerNode extends QueryableNode final Lifecycle lifecycle = getLifecycle(); final 
List monitors = getMonitors(); - monitors.add(new CacheMonitor(cacheBroker)); + monitors.add(new CacheMonitor(cache)); startMonitoring(monitors); final BrokerServerView view = new BrokerServerView(warehouse, getSmileMapper(), brokerHttpClient); @@ -194,7 +194,7 @@ public class BrokerNode extends QueryableNode ); lifecycle.addManagedInstance(clientInventoryManager); - final CachingClusteredClient baseClient = new CachingClusteredClient(warehouse, view, cacheBroker, getSmileMapper()); + final CachingClusteredClient baseClient = new CachingClusteredClient(warehouse, view, cache, getSmileMapper()); lifecycle.addManagedInstance(baseClient); @@ -239,25 +239,25 @@ public class BrokerNode extends QueryableNode private void initializeCacheBroker() { - if (cacheBroker == null) { + if (cache == null) { String cacheType = getConfigFactory() .build(CacheConfig.class) .getType(); if (cacheType.equals(CACHE_TYPE_LOCAL)) { - setCacheBroker( - MapCacheBroker.create( + setCache( + MapCache.create( getConfigFactory().buildWithReplacements( - MapCacheBrokerConfig.class, + MapCacheConfig.class, ImmutableMap.of("prefix", CACHE_PROPERTY_PREFIX) ) ) ); } else if (cacheType.equals(CACHE_TYPE_MEMCACHED)) { - setCacheBroker( - MemcachedCacheBroker.create( + setCache( + MemcachedCache.create( getConfigFactory().buildWithReplacements( - MemcachedCacheBrokerConfig.class, + MemcachedCacheConfig.class, ImmutableMap.of("prefix", CACHE_PROPERTY_PREFIX) ) ) diff --git a/client/src/main/java/com/metamx/druid/http/DirectClientQuerySegmentWalker.java b/client/src/main/java/com/metamx/druid/http/DirectClientQuerySegmentWalker.java new file mode 100644 index 00000000000..e1cac8a90c3 --- /dev/null +++ b/client/src/main/java/com/metamx/druid/http/DirectClientQuerySegmentWalker.java @@ -0,0 +1,63 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.http; + +import com.metamx.druid.Query; +import com.metamx.druid.client.DirectDruidClient; +import com.metamx.druid.query.FinalizeResultsQueryRunner; +import com.metamx.druid.query.QueryRunner; +import com.metamx.druid.query.QueryToolChestWarehouse; +import com.metamx.druid.query.segment.QuerySegmentWalker; +import com.metamx.druid.query.segment.SegmentDescriptor; +import org.joda.time.Interval; + +/** + */ +public class DirectClientQuerySegmentWalker implements QuerySegmentWalker +{ + private final QueryToolChestWarehouse warehouse; + private final DirectDruidClient baseClient; + + public DirectClientQuerySegmentWalker( + QueryToolChestWarehouse warehouse, + DirectDruidClient baseClient + ) + { + this.warehouse = warehouse; + this.baseClient = baseClient; + } + + @Override + public QueryRunner getQueryRunnerForIntervals(Query query, Iterable intervals) + { + return makeRunner(query); + } + + @Override + public QueryRunner getQueryRunnerForSegments(Query query, Iterable specs) + { + return makeRunner(query); + } + + private FinalizeResultsQueryRunner makeRunner(final Query query) + { + return new FinalizeResultsQueryRunner(baseClient, warehouse.getToolChest(query)); + } +} diff --git a/client/src/main/java/com/metamx/druid/http/QueryServlet.java b/client/src/main/java/com/metamx/druid/http/QueryServlet.java index f68efbc1e89..108863bf7af 100644 --- a/client/src/main/java/com/metamx/druid/http/QueryServlet.java +++ b/client/src/main/java/com/metamx/druid/http/QueryServlet.java @@ -148,6 +148,7 @@ public class QueryServlet extends HttpServlet ImmutableMap.builder() .put("exception", e.toString()) .put("query", queryString) + .put("host", req.getRemoteAddr()) .build() ) ); diff --git a/client/src/main/java/com/metamx/druid/initialization/Initialization.java b/client/src/main/java/com/metamx/druid/initialization/Initialization.java index 7745260609f..cfb2d9302a2 100644 --- a/client/src/main/java/com/metamx/druid/initialization/Initialization.java +++ b/client/src/main/java/com/metamx/druid/initialization/Initialization.java @@ -203,7 +203,7 @@ public class Initialization log.info("Loaded(properties stored in zk) Property[%s] as [%s]", prop, zkProps.getProperty(prop)); } } // get props from zk - } else { // ToDo: should this be an error? 
+ } else { log.warn("property druid.zk.service.host is not set, so no way to contact zookeeper for coordination."); } // validate properties now that all levels of precedence are loaded diff --git a/client/src/main/java/com/metamx/druid/query/CacheStrategy.java b/client/src/main/java/com/metamx/druid/query/CacheStrategy.java index abdbe4da259..f8f5098f6ca 100644 --- a/client/src/main/java/com/metamx/druid/query/CacheStrategy.java +++ b/client/src/main/java/com/metamx/druid/query/CacheStrategy.java @@ -22,16 +22,19 @@ package com.metamx.druid.query; import com.google.common.base.Function; import com.metamx.common.guava.Sequence; import com.metamx.druid.Query; +import org.codehaus.jackson.type.TypeReference; /** */ -public interface CacheStrategy> +public interface CacheStrategy> { public byte[] computeCacheKey(QueryType query); - public Function prepareForCache(); + public TypeReference getCacheObjectClazz(); - public Function pullFromCache(); + public Function prepareForCache(); + + public Function pullFromCache(); public Sequence mergeSequences(Sequence> seqOfSequences); } diff --git a/client/src/main/java/com/metamx/druid/query/Queries.java b/client/src/main/java/com/metamx/druid/query/Queries.java index 9d8e052c0c0..ae5f958b563 100644 --- a/client/src/main/java/com/metamx/druid/query/Queries.java +++ b/client/src/main/java/com/metamx/druid/query/Queries.java @@ -20,6 +20,7 @@ package com.metamx.druid.query; import com.google.common.base.Function; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -58,9 +59,12 @@ public class Queries ); for (PostAggregator postAgg : postAggs) { + Set dependencies = postAgg.getDependentFields(); + Set missing = Sets.difference(dependencies, combinedAggNames); + Preconditions.checkArgument( - postAgg.verifyFields(combinedAggNames), - String.format("Missing field[%s]", postAgg.getName()) + missing.isEmpty(), + "Missing fields [%s] for postAggregator [%s]", missing, postAgg.getName() ); combinedAggNames.add(postAgg.getName()); } diff --git a/client/src/main/java/com/metamx/druid/query/QueryToolChest.java b/client/src/main/java/com/metamx/druid/query/QueryToolChest.java index ebf77f64af4..bec2170ec92 100644 --- a/client/src/main/java/com/metamx/druid/query/QueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/QueryToolChest.java @@ -44,7 +44,7 @@ public interface QueryToolChest> public ServiceMetricEvent.Builder makeMetricBuilder(QueryType query); public Function makeMetricManipulatorFn(QueryType query, MetricManipulationFn fn); public TypeReference getResultTypeReference(); - public CacheStrategy getCacheStrategy(QueryType query); + public CacheStrategy getCacheStrategy(QueryType query); public QueryRunner preMergeQueryDecoration(QueryRunner runner); public QueryRunner postMergeQueryDecoration(QueryRunner runner); } diff --git a/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java index abe5610732d..9dcf6110322 100644 --- a/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java @@ -29,9 +29,11 @@ import com.metamx.common.guava.ConcatSequence; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; import com.metamx.druid.Query; +import com.metamx.druid.QueryGranularity; import 
com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.index.v1.IncrementalIndex; import com.metamx.druid.initialization.Initialization; +import com.metamx.druid.input.MapBasedRow; import com.metamx.druid.input.Row; import com.metamx.druid.input.Rows; import com.metamx.druid.query.CacheStrategy; @@ -99,10 +101,11 @@ public class GroupByQueryQueryToolChest implements QueryToolChest() @@ -119,7 +122,21 @@ public class GroupByQueryQueryToolChest implements QueryToolChest() + { + private final QueryGranularity granularity = query.getGranularity(); + + @Override + public Row apply(Row input) + { + final MapBasedRow row = (MapBasedRow) input; + return new MapBasedRow(granularity.toDateTime(row.getTimestampFromEpoch()), row.getEvent()); + } + } + ); } }; } @@ -161,7 +178,7 @@ public class GroupByQueryQueryToolChest implements QueryToolChest getCacheStrategy(GroupByQuery query) + public CacheStrategy getCacheStrategy(GroupByQuery query) { return null; } diff --git a/client/src/main/java/com/metamx/druid/query/metadata/AllColumnIncluderator.java b/client/src/main/java/com/metamx/druid/query/metadata/AllColumnIncluderator.java new file mode 100644 index 00000000000..cd96b5d718a --- /dev/null +++ b/client/src/main/java/com/metamx/druid/query/metadata/AllColumnIncluderator.java @@ -0,0 +1,37 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.query.metadata; + +/** + */ +public class AllColumnIncluderator implements ColumnIncluderator +{ + @Override + public boolean include(String columnName) + { + return true; + } + + @Override + public byte[] getCacheKey() + { + return ALL_CACHE_PREFIX; + } +} diff --git a/client/src/main/java/com/metamx/druid/query/metadata/ColumnAnalysis.java b/client/src/main/java/com/metamx/druid/query/metadata/ColumnAnalysis.java new file mode 100644 index 00000000000..33b894ca07a --- /dev/null +++ b/client/src/main/java/com/metamx/druid/query/metadata/ColumnAnalysis.java @@ -0,0 +1,119 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.query.metadata; + +import com.google.common.base.Preconditions; +import com.metamx.druid.index.column.ValueType; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +/** +*/ +public class ColumnAnalysis +{ + private static final String ERROR_PREFIX = "error:"; + + public static ColumnAnalysis error(String reason) + { + return new ColumnAnalysis(ERROR_PREFIX + reason, -1, null); + } + + private final String type; + private final long size; + private final Integer cardinality; + + @JsonCreator + public ColumnAnalysis( + @JsonProperty("type") ValueType type, + @JsonProperty("size") long size, + @JsonProperty("cardinality") Integer cardinality + ) + { + this(type.name(), size, cardinality); + } + + private ColumnAnalysis( + String type, + long size, + Integer cardinality + ) + { + this.type = type; + this.size = size; + this.cardinality = cardinality; + } + + @JsonProperty + public String getType() + { + return type; + } + + @JsonProperty + public long getSize() + { + return size; + } + + @JsonProperty + public Integer getCardinality() + { + return cardinality; + } + + public boolean isError() + { + return type.startsWith(ERROR_PREFIX); + } + + public ColumnAnalysis fold(ColumnAnalysis rhs) + { + if (rhs == null) { + return this; + } + + if (!type.equals(rhs.getType())) { + return ColumnAnalysis.error("cannot_merge_diff_types"); + } + + Integer cardinality = getCardinality(); + final Integer rhsCardinality = rhs.getCardinality(); + if (cardinality == null) { + cardinality = rhsCardinality; + } + else { + if (rhsCardinality != null) { + cardinality = Math.max(cardinality, rhsCardinality); + } + } + + return new ColumnAnalysis(type, size + rhs.getSize(), cardinality); + } + + @Override + public String toString() + { + return "ColumnAnalysis{" + + "type='" + type + '\'' + + ", size=" + size + + ", cardinality=" + cardinality + + '}'; + } +} diff --git a/client/src/main/java/com/metamx/druid/query/metadata/ColumnIncluderator.java b/client/src/main/java/com/metamx/druid/query/metadata/ColumnIncluderator.java new file mode 100644 index 00000000000..90533c4eaca --- /dev/null +++ b/client/src/main/java/com/metamx/druid/query/metadata/ColumnIncluderator.java @@ -0,0 +1,41 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.query.metadata; + +import org.codehaus.jackson.annotate.JsonSubTypes; +import org.codehaus.jackson.annotate.JsonTypeInfo; + +/** + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = "none", value= NoneColumnIncluderator.class), + @JsonSubTypes.Type(name = "all", value= AllColumnIncluderator.class), + @JsonSubTypes.Type(name = "list", value= ListColumnIncluderator.class) +}) +public interface ColumnIncluderator +{ + public static final byte[] NONE_CACHE_PREFIX = new byte[]{0x0}; + public static final byte[] ALL_CACHE_PREFIX = new byte[]{0x1}; + public static final byte[] LIST_CACHE_PREFIX = new byte[]{0x2}; + + public boolean include(String columnName); + public byte[] getCacheKey(); +} diff --git a/client/src/main/java/com/metamx/druid/query/metadata/ListColumnIncluderator.java b/client/src/main/java/com/metamx/druid/query/metadata/ListColumnIncluderator.java new file mode 100644 index 00000000000..e74661d6822 --- /dev/null +++ b/client/src/main/java/com/metamx/druid/query/metadata/ListColumnIncluderator.java @@ -0,0 +1,82 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.query.metadata; + +import com.google.common.base.Charsets; +import com.google.common.base.Throwables; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +/** + */ +public class ListColumnIncluderator implements ColumnIncluderator +{ + private final Set columns; + + @JsonCreator + public ListColumnIncluderator( + @JsonProperty("columns") List columns + ) + { + this.columns = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); + this.columns.addAll(columns); + } + + @JsonProperty + public Set getColumns() + { + return Collections.unmodifiableSet(columns); + } + + @Override + public boolean include(String columnName) + { + return columns.contains(columnName); + } + + @Override + public byte[] getCacheKey() + { + int size = 1; + List columns = Lists.newArrayListWithExpectedSize(this.columns.size()); + + for (String column : this.columns) { + final byte[] bytes = column.getBytes(Charsets.UTF_8); + columns.add(bytes); + size += bytes.length; + } + + final ByteBuffer bytes = ByteBuffer.allocate(size).put(LIST_CACHE_PREFIX); + for (byte[] column : columns) { + bytes.put(column); + } + + return bytes.array(); + } +} diff --git a/client/src/main/java/com/metamx/druid/query/metadata/NoneColumnIncluderator.java b/client/src/main/java/com/metamx/druid/query/metadata/NoneColumnIncluderator.java new file mode 100644 index 00000000000..d1d66d26778 --- /dev/null +++ b/client/src/main/java/com/metamx/druid/query/metadata/NoneColumnIncluderator.java @@ -0,0 +1,37 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.query.metadata; + +/** + */ +public class NoneColumnIncluderator implements ColumnIncluderator +{ + @Override + public boolean include(String columnName) + { + return false; + } + + @Override + public byte[] getCacheKey() + { + return NONE_CACHE_PREFIX; + } +} diff --git a/client/src/main/java/com/metamx/druid/result/SegmentMetadataResultValue.java b/client/src/main/java/com/metamx/druid/query/metadata/SegmentAnalysis.java similarity index 53% rename from client/src/main/java/com/metamx/druid/result/SegmentMetadataResultValue.java rename to client/src/main/java/com/metamx/druid/query/metadata/SegmentAnalysis.java index 5904264017e..1182bfeb9fa 100644 --- a/client/src/main/java/com/metamx/druid/result/SegmentMetadataResultValue.java +++ b/client/src/main/java/com/metamx/druid/query/metadata/SegmentAnalysis.java @@ -17,61 +17,34 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package com.metamx.druid.result; +package com.metamx.druid.query.metadata; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; +import org.joda.time.Interval; +import java.util.List; import java.util.Map; -public class SegmentMetadataResultValue +public class SegmentAnalysis { - public static class Dimension { - @JsonProperty public long size; - @JsonProperty public int cardinality; - - @JsonCreator - public Dimension( - @JsonProperty("size") long size, - @JsonProperty("cardinality") int cardinality - ) - { - this.size = size; - this.cardinality = cardinality; - } - } - public static class Metric { - @JsonProperty public String type; - @JsonProperty public long size; - - @JsonCreator - public Metric( - @JsonProperty("type") String type, - @JsonProperty("size") long size - ) - { - this.type = type; - this.size = size; - } - } - private final String id; - private final Map dimensions; - private final Map metrics; + private final List interval; + private final Map columns; private final long size; @JsonCreator - public SegmentMetadataResultValue( + public SegmentAnalysis( @JsonProperty("id") String id, - @JsonProperty("dimensions") Map dimensions, - @JsonProperty("metrics") Map metrics, + @JsonProperty("intervals") List interval, + @JsonProperty("columns") Map columns, @JsonProperty("size") long size ) { this.id = id; - this.dimensions = dimensions; - this.metrics = metrics; + this.interval = interval; + this.columns = columns; this.size = size; } @@ -82,15 +55,15 @@ public class SegmentMetadataResultValue } @JsonProperty - public Map getDimensions() + public List getIntervals() { - return dimensions; + return interval; } @JsonProperty - public Map getMetrics() + public Map getColumns() { - return metrics; + return columns; } @JsonProperty @@ -98,4 +71,24 @@ public class SegmentMetadataResultValue { return size; } + + public String toDetailedString() + { + return "SegmentAnalysis{" + + "id='" + id + '\'' + + ", interval=" + interval + + ", columns=" + columns + + ", size=" + size + + '}'; + } + + @Override + public String toString() + { + return "SegmentAnalysis{" + + "id='" + id + '\'' + + ", interval=" + interval + + ", size=" + size + + '}'; + } } diff --git a/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQuery.java b/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQuery.java index e72b85a4423..7e0d04c0739 100644 --- a/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQuery.java +++ 
b/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQuery.java @@ -22,26 +22,40 @@ package com.metamx.druid.query.metadata; import com.metamx.druid.BaseQuery; import com.metamx.druid.Query; import com.metamx.druid.query.segment.QuerySegmentSpec; -import com.metamx.druid.result.Result; -import com.metamx.druid.result.SegmentMetadataResultValue; import org.codehaus.jackson.annotate.JsonProperty; import java.util.Map; -public class SegmentMetadataQuery extends BaseQuery> +public class SegmentMetadataQuery extends BaseQuery { + private final ColumnIncluderator toInclude; + private final boolean merge; + public SegmentMetadataQuery( @JsonProperty("dataSource") String dataSource, @JsonProperty("intervals") QuerySegmentSpec querySegmentSpec, + @JsonProperty("toInclude") ColumnIncluderator toInclude, + @JsonProperty("merge") Boolean merge, @JsonProperty("context") Map context ) { - super( - dataSource, - querySegmentSpec, - context - ); + super(dataSource, querySegmentSpec, context); + + this.toInclude = toInclude == null ? new AllColumnIncluderator() : toInclude; + this.merge = merge == null ? false : merge; + } + + @JsonProperty + public ColumnIncluderator getToInclude() + { + return toInclude; + } + + @JsonProperty + public boolean isMerge() + { + return merge; } @Override @@ -57,22 +71,16 @@ public class SegmentMetadataQuery extends BaseQuery> withOverriddenContext(Map contextOverride) + public Query withOverriddenContext(Map contextOverride) { return new SegmentMetadataQuery( - getDataSource(), - getQuerySegmentSpec(), - computeOverridenContext(contextOverride) + getDataSource(), getQuerySegmentSpec(), toInclude, merge, computeOverridenContext(contextOverride) ); } @Override - public Query> withQuerySegmentSpec(QuerySegmentSpec spec) + public Query withQuerySegmentSpec(QuerySegmentSpec spec) { - return new SegmentMetadataQuery( - getDataSource(), - spec, - getContext() - ); + return new SegmentMetadataQuery(getDataSource(), spec, toInclude, merge, getContext()); } } diff --git a/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index 14ef61b8d3c..160c23cd958 100644 --- a/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -22,32 +22,116 @@ package com.metamx.druid.query.metadata; import com.google.common.base.Function; import com.google.common.base.Functions; import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; -import com.metamx.common.guava.ConcatSequence; +import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; +import com.google.common.collect.Ordering; +import com.google.common.collect.Sets; +import com.metamx.common.ISE; +import com.metamx.common.guava.MergeSequence; import com.metamx.common.guava.Sequence; -import com.metamx.common.guava.Sequences; +import com.metamx.common.guava.nary.BinaryFn; +import com.metamx.druid.Query; +import com.metamx.druid.collect.OrderedMergeSequence; import com.metamx.druid.query.CacheStrategy; -import com.metamx.druid.query.ConcatQueryRunner; import com.metamx.druid.query.MetricManipulationFn; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryToolChest; -import com.metamx.druid.result.Result; -import com.metamx.druid.result.SegmentMetadataResultValue; +import 
com.metamx.druid.query.ResultMergeQueryRunner; +import com.metamx.druid.utils.JodaUtils; import com.metamx.emitter.service.ServiceMetricEvent; import org.codehaus.jackson.type.TypeReference; import org.joda.time.Interval; import org.joda.time.Minutes; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.Set; -public class SegmentMetadataQueryQueryToolChest implements QueryToolChest, SegmentMetadataQuery> + +public class SegmentMetadataQueryQueryToolChest implements QueryToolChest { - - private static final TypeReference> TYPE_REFERENCE = new TypeReference>(){}; + private static final TypeReference TYPE_REFERENCE = new TypeReference(){}; + private static final byte[] SEGMENT_METADATA_CACHE_PREFIX = new byte[]{0x4}; @Override - public QueryRunner> mergeResults(final QueryRunner> runner) + public QueryRunner mergeResults(final QueryRunner runner) { - return new ConcatQueryRunner>(Sequences.simple(ImmutableList.of(runner))); + return new ResultMergeQueryRunner(runner) + { + @Override + protected Ordering makeOrdering(Query query) + { + if (((SegmentMetadataQuery) query).isMerge()) { + // Merge everything always + return new Ordering() + { + @Override + public int compare( + @Nullable SegmentAnalysis left, @Nullable SegmentAnalysis right + ) + { + return 0; + } + }; + } + + return getOrdering(); // No two elements should be equal, so it should never merge + } + + @Override + protected BinaryFn createMergeFn(final Query inQ) + { + return new BinaryFn() + { + private final SegmentMetadataQuery query = (SegmentMetadataQuery) inQ; + + @Override + public SegmentAnalysis apply(SegmentAnalysis arg1, SegmentAnalysis arg2) + { + if (arg1 == null) { + return arg2; + } + + if (arg2 == null) { + return arg1; + } + + if (!query.isMerge()) { + throw new ISE("Merging when a merge isn't supposed to happen[%s], [%s]", arg1, arg2); + } + + List newIntervals = JodaUtils.condenseIntervals( + Iterables.concat(arg1.getIntervals(), arg2.getIntervals()) + ); + + final Map leftColumns = arg1.getColumns(); + final Map rightColumns = arg2.getColumns(); + Map columns = Maps.newTreeMap(); + + Set rightColumnNames = Sets.newHashSet(rightColumns.keySet()); + for (Map.Entry entry : leftColumns.entrySet()) { + final String columnName = entry.getKey(); + columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName))); + rightColumnNames.remove(columnName); + } + + for (String columnName : rightColumnNames) { + columns.put(columnName, rightColumns.get(columnName)); + } + + return new SegmentAnalysis("merged", newIntervals, columns, arg1.getSize() + arg2.getSize()); + } + }; + } + }; + } + + @Override + public Sequence mergeSequences(Sequence> seqOfSequences) + { + return new OrderedMergeSequence(getOrdering(), seqOfSequences); } @Override @@ -67,13 +151,7 @@ public class SegmentMetadataQueryQueryToolChest implements QueryToolChest> mergeSequences(Sequence>> seqOfSequences) - { - return new ConcatSequence>(seqOfSequences); - } - - @Override - public Function, Result> makeMetricManipulatorFn( + public Function makeMetricManipulatorFn( SegmentMetadataQuery query, MetricManipulationFn fn ) { @@ -81,26 +159,87 @@ public class SegmentMetadataQueryQueryToolChest implements QueryToolChest> getResultTypeReference() + public TypeReference getResultTypeReference() { return TYPE_REFERENCE; } @Override - public CacheStrategy, SegmentMetadataQuery> getCacheStrategy(SegmentMetadataQuery query) + public CacheStrategy getCacheStrategy(SegmentMetadataQuery 
query) { - return null; + return new CacheStrategy() + { + @Override + public byte[] computeCacheKey(SegmentMetadataQuery query) + { + byte[] includerBytes = query.getToInclude().getCacheKey(); + return ByteBuffer.allocate(1 + includerBytes.length) + .put(SEGMENT_METADATA_CACHE_PREFIX) + .put(includerBytes) + .array(); + } + + @Override + public TypeReference getCacheObjectClazz() + { + return getResultTypeReference(); + } + + @Override + public Function prepareForCache() + { + return new Function() + { + @Override + public SegmentAnalysis apply(@Nullable SegmentAnalysis input) + { + return input; + } + }; + } + + @Override + public Function pullFromCache() + { + return new Function() + { + @Override + public SegmentAnalysis apply(@Nullable SegmentAnalysis input) + { + return input; + } + }; + } + + @Override + public Sequence mergeSequences(Sequence> seqOfSequences) + { + return new MergeSequence(getOrdering(), seqOfSequences); + } + }; } @Override - public QueryRunner> preMergeQueryDecoration(QueryRunner> runner) + public QueryRunner preMergeQueryDecoration(QueryRunner runner) { return runner; } @Override - public QueryRunner> postMergeQueryDecoration(QueryRunner> runner) + public QueryRunner postMergeQueryDecoration(QueryRunner runner) { return runner; } + + private Ordering getOrdering() + { + return new Ordering() + { + @Override + public int compare(SegmentAnalysis left, SegmentAnalysis right) + { + return left.getId().compareTo(right.getId()); + } + }; + } } diff --git a/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java index 95757fc60b1..ce3fcc86114 100644 --- a/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java @@ -82,6 +82,10 @@ public class SearchQueryQueryToolChest implements QueryToolChest OBJECT_TYPE_REFERENCE = new TypeReference() + { + }; + @Override public QueryRunner> mergeResults(QueryRunner> runner) { @@ -143,9 +147,9 @@ public class SearchQueryQueryToolChest implements QueryToolChest, SearchQuery> getCacheStrategy(SearchQuery query) + public CacheStrategy, Object, SearchQuery> getCacheStrategy(SearchQuery query) { - return new CacheStrategy, SearchQuery>() + return new CacheStrategy, Object, SearchQuery>() { @Override public byte[] computeCacheKey(SearchQuery query) @@ -183,6 +187,12 @@ public class SearchQueryQueryToolChest implements QueryToolChest getCacheObjectClazz() + { + return OBJECT_TYPE_REFERENCE; + } + @Override public Function, Object> prepareForCache() { diff --git a/client/src/main/java/com/metamx/druid/query/segment/QuerySegmentSpecs.java b/client/src/main/java/com/metamx/druid/query/segment/QuerySegmentSpecs.java new file mode 100644 index 00000000000..9516db4c508 --- /dev/null +++ b/client/src/main/java/com/metamx/druid/query/segment/QuerySegmentSpecs.java @@ -0,0 +1,45 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.query.segment; + +import org.joda.time.Interval; + +import java.util.Arrays; +import java.util.List; + +/** + */ +public class QuerySegmentSpecs +{ + public static QuerySegmentSpec create(String isoInterval) + { + return new LegacySegmentSpec(isoInterval); + } + + public static QuerySegmentSpec create(Interval interval) + { + return create(Arrays.asList(interval)); + } + + public static QuerySegmentSpec create(List intervals) + { + return new MultipleIntervalSegmentSpec(intervals); + } +} diff --git a/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java index 9d65ab9b47c..5ee6c321bbb 100644 --- a/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java @@ -53,6 +53,9 @@ public class TimeBoundaryQueryQueryToolChest private static final TypeReference> TYPE_REFERENCE = new TypeReference>() { }; + private static final TypeReference OBJECT_TYPE_REFERENCE = new TypeReference() + { + }; @Override public QueryRunner> mergeResults( @@ -106,9 +109,9 @@ public class TimeBoundaryQueryQueryToolChest } @Override - public CacheStrategy, TimeBoundaryQuery> getCacheStrategy(TimeBoundaryQuery query) + public CacheStrategy, Object, TimeBoundaryQuery> getCacheStrategy(TimeBoundaryQuery query) { - return new CacheStrategy, TimeBoundaryQuery>() + return new CacheStrategy, Object, TimeBoundaryQuery>() { @Override public byte[] computeCacheKey(TimeBoundaryQuery query) @@ -119,6 +122,12 @@ public class TimeBoundaryQueryQueryToolChest .array(); } + @Override + public TypeReference getCacheObjectClazz() + { + return OBJECT_TYPE_REFERENCE; + } + @Override public Function, Object> prepareForCache() { diff --git a/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 99bf679c006..9c633507ec5 100644 --- a/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -28,6 +28,7 @@ import com.metamx.common.guava.MergeSequence; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.nary.BinaryFn; import com.metamx.druid.Query; +import com.metamx.druid.QueryGranularity; import com.metamx.druid.ResultGranularTimestampComparator; import com.metamx.druid.TimeseriesBinaryFn; import com.metamx.druid.aggregation.AggregatorFactory; @@ -49,6 +50,7 @@ import org.joda.time.DateTime; import org.joda.time.Interval; import org.joda.time.Minutes; import org.joda.time.Period; +import org.joda.time.format.ISODateTimeFormat; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -66,6 +68,9 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest> TYPE_REFERENCE = new TypeReference>() { }; + private static final TypeReference OBJECT_TYPE_REFERENCE = new TypeReference() + { + }; @Override public QueryRunner> mergeResults(QueryRunner> queryRunner) @@ -100,10 +105,7 @@ 
public class TimeseriesQueryQueryToolChest implements QueryToolChest> mergeSequences(Sequence>> seqOfSequences) { - return new OrderedMergeSequence>( - getOrdering(), - seqOfSequences - ); + return new OrderedMergeSequence>(getOrdering(), seqOfSequences); } @Override @@ -156,9 +158,9 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) + public CacheStrategy, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) { - return new CacheStrategy, TimeseriesQuery>() + return new CacheStrategy, Object, TimeseriesQuery>() { private final List aggs = query.getAggregatorSpecs(); private final List postAggs = query.getPostAggregatorSpecs(); @@ -180,6 +182,12 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest getCacheObjectClazz() + { + return OBJECT_TYPE_REFERENCE; + } + @Override public Function, Object> prepareForCache() { @@ -206,6 +214,8 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest>() { + private final QueryGranularity granularity = query.getGranularity(); + @Override public Result apply(@Nullable Object input) { @@ -215,7 +225,8 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest aggsIter = aggs.iterator(); Iterator resultIter = results.iterator(); - DateTime timestamp = new DateTime(resultIter.next()); + DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue()); + while (aggsIter.hasNext() && resultIter.hasNext()) { final AggregatorFactory factory = aggsIter.next(); retVal.put(factory.getName(), factory.deserialize(resultIter.next())); @@ -257,6 +268,4 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest implements Comparable> Result result = (Result) o; - if (timestamp != null ? !timestamp.equals(result.timestamp) : result.timestamp != null) { + if (timestamp != null ? !(timestamp.isEqual(result.timestamp) && timestamp.getZone().getOffset(timestamp) == result.timestamp.getZone().getOffset(result.timestamp)) : result.timestamp != null) { return false; } if (value != null ? !value.equals(result.value) : result.value != null) { diff --git a/client/src/main/java/com/metamx/phonebook/StoppedPhoneBook.java b/client/src/main/java/com/metamx/phonebook/StoppedPhoneBook.java index 907ee186b06..99abd2964cd 100644 --- a/client/src/main/java/com/metamx/phonebook/StoppedPhoneBook.java +++ b/client/src/main/java/com/metamx/phonebook/StoppedPhoneBook.java @@ -112,7 +112,8 @@ class StoppedPhoneBook implements PhoneBook } if (! 
serviceAnnouncements.containsKey(nodeName)) { - throw new IAE("Cannot unannounce node[%s] on service[%s]", nodeName, serviceName); + log.warn("Cannot unannounce[%s]: it doesn't exist for service[%s]", nodeName, serviceName); + return; } serviceAnnouncements.remove(nodeName); diff --git a/client/src/test/java/com/metamx/druid/client/DataSegmentTest.java b/client/src/test/java/com/metamx/druid/client/DataSegmentTest.java index c29d10781e2..24d5986a8f8 100644 --- a/client/src/test/java/com/metamx/druid/client/DataSegmentTest.java +++ b/client/src/test/java/com/metamx/druid/client/DataSegmentTest.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.shard.NoneShardSpec; import com.metamx.druid.shard.SingleDimensionShardSpec; @@ -60,12 +61,13 @@ public class DataSegmentTest Arrays.asList("dim1", "dim2"), Arrays.asList("met1", "met2"), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 1 ); final Map objectMap = mapper.readValue(mapper.writeValueAsString(segment), new TypeReference>(){}); - Assert.assertEquals(9, objectMap.size()); + Assert.assertEquals(10, objectMap.size()); Assert.assertEquals("something", objectMap.get("dataSource")); Assert.assertEquals(interval.toString(), objectMap.get("interval")); Assert.assertEquals("1", objectMap.get("version")); @@ -73,6 +75,7 @@ public class DataSegmentTest Assert.assertEquals("dim1,dim2", objectMap.get("dimensions")); Assert.assertEquals("met1,met2", objectMap.get("metrics")); Assert.assertEquals(ImmutableMap.of("type", "none"), objectMap.get("shardSpec")); + Assert.assertEquals(IndexIO.CURRENT_VERSION_ID, objectMap.get("binaryVersion")); Assert.assertEquals(1, objectMap.get("size")); DataSegment deserializedSegment = mapper.readValue(mapper.writeValueAsString(segment), DataSegment.class); diff --git a/client/src/test/java/com/metamx/druid/client/cache/MapCacheBrokerTest.java b/client/src/test/java/com/metamx/druid/client/cache/MapCacheTest.java similarity index 58% rename from client/src/test/java/com/metamx/druid/client/cache/MapCacheBrokerTest.java rename to client/src/test/java/com/metamx/druid/client/cache/MapCacheTest.java index 4338a23a10d..23a3bd1d641 100644 --- a/client/src/test/java/com/metamx/druid/client/cache/MapCacheBrokerTest.java +++ b/client/src/test/java/com/metamx/druid/client/cache/MapCacheTest.java @@ -26,61 +26,58 @@ import org.junit.Test; /** */ -public class MapCacheBrokerTest +public class MapCacheTest { private static final byte[] HI = "hi".getBytes(); private static final byte[] HO = "ho".getBytes(); private ByteCountingLRUMap baseMap; - private MapCacheBroker broker; + private MapCache cache; @Before public void setUp() throws Exception { baseMap = new ByteCountingLRUMap(1024 * 1024); - broker = new MapCacheBroker(baseMap); + cache = new MapCache(baseMap); } @Test public void testSanity() throws Exception { - Cache aCache = broker.provideCache("a"); - Cache theCache = broker.provideCache("the"); - - Assert.assertNull(aCache.get(HI)); + Assert.assertNull(cache.get(new Cache.NamedKey("a", HI))); Assert.assertEquals(0, baseMap.size()); - put(aCache, HI, 1); + put(cache, "a", HI, 1); Assert.assertEquals(1, baseMap.size()); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertNull(theCache.get(HI)); + Assert.assertEquals(1, get(cache, "a", HI)); + 
Assert.assertNull(cache.get(new Cache.NamedKey("the", HI))); - put(theCache, HI, 2); + put(cache, "the", HI, 2); Assert.assertEquals(2, baseMap.size()); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertEquals(2, get(theCache, HI)); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertEquals(2, get(cache, "the", HI)); - put(theCache, HO, 10); + put(cache, "the", HO, 10); Assert.assertEquals(3, baseMap.size()); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertNull(aCache.get(HO)); - Assert.assertEquals(2, get(theCache, HI)); - Assert.assertEquals(10, get(theCache, HO)); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertNull(cache.get(new Cache.NamedKey("a", HO))); + Assert.assertEquals(2, get(cache, "the", HI)); + Assert.assertEquals(10, get(cache, "the", HO)); - theCache.close(); + cache.close("the"); Assert.assertEquals(1, baseMap.size()); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertNull(aCache.get(HO)); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertNull(cache.get(new Cache.NamedKey("a", HO))); - aCache.close(); + cache.close("a"); Assert.assertEquals(0, baseMap.size()); } - public void put(Cache cache, byte[] key, Integer value) + public void put(Cache cache, String namespace, byte[] key, Integer value) { - cache.put(key, Ints.toByteArray(value)); + cache.put(new Cache.NamedKey(namespace, key), Ints.toByteArray(value)); } - public int get(Cache cache, byte[] key) + public int get(Cache cache, String namespace, byte[] key) { - return Ints.fromByteArray(cache.get(key)); + return Ints.fromByteArray(cache.get(new Cache.NamedKey(namespace, key))); } } diff --git a/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBrokerBenchmark.java b/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBenchmark.java similarity index 65% rename from client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBrokerBenchmark.java rename to client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBenchmark.java index 2fa7d3b4193..3a746c9484b 100644 --- a/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBrokerBenchmark.java +++ b/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBenchmark.java @@ -3,6 +3,7 @@ package com.metamx.druid.client.cache; import com.google.caliper.Param; import com.google.caliper.Runner; import com.google.caliper.SimpleBenchmark; +import com.google.common.collect.Lists; import net.spy.memcached.AddrUtil; import net.spy.memcached.ConnectionFactoryBuilder; import net.spy.memcached.DefaultHashAlgorithm; @@ -11,17 +12,19 @@ import net.spy.memcached.MemcachedClient; import net.spy.memcached.MemcachedClientIF; import net.spy.memcached.transcoders.SerializingTranscoder; +import java.util.List; +import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; -public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark +public class MemcachedCacheBenchmark extends SimpleBenchmark { private static final String BASE_KEY = "test_2012-11-26T00:00:00.000Z_2012-11-27T00:00:00.000Z_2012-11-27T04:11:25.979Z_"; + public static final String NAMESPACE = "default"; - private MemcachedCacheBroker broker; + private MemcachedCache cache; private MemcachedClientIF client; - private Cache cache; private static byte[] randBytes; @Param({"localhost:11211"}) String hosts; @@ -39,8 +42,6 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark // disable compression transcoder.setCompressionThreshold(Integer.MAX_VALUE); - 
System.out.println(String.format("Using memcached hosts [%s]", hosts)); - client = new MemcachedClient( new ConnectionFactoryBuilder().setProtocol(ConnectionFactoryBuilder.Protocol.BINARY) .setHashAlg(DefaultHashAlgorithm.FNV1A_64_HASH) @@ -53,14 +54,13 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark AddrUtil.getAddresses(hosts) ); - broker = new MemcachedCacheBroker( + cache = new MemcachedCache( client, - 500, // 500 milliseconds - 3600 * 24 * 365 // 1 year + "druid-memcached-benchmark", + 30000, // 30 seconds + 3600 // 1 hour ); - cache = broker.provideCache("default"); - randBytes = new byte[objectSize * 1024]; new Random(0).nextBytes(randBytes); @@ -69,33 +69,51 @@ public class MemcachedCacheBrokerBenchmark extends SimpleBenchmark @Override protected void tearDown() throws Exception { - client.flush(); - client.shutdown(); + client.shutdown(1, TimeUnit.MINUTES); } public void timePutObjects(int reps) { for(int i = 0; i < reps; ++i) { for(int k = 0; k < objectCount; ++k) { - String key = BASE_KEY + i; - cache.put(key.getBytes(), randBytes); + String key = BASE_KEY + k; + cache.put(new Cache.NamedKey(NAMESPACE, key.getBytes()), randBytes); } // make sure the write queue is empty client.waitForQueues(1, TimeUnit.HOURS); } } - public byte[] timeGetObject(int reps) { + public long timeGetObject(int reps) { byte[] bytes = null; + long count = 0; for (int i = 0; i < reps; i++) { for(int k = 0; k < objectCount; ++k) { - String key = BASE_KEY + i; - bytes = cache.get(key.getBytes()); + String key = BASE_KEY + k; + bytes = cache.get(new Cache.NamedKey(NAMESPACE, key.getBytes())); + count += bytes.length; } } - return bytes; + return count; + } + + public long timeBulkGetObjects(int reps) { + long count = 0; + for (int i = 0; i < reps; i++) { + List keys = Lists.newArrayList(); + for(int k = 0; k < objectCount; ++k) { + String key = BASE_KEY + k; + keys.add(new Cache.NamedKey(NAMESPACE, key.getBytes())); + } + Map results = cache.getBulk(keys); + for(Cache.NamedKey key : keys) { + byte[] bytes = results.get(key); + count += bytes.length; + } + } + return count; } public static void main(String[] args) throws Exception { - Runner.main(MemcachedCacheBrokerBenchmark.class, args); + Runner.main(MemcachedCacheBenchmark.class, args); } } diff --git a/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBrokerTest.java b/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheTest.java similarity index 79% rename from client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBrokerTest.java rename to client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheTest.java index 3cb2dba09b8..287d208db62 100644 --- a/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheBrokerTest.java +++ b/client/src/test/java/com/metamx/druid/client/cache/MemcachedCacheTest.java @@ -19,6 +19,8 @@ package com.metamx.druid.client.cache; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.primitives.Ints; import net.spy.memcached.CASResponse; import net.spy.memcached.CASValue; @@ -27,6 +29,7 @@ import net.spy.memcached.ConnectionObserver; import net.spy.memcached.MemcachedClientIF; import net.spy.memcached.NodeLocator; import net.spy.memcached.internal.BulkFuture; +import net.spy.memcached.ops.OperationStatus; import net.spy.memcached.transcoders.SerializingTranscoder; import net.spy.memcached.transcoders.Transcoder; import org.junit.Assert; @@ -47,55 +50,74 @@ import java.util.concurrent.TimeoutException; 
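// A minimal sketch of the namespaced cache API that the MapCacheTest and MemcachedCacheTest
// hunks around this point exercise: the old CacheBroker/provideCache(namespace) pair is
// replaced by a single Cache whose entries are addressed with Cache.NamedKey(namespace, key).
// Only constructors and calls visible in the surrounding test code are used; the wrapper
// class, method name, and package paths below are illustrative assumptions.
import com.google.common.primitives.Ints;
import com.metamx.druid.client.cache.ByteCountingLRUMap;
import com.metamx.druid.client.cache.Cache;
import com.metamx.druid.client.cache.MapCache;

class NamespacedCacheSketch
{
  static void example()
  {
    // Heap-backed implementation, capped at 1MB of cached bytes (as in MapCacheTest's setUp).
    MapCache cache = new MapCache(new ByteCountingLRUMap(1024 * 1024));

    Cache.NamedKey key = new Cache.NamedKey("a", "hi".getBytes()); // namespace + raw key
    cache.put(key, Ints.toByteArray(1));                           // store a serialized int
    int value = Ints.fromByteArray(cache.get(key));                // read it back

    cache.close("a"); // drops every entry in namespace "a"

    // MemcachedCacheTest additionally shows a bulk variant on the memcached-backed cache:
    //   Map<Cache.NamedKey, byte[]> results = cache.getBulk(Lists.newArrayList(key1, key2));
  }
}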
/** */ -public class MemcachedCacheBrokerTest +public class MemcachedCacheTest { private static final byte[] HI = "hi".getBytes(); private static final byte[] HO = "ho".getBytes(); - private MemcachedCacheBroker broker; + private MemcachedCache cache; @Before public void setUp() throws Exception { MemcachedClientIF client = new MockMemcachedClient(); - broker = new MemcachedCacheBroker(client, 500, 3600); + cache = new MemcachedCache(client, "druid-memcached-test", 500, 3600); } @Test public void testSanity() throws Exception { - Cache aCache = broker.provideCache("a"); - Cache theCache = broker.provideCache("the"); + Assert.assertNull(cache.get(new Cache.NamedKey("a", HI))); + put(cache, "a", HI, 1); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertNull(cache.get(new Cache.NamedKey("the", HI))); - Assert.assertNull(aCache.get(HI)); - put(aCache, HI, 1); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertNull(theCache.get(HI)); + put(cache, "the", HI, 2); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertEquals(2, get(cache, "the", HI)); - put(theCache, HI, 2); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertEquals(2, get(theCache, HI)); + put(cache, "the", HO, 10); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertNull(cache.get(new Cache.NamedKey("a", HO))); + Assert.assertEquals(2, get(cache, "the", HI)); + Assert.assertEquals(10, get(cache, "the", HO)); - put(theCache, HO, 10); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertNull(aCache.get(HO)); - Assert.assertEquals(2, get(theCache, HI)); - Assert.assertEquals(10, get(theCache, HO)); + cache.close("the"); + Assert.assertEquals(1, get(cache, "a", HI)); + Assert.assertNull(cache.get(new Cache.NamedKey("a", HO))); - theCache.close(); - Assert.assertEquals(1, get(aCache, HI)); - Assert.assertNull(aCache.get(HO)); - - aCache.close(); + cache.close("a"); } - public void put(Cache cache, byte[] key, Integer value) + @Test + public void testGetBulk() throws Exception { - cache.put(key, Ints.toByteArray(value)); + Assert.assertNull(cache.get(new Cache.NamedKey("the", HI))); + + put(cache, "the", HI, 2); + put(cache, "the", HO, 10); + + Cache.NamedKey key1 = new Cache.NamedKey("the", HI); + Cache.NamedKey key2 = new Cache.NamedKey("the", HO); + + Map result = cache.getBulk( + Lists.newArrayList( + key1, + key2 + ) + ); + + Assert.assertEquals(2, Ints.fromByteArray(result.get(key1))); + Assert.assertEquals(10, Ints.fromByteArray(result.get(key2))); } - public int get(Cache cache, byte[] key) + public void put(Cache cache, String namespace, byte[] key, Integer value) { - return Ints.fromByteArray(cache.get(key)); + cache.put(new Cache.NamedKey(namespace, key), Ints.toByteArray(value)); + } + + public int get(Cache cache, String namespace, byte[] key) + { + return Ints.fromByteArray(cache.get(new Cache.NamedKey(namespace, key))); } } @@ -365,9 +387,67 @@ class MockMemcachedClient implements MemcachedClientIF } @Override - public BulkFuture> asyncGetBulk(Iterator keys, Transcoder tc) + public BulkFuture> asyncGetBulk(final Iterator keys, final Transcoder tc) { - throw new UnsupportedOperationException("not implemented"); + return new BulkFuture>() + { + @Override + public boolean isTimeout() + { + return false; + } + + @Override + public Map getSome(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException + { + return get(); + } + + @Override + public OperationStatus getStatus() + { + return null; + } + + @Override + public boolean cancel(boolean b) + { + return 
false; + } + + @Override + public boolean isCancelled() + { + return false; + } + + @Override + public boolean isDone() + { + return true; + } + + @Override + public Map get() throws InterruptedException, ExecutionException + { + Map retVal = Maps.newHashMap(); + + while(keys.hasNext()) { + String key = keys.next(); + CachedData data = theMap.get(key); + retVal.put(key, data != null ? tc.decode(data) : null); + } + + return retVal; + } + + @Override + public Map get(long l, TimeUnit timeUnit) + throws InterruptedException, ExecutionException, TimeoutException + { + return get(); + } + }; } @Override @@ -383,9 +463,9 @@ class MockMemcachedClient implements MemcachedClientIF } @Override - public BulkFuture> asyncGetBulk(Collection keys) + public BulkFuture> asyncGetBulk(final Collection keys) { - throw new UnsupportedOperationException("not implemented"); + return asyncGetBulk(keys.iterator(), transcoder); } @Override diff --git a/common/pom.xml b/common/pom.xml index cfdbf4782d8..6daa2117d7a 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/common/src/main/java/com/metamx/druid/aggregation/post/ArithmeticPostAggregator.java b/common/src/main/java/com/metamx/druid/aggregation/post/ArithmeticPostAggregator.java index 035d0fa6652..cec19d80d78 100644 --- a/common/src/main/java/com/metamx/druid/aggregation/post/ArithmeticPostAggregator.java +++ b/common/src/main/java/com/metamx/druid/aggregation/post/ArithmeticPostAggregator.java @@ -20,6 +20,7 @@ package com.metamx.druid.aggregation.post; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.metamx.common.IAE; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -69,14 +70,13 @@ public class ArithmeticPostAggregator implements PostAggregator } @Override - public boolean verifyFields(Set fieldNames) + public Set getDependentFields() { + Set dependentFields = Sets.newHashSet(); for (PostAggregator field : fields) { - if (!field.verifyFields(fieldNames)) { - return false; - } + dependentFields.addAll(field.getDependentFields()); } - return true; + return dependentFields; } @Override diff --git a/common/src/main/java/com/metamx/druid/aggregation/post/ConstantPostAggregator.java b/common/src/main/java/com/metamx/druid/aggregation/post/ConstantPostAggregator.java index 645de6c9b80..f1bbb0d8392 100644 --- a/common/src/main/java/com/metamx/druid/aggregation/post/ConstantPostAggregator.java +++ b/common/src/main/java/com/metamx/druid/aggregation/post/ConstantPostAggregator.java @@ -19,6 +19,7 @@ package com.metamx.druid.aggregation.post; +import com.google.common.collect.Sets; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -44,9 +45,9 @@ public class ConstantPostAggregator implements PostAggregator } @Override - public boolean verifyFields(Set fields) + public Set getDependentFields() { - return true; + return Sets.newHashSet(); } @Override diff --git a/common/src/main/java/com/metamx/druid/aggregation/post/FieldAccessPostAggregator.java b/common/src/main/java/com/metamx/druid/aggregation/post/FieldAccessPostAggregator.java index 0a1c866d044..780c720103d 100644 --- a/common/src/main/java/com/metamx/druid/aggregation/post/FieldAccessPostAggregator.java +++ b/common/src/main/java/com/metamx/druid/aggregation/post/FieldAccessPostAggregator.java @@ -19,6 +19,7 @@ package com.metamx.druid.aggregation.post; +import 
com.google.common.collect.Sets; import com.metamx.common.ISE; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -45,9 +46,9 @@ public class FieldAccessPostAggregator implements PostAggregator } @Override - public boolean verifyFields(Set fieldNames) + public Set getDependentFields() { - return fieldNames.contains(fieldName); + return Sets.newHashSet(fieldName); } @Override diff --git a/common/src/main/java/com/metamx/druid/aggregation/post/PostAggregator.java b/common/src/main/java/com/metamx/druid/aggregation/post/PostAggregator.java index 487ac30efb3..5b1ebc60528 100644 --- a/common/src/main/java/com/metamx/druid/aggregation/post/PostAggregator.java +++ b/common/src/main/java/com/metamx/druid/aggregation/post/PostAggregator.java @@ -37,7 +37,7 @@ import java.util.Set; }) public interface PostAggregator { - public boolean verifyFields(Set fieldNames); + public Set getDependentFields(); public Comparator getComparator(); diff --git a/common/src/main/java/com/metamx/druid/db/DbConnector.java b/common/src/main/java/com/metamx/druid/db/DbConnector.java index 99712df22a5..73013ce6aa2 100644 --- a/common/src/main/java/com/metamx/druid/db/DbConnector.java +++ b/common/src/main/java/com/metamx/druid/db/DbConnector.java @@ -59,6 +59,18 @@ public class DbConnector ); } + public static void createConfigTable(final DBI dbi, final String configTableName) + { + createTable( + dbi, + configTableName, + String.format( + "CREATE table %s (name VARCHAR(255) NOT NULL, payload LONGTEXT NOT NULL, PRIMARY KEY(name))", + configTableName + ) + ); + } + public static void createTable( final DBI dbi, final String tableName, diff --git a/common/src/main/java/com/metamx/druid/input/MapBasedRow.java b/common/src/main/java/com/metamx/druid/input/MapBasedRow.java index 03c56e2f66d..d823762eddd 100644 --- a/common/src/main/java/com/metamx/druid/input/MapBasedRow.java +++ b/common/src/main/java/com/metamx/druid/input/MapBasedRow.java @@ -37,7 +37,7 @@ import java.util.Map; */ public class MapBasedRow implements Row { - private final long timestamp; + private final DateTime timestamp; private final Map event; @JsonCreator @@ -46,22 +46,21 @@ public class MapBasedRow implements Row @JsonProperty("event") Map event ) { - this(timestamp.getMillis(), event); + this.timestamp = timestamp; + this.event = event; } public MapBasedRow( long timestamp, Map event - ) - { - this.timestamp = timestamp; - this.event = event; + ) { + this(new DateTime(timestamp), event); } @Override public long getTimestampFromEpoch() { - return timestamp; + return timestamp.getMillis(); } @Override @@ -120,7 +119,7 @@ public class MapBasedRow implements Row @JsonProperty public DateTime getTimestamp() { - return new DateTime(timestamp); + return timestamp; } @JsonProperty @@ -133,9 +132,38 @@ public class MapBasedRow implements Row public String toString() { return "MapBasedRow{" + - "timestamp=" + new DateTime(timestamp) + + "timestamp=" + timestamp + ", event=" + event + '}'; } + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + MapBasedRow that = (MapBasedRow) o; + + if (!event.equals(that.event)) { + return false; + } + if (!timestamp.equals(that.timestamp)) { + return false; + } + + return true; + } + + @Override + public int hashCode() + { + int result = timestamp.hashCode(); + result = 31 * result + event.hashCode(); + return result; + } } diff --git 
a/common/src/main/java/com/metamx/druid/input/Rows.java b/common/src/main/java/com/metamx/druid/input/Rows.java index b77ab749761..3e70b4c26d5 100644 --- a/common/src/main/java/com/metamx/druid/input/Rows.java +++ b/common/src/main/java/com/metamx/druid/input/Rows.java @@ -52,6 +52,12 @@ public class Rows { return row.getFloatMetric(metric); } + + @Override + public String toString() + { + return row.toString(); + } }; } } diff --git a/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java b/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java index 7c96a8a3b2b..c80958eff88 100644 --- a/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java +++ b/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java @@ -35,10 +35,13 @@ import org.codehaus.jackson.map.JsonSerializer; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.SerializationConfig; import org.codehaus.jackson.map.SerializerProvider; +import org.codehaus.jackson.map.Serializers; import org.codehaus.jackson.map.module.SimpleModule; +import org.codehaus.jackson.map.ser.std.ToStringSerializer; import org.joda.time.DateTimeZone; import java.io.IOException; +import java.nio.ByteOrder; import java.util.TimeZone; /** @@ -131,6 +134,23 @@ public class DefaultObjectMapper extends ObjectMapper } } ); + serializerModule.addSerializer(ByteOrder.class, ToStringSerializer.instance); + serializerModule.addDeserializer( + ByteOrder.class, + new JsonDeserializer() + { + @Override + public ByteOrder deserialize( + JsonParser jp, DeserializationContext ctxt + ) throws IOException, JsonProcessingException + { + if (ByteOrder.BIG_ENDIAN.toString().equals(jp.getText())) { + return ByteOrder.BIG_ENDIAN; + } + return ByteOrder.LITTLE_ENDIAN; + } + } + ); registerModule(serializerModule); configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); diff --git a/common/src/main/java/com/metamx/druid/processing/MetricSelectorFactory.java b/common/src/main/java/com/metamx/druid/processing/MetricSelectorFactory.java index 8c023c322f4..5864766787f 100644 --- a/common/src/main/java/com/metamx/druid/processing/MetricSelectorFactory.java +++ b/common/src/main/java/com/metamx/druid/processing/MetricSelectorFactory.java @@ -19,8 +19,6 @@ package com.metamx.druid.processing; -import java.io.Closeable; - /** * Factory class for MetricSelectors */ diff --git a/druid-services/pom.xml b/druid-services/pom.xml index cba7eec5f3b..7e7aaf65fa6 100644 --- a/druid-services/pom.xml +++ b/druid-services/pom.xml @@ -24,11 +24,11 @@ druid-services druid-services druid-services - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/examples/pom.xml b/examples/pom.xml index 489d4481371..ea253aae545 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/examples/rand/pom.xml b/examples/rand/pom.xml index 995d649ff0c..0ef7456c907 100644 --- a/examples/rand/pom.xml +++ b/examples/rand/pom.xml @@ -9,7 +9,7 @@ com.metamx druid-examples - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java b/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java index 29263f1d6e5..4ffd7119431 100644 --- a/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java +++ b/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java @@ -11,7 +11,7 @@ import 
com.metamx.druid.log.LogLevelAdjuster; import com.metamx.druid.realtime.MetadataUpdater; import com.metamx.druid.realtime.MetadataUpdaterConfig; import com.metamx.druid.realtime.RealtimeNode; -import com.metamx.druid.realtime.SegmentPusher; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.phonebook.PhoneBook; import org.codehaus.jackson.map.jsontype.NamedType; @@ -21,8 +21,6 @@ import java.io.IOException; /** * Standalone Demo Realtime process. * Created: 20121009T2050 - * - * @author pbaclace */ public class RealtimeStandaloneMain { diff --git a/examples/rand/src/main/resources/runtime.properties b/examples/rand/src/main/resources/runtime.properties index 35ed5646a3f..c9483846106 100644 --- a/examples/rand/src/main/resources/runtime.properties +++ b/examples/rand/src/main/resources/runtime.properties @@ -41,14 +41,6 @@ druid.paths.segmentInfoCache=/tmp/rand_realtime/segmentInfoCache # Path to schema definition file druid.request.logging.dir=/tmp/rand_realtime/log -# TODO: have these moved to spec file? -# unknown # druid.realtime.dataSources= -# unknown # druid.realtime.index.maxSize=500000 -# unknown # druid.realtime.persistPeriod=PT600S -# unknown # druid.realtime.scheduledExec.threads=1 -# unknown # druid.realtime.uploadPeriod=PT3600S -# unknown # druid.realtime.windowPeriod=PT600S - #druid.server.maxSize=0 druid.server.maxSize=300000000000 # =realtime or =historical (default) diff --git a/examples/twitter/group_by_query.body b/examples/twitter/group_by_query.body index 4ea7806e94c..e0607aa1554 100644 --- a/examples/twitter/group_by_query.body +++ b/examples/twitter/group_by_query.body @@ -2,19 +2,11 @@ "queryType": "groupBy", "dataSource": "twitterstream", "granularity": "all", - "dimensions": ["lang"], + "dimensions": ["lang", "utc_offset"], "aggregations":[ - { "type": "count", "name": "rows"}, - { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"}, - - { "type": "max", "fieldName": "max_statuses_count", "name": "theMaxStatusesCount"}, - { "type": "max", "fieldName": "max_retweet_count", "name": "theMaxRetweetCount"}, - - { "type": "max", "fieldName": "max_friends_count", "name": "theMaxFriendsCount"}, - { "type": "max", "fieldName": "max_follower_count", "name": "theMaxFollowerCount"}, - - { "type": "doubleSum", "fieldName": "total_statuses_count", "name": "total_tweets_all_time"} - + { "type": "count", "name": "rows"}, + { "type": "doubleSum", "fieldName": "tweets", "name": "tweets"} ], + "filter": { "type": "selector", "dimension": "lang", "value": "en" }, "intervals":["2012-10-01T00:00/2020-01-01T00"] } diff --git a/examples/twitter/pom.xml b/examples/twitter/pom.xml index a5e7d242f1a..7c572cd2f32 100644 --- a/examples/twitter/pom.xml +++ b/examples/twitter/pom.xml @@ -9,7 +9,7 @@ com.metamx druid-examples - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java b/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java index 32f355c756b..fd072a427f3 100644 --- a/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java +++ b/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java @@ -11,7 +11,7 @@ import com.metamx.druid.log.LogLevelAdjuster; import com.metamx.druid.realtime.MetadataUpdater; import com.metamx.druid.realtime.MetadataUpdaterConfig; import com.metamx.druid.realtime.RealtimeNode; -import com.metamx.druid.realtime.SegmentPusher; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.phonebook.PhoneBook; import 
druid.examples.twitter.TwitterSpritzerFirehoseFactory; import org.codehaus.jackson.map.jsontype.NamedType; @@ -21,8 +21,6 @@ import java.io.IOException; /** Standalone Demo Realtime process. * Created: 20121009T2050 - * - * @author pbaclace */ public class RealtimeStandaloneMain { diff --git a/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java b/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java index 249ea12e3b1..992cd239487 100644 --- a/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java +++ b/examples/twitter/src/main/java/druid/examples/twitter/TwitterSpritzerFirehoseFactory.java @@ -1,25 +1,34 @@ package druid.examples.twitter; +import com.google.common.collect.Lists; import com.metamx.common.logger.Logger; import com.metamx.druid.input.InputRow; import com.metamx.druid.input.MapBasedInputRow; import com.metamx.druid.realtime.Firehose; import com.metamx.druid.realtime.FirehoseFactory; import org.codehaus.jackson.annotate.JsonCreator; -import org.codehaus.jackson.annotate.JsonTypeName; import org.codehaus.jackson.annotate.JsonProperty; -import org.codehaus.jackson.map.ObjectMapper; -import twitter4j.*; +import org.codehaus.jackson.annotate.JsonTypeName; +import twitter4j.ConnectionLifeCycleListener; +import twitter4j.HashtagEntity; +import twitter4j.Status; +import twitter4j.StatusDeletionNotice; +import twitter4j.StatusListener; +import twitter4j.TwitterStream; +import twitter4j.TwitterStreamFactory; +import twitter4j.User; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; -import static java.lang.Thread.*; +import static java.lang.Thread.sleep; /** @@ -117,12 +126,8 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory { final long startMsec = System.currentTimeMillis(); dimensions.add("htags"); - dimensions.add("retweet_count"); - dimensions.add("follower_count"); - dimensions.add("friends_count"); dimensions.add("lang"); dimensions.add("utc_offset"); - dimensions.add("statuses_count"); // // set up Twitter Spritzer @@ -245,39 +250,26 @@ public class TwitterSpritzerFirehoseFactory implements FirehoseFactory { } catch (InterruptedException e) { throw new RuntimeException("InterruptedException", e); } - //log.info("twitterStatus: "+ status.getCreatedAt() + " @" + status.getUser().getScreenName() + " - " + status.getText());//DEBUG - - // theMap.put("twid", status.getUser().getScreenName()); - // theMap.put("msg", status.getText()); // ToDo: verify encoding HashtagEntity[] hts = status.getHashtagEntities(); if (hts != null && hts.length > 0) { - // ToDo: get all the hash tags instead of just the first one - theMap.put("htags", hts[0].getText()); - log.info("htags=" + hts[0].getText()); // about 16% - } else { - theMap.put("htags", null); + List hashTags = Lists.newArrayListWithExpectedSize(hts.length); + for (HashtagEntity ht : hts) { + hashTags.add(ht.getText()); + } + + theMap.put("htags", Arrays.asList(hashTags.get(0))); } long retweetCount = status.getRetweetCount(); theMap.put("retweet_count", retweetCount); - User u = status.getUser(); - if (u != null) { - theMap.put("follower_count", u.getFollowersCount()); - theMap.put("friends_count", u.getFriendsCount()); - theMap.put("lang", u.getLang()); - 
theMap.put("utc_offset", u.getUtcOffset()); // resolution in seconds, -1 if not available? - theMap.put("statuses_count", u.getStatusesCount()); - } else { - log.error("status.getUser() is null"); - } - if (rowCount % 10 == 0) { - log.info("" + status.getCreatedAt() + - " follower_count=" + u.getFollowersCount() + - " friends_count=" + u.getFriendsCount() + - " statuses_count=" + u.getStatusesCount() + - " retweet_count=" + retweetCount - ); + User user = status.getUser(); + if (user != null) { + theMap.put("follower_count", user.getFollowersCount()); + theMap.put("friends_count", user.getFriendsCount()); + theMap.put("lang", user.getLang()); + theMap.put("utc_offset", user.getUtcOffset()); // resolution in seconds, -1 if not available? + theMap.put("statuses_count", user.getStatusesCount()); } return new MapBasedInputRow(status.getCreatedAt().getTime(), dimensions, theMap); diff --git a/examples/twitter/src/main/resources/runtime.properties b/examples/twitter/src/main/resources/runtime.properties index dfa12a8f0ec..8299b6d3c36 100644 --- a/examples/twitter/src/main/resources/runtime.properties +++ b/examples/twitter/src/main/resources/runtime.properties @@ -41,14 +41,6 @@ druid.paths.segmentInfoCache=/tmp/twitter_realtime/segmentInfoCache # Path to schema definition file druid.request.logging.dir=/tmp/twitter_realtime/log -# TODO: have these moved to spec file? -# unknown # druid.realtime.dataSources= -# unknown # druid.realtime.index.maxSize=500000 -# unknown # druid.realtime.persistPeriod=PT600S -# unknown # druid.realtime.scheduledExec.threads=1 -# unknown # druid.realtime.uploadPeriod=PT3600S -# unknown # druid.realtime.windowPeriod=PT600S - #druid.server.maxSize=0 druid.server.maxSize=300000000000 # =realtime or =historical (default) diff --git a/examples/twitter/twitter_realtime.spec b/examples/twitter/twitter_realtime.spec index 14d34421c6b..00b1707028d 100644 --- a/examples/twitter/twitter_realtime.spec +++ b/examples/twitter/twitter_realtime.spec @@ -31,8 +31,8 @@ "firehose": { "type": "twitzer", - "maxEventCount": 50000, - "maxRunMinutes": 10 + "maxEventCount": 500000, + "maxRunMinutes": 120 }, "plumber": { diff --git a/index-common/pom.xml b/index-common/pom.xml index 4cb30db8b05..7fa9e7cabd7 100644 --- a/index-common/pom.xml +++ b/index-common/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java b/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java new file mode 100644 index 00000000000..82cee9e54dd --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/QueryableIndex.java @@ -0,0 +1,34 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index; + +import com.metamx.druid.index.column.ColumnSelector; +import com.metamx.druid.kv.Indexed; +import org.joda.time.Interval; + +/** + */ +public interface QueryableIndex extends ColumnSelector +{ + public Interval getDataInterval(); + public int getNumRows(); + public Indexed getColumnNames(); + public Indexed getAvailableDimensions(); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java b/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java new file mode 100644 index 00000000000..2f60b73adc6 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/SimpleQueryableIndex.java @@ -0,0 +1,88 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index; + +import com.metamx.druid.index.column.Column; +import com.metamx.druid.kv.Indexed; +import org.joda.time.Interval; + +import java.util.Map; + +/** + */ +public class SimpleQueryableIndex implements QueryableIndex +{ + private final Interval dataInterval; + private final Indexed columnNames; + private final Indexed availableDimensions; + private final Column timeColumn; + private final Map otherColumns; + + public SimpleQueryableIndex( + Interval dataInterval, + Indexed columnNames, + Indexed dimNames, + Column timeColumn, + Map otherColumns + ) + { + this.dataInterval = dataInterval; + this.columnNames = columnNames; + this.availableDimensions = dimNames; + this.timeColumn = timeColumn; + this.otherColumns = otherColumns; + } + + @Override + public Interval getDataInterval() + { + return dataInterval; + } + + @Override + public int getNumRows() + { + return timeColumn.getLength(); + } + + @Override + public Indexed getColumnNames() + { + return columnNames; + } + + @Override + public Indexed getAvailableDimensions() + { + return availableDimensions; + } + + @Override + public Column getTimeColumn() + { + return timeColumn; + } + + @Override + public Column getColumn(String columnName) + { + return otherColumns.get(columnName); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/AbstractColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/AbstractColumn.java new file mode 100644 index 00000000000..978a3822071 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/AbstractColumn.java @@ -0,0 +1,61 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
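/*
 * Illustrative sketch, not part of this patch: how a caller might interrogate the
 * new QueryableIndex abstraction. `index` is assumed to be a SimpleQueryableIndex
 * produced by whatever loader fills it in; getColumnNames() is assumed to be an
 * Indexed of String column names (type parameters are not shown in this patch).
 */
import com.metamx.druid.index.QueryableIndex;
import com.metamx.druid.index.column.Column;
import com.metamx.druid.kv.Indexed;

class QueryableIndexSketch
{
  static void describe(QueryableIndex index)
  {
    System.out.println("interval: " + index.getDataInterval());
    System.out.println("rows:     " + index.getNumRows()); // backed by the time column's length

    Indexed<String> names = index.getColumnNames(); // non-time columns; dimensions are a subset
    for (int i = 0; i < names.size(); i++) {
      Column column = index.getColumn(names.get(i));
      System.out.println(names.get(i) + " -> " + column.getCapabilities().getType());
    }
  }
}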
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +/** + */ +public abstract class AbstractColumn implements Column +{ + @Override + public BitmapIndex getBitmapIndex() + { + throw new UnsupportedOperationException(); + } + + @Override + public ComplexColumn getComplexColumn() + { + throw new UnsupportedOperationException(); + } + + @Override + public GenericColumn getGenericColumn() + { + throw new UnsupportedOperationException(); + } + + @Override + public RunLengthColumn getRunLengthColumn() + { + throw new UnsupportedOperationException(); + } + + @Override + public DictionaryEncodedColumn getDictionaryEncoding() + { + throw new UnsupportedOperationException(); + } + + @Override + public ColumnCapabilities getCapabilities() + { + throw new UnsupportedOperationException(); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/BitmapIndex.java b/index-common/src/main/java/com/metamx/druid/index/column/BitmapIndex.java new file mode 100644 index 00000000000..6873292fee2 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/BitmapIndex.java @@ -0,0 +1,31 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; + +/** + */ +public interface BitmapIndex +{ + public int getCardinality(); + public String getValue(int index); + public ImmutableConciseSet getConciseSet(String value); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/Column.java b/index-common/src/main/java/com/metamx/druid/index/column/Column.java new file mode 100644 index 00000000000..fa418a3398a --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/Column.java @@ -0,0 +1,34 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
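/*
 * Illustrative sketch, not part of this patch: the BitmapIndex contract above maps
 * dictionary ordinals to values and values to concise row sets. Dumping it uses
 * only the three methods declared here; it assumes the column actually advertises
 * bitmap indexes (see ColumnCapabilities later in this patch).
 */
import com.metamx.druid.index.column.BitmapIndex;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;

class BitmapIndexSketch
{
  static void dump(BitmapIndex bitmapIndex)
  {
    for (int i = 0; i < bitmapIndex.getCardinality(); i++) {
      String value = bitmapIndex.getValue(i);
      ImmutableConciseSet rows = bitmapIndex.getConciseSet(value); // empty set when value is unknown
      System.out.println(value + " -> " + rows);
    }
  }
}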
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +/** + */ +public interface Column +{ + public ColumnCapabilities getCapabilities(); + + public int getLength(); + public DictionaryEncodedColumn getDictionaryEncoding(); + public RunLengthColumn getRunLengthColumn(); + public GenericColumn getGenericColumn(); + public ComplexColumn getComplexColumn(); + public BitmapIndex getBitmapIndex(); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ColumnBuilder.java b/index-common/src/main/java/com/metamx/druid/index/column/ColumnBuilder.java new file mode 100644 index 00000000000..7bf77e4fa10 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ColumnBuilder.java @@ -0,0 +1,99 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
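/*
 * Illustrative sketch, not part of this patch: Column exposes several typed views
 * and the unsupported ones throw (see AbstractColumn above), so callers are
 * expected to consult ColumnCapabilities first. The helper below shows that
 * pattern using only methods declared in this patch.
 */
import com.metamx.druid.index.column.Column;
import com.metamx.druid.index.column.ColumnCapabilities;

class ColumnAccessSketch
{
  static Object openPreferredView(Column column)
  {
    ColumnCapabilities caps = column.getCapabilities();
    if (caps.isDictionaryEncoded()) {
      return column.getDictionaryEncoding();
    }
    switch (caps.getType()) {
      case FLOAT:
      case LONG:
        return column.getGenericColumn();  // Closeable; callers must close it when done
      case COMPLEX:
        return column.getComplexColumn();  // also Closeable
      case STRING:
      default:
        // strings in this patch are dictionary encoded, so this branch is unexpected
        throw new UnsupportedOperationException("no view for type " + caps.getType());
    }
  }
}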
+ */ + +package com.metamx.druid.index.column; + +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; + +/** + */ +public class ColumnBuilder +{ + private ValueType type = null; + private boolean hasMultipleValues = false; + + private Supplier dictionaryEncodedColumn = null; + private Supplier runLengthColumn = null; + private Supplier genericColumn = null; + private Supplier complexColumn = null; + private Supplier bitmapIndex = null; + + public ColumnBuilder setType(ValueType type) + { + this.type = type; + return this; + } + + public ColumnBuilder setHasMultipleValues(boolean hasMultipleValues) + { + this.hasMultipleValues = hasMultipleValues; + return this; + } + + public ColumnBuilder setDictionaryEncodedColumn(Supplier dictionaryEncodedColumn) + { + this.dictionaryEncodedColumn = dictionaryEncodedColumn; + return this; + } + + public ColumnBuilder setRunLengthColumn(Supplier runLengthColumn) + { + this.runLengthColumn = runLengthColumn; + return this; + } + + public ColumnBuilder setGenericColumn(Supplier genericColumn) + { + this.genericColumn = genericColumn; + return this; + } + + public ColumnBuilder setComplexColumn(Supplier complexColumn) + { + this.complexColumn = complexColumn; + return this; + } + + public ColumnBuilder setBitmapIndex(Supplier bitmapIndex) + { + this.bitmapIndex = bitmapIndex; + return this; + } + + public Column build() + { + Preconditions.checkState(type != null, "Type must be set."); + + return new SimpleColumn( + new ColumnCapabilitiesImpl() + .setType(type) + .setDictionaryEncoded(dictionaryEncodedColumn != null) + .setHasBitmapIndexes(bitmapIndex != null) + .setRunLengthEncoded(runLengthColumn != null) + .setHasMultipleValues(hasMultipleValues) + , + dictionaryEncodedColumn, + runLengthColumn, + genericColumn, + complexColumn, + bitmapIndex + ); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ColumnCapabilities.java b/index-common/src/main/java/com/metamx/druid/index/column/ColumnCapabilities.java new file mode 100644 index 00000000000..8e2ee8d1af4 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ColumnCapabilities.java @@ -0,0 +1,32 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
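/*
 * Illustrative sketch, not part of this patch: assembling a simple FLOAT column
 * with the builder above. The Guava Supplier's type parameter is an assumption
 * (generics are not shown in this rendering of the patch); build() derives the
 * column's capabilities from whichever suppliers were set.
 */
import com.google.common.base.Supplier;
import com.metamx.druid.index.column.Column;
import com.metamx.druid.index.column.ColumnBuilder;
import com.metamx.druid.index.column.GenericColumn;
import com.metamx.druid.index.column.ValueType;

class ColumnBuilderSketch
{
  static Column buildFloatColumn(final Supplier<GenericColumn> floats)
  {
    return new ColumnBuilder()
        .setType(ValueType.FLOAT)     // required; build() fails fast without it
        .setHasMultipleValues(false)
        .setGenericColumn(floats)     // opened lazily on each accessor call
        .build();
  }
}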
+ */ + +package com.metamx.druid.index.column; + +/** + */ +public interface ColumnCapabilities +{ + public ValueType getType(); + + public boolean isDictionaryEncoded(); + public boolean isRunLengthEncoded(); + public boolean hasBitmapIndexes(); + public boolean hasMultipleValues(); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ColumnCapabilitiesImpl.java b/index-common/src/main/java/com/metamx/druid/index/column/ColumnCapabilitiesImpl.java new file mode 100644 index 00000000000..2be3a50a595 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ColumnCapabilitiesImpl.java @@ -0,0 +1,98 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import org.codehaus.jackson.annotate.JsonProperty; + +/** + */ +public class ColumnCapabilitiesImpl implements ColumnCapabilities +{ + private ValueType type = null; + private boolean dictionaryEncoded = false; + private boolean runLengthEncoded = false; + private boolean hasInvertedIndexes = false; + private boolean hasMultipleValues = false; + + @Override + @JsonProperty + public ValueType getType() + { + return type; + } + + public ColumnCapabilitiesImpl setType(ValueType type) + { + this.type = type; + return this; + } + + @Override + @JsonProperty + public boolean isDictionaryEncoded() + { + return dictionaryEncoded; + } + + public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded) + { + this.dictionaryEncoded = dictionaryEncoded; + return this; + } + + @Override + @JsonProperty + public boolean isRunLengthEncoded() + { + return runLengthEncoded; + } + + public ColumnCapabilitiesImpl setRunLengthEncoded(boolean runLengthEncoded) + { + this.runLengthEncoded = runLengthEncoded; + return this; + } + + @Override + @JsonProperty("hasBitmapIndexes") + public boolean hasBitmapIndexes() + { + return hasInvertedIndexes; + } + + public ColumnCapabilitiesImpl setHasBitmapIndexes(boolean hasInvertedIndexes) + { + this.hasInvertedIndexes = hasInvertedIndexes; + return this; + } + + @Override + @JsonProperty("hasMultipleValues") + public boolean hasMultipleValues() + { + return hasMultipleValues; + } + + public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues) + { + this.hasMultipleValues = hasMultipleValues; + return this; + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ColumnDescriptor.java b/index-common/src/main/java/com/metamx/druid/index/column/ColumnDescriptor.java new file mode 100644 index 00000000000..7f9b7ac7ee2 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ColumnDescriptor.java @@ -0,0 +1,147 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. 
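/*
 * Illustrative sketch, not part of this patch: ColumnCapabilitiesImpl exposes its
 * state through Jackson 1.x @JsonProperty getters, so it can be rendered as column
 * metadata. The ObjectMapper call and the exact JSON shown in the comment are
 * assumptions about how callers would serialize it.
 */
import com.metamx.druid.index.column.ColumnCapabilitiesImpl;
import com.metamx.druid.index.column.ValueType;
import org.codehaus.jackson.map.ObjectMapper;

class CapabilitiesJsonSketch
{
  public static void main(String[] args) throws Exception
  {
    ColumnCapabilitiesImpl caps = new ColumnCapabilitiesImpl()
        .setType(ValueType.STRING)
        .setDictionaryEncoded(true)
        .setHasBitmapIndexes(true)
        .setHasMultipleValues(false);

    // prints something like:
    // {"type":"STRING","dictionaryEncoded":true,"runLengthEncoded":false,
    //  "hasBitmapIndexes":true,"hasMultipleValues":false}
    System.out.println(new ObjectMapper().writeValueAsString(caps));
  }
}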
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.metamx.common.IAE; +import com.metamx.druid.index.serde.ColumnPartSerde; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.List; + +/** + */ +public class ColumnDescriptor +{ + public static Builder builder() + { + return new Builder(); + } + + private final ValueType valueType; + private final boolean hasMultipleValues; + private final List parts; + + @JsonCreator + public ColumnDescriptor( + @JsonProperty("valueType") ValueType valueType, + @JsonProperty("hasMultipleValues") boolean hasMultipleValues, + @JsonProperty("parts") List parts + ) + { + this.valueType = valueType; + this.hasMultipleValues = hasMultipleValues; + this.parts = parts; + } + + @JsonProperty + public ValueType getValueType() + { + return valueType; + } + + @JsonProperty + public boolean isHasMultipleValues() + { + return hasMultipleValues; + } + + @JsonProperty + public List getParts() + { + return parts; + } + + public int numBytes() + { + int retVal = 0; + + for (ColumnPartSerde part : parts) { + retVal += part.numBytes(); + } + + return retVal; + } + + public void write(WritableByteChannel channel) throws IOException + { + for (ColumnPartSerde part : parts) { + part.write(channel); + } + } + + public Column read(ByteBuffer buffer) + { + final ColumnBuilder builder = new ColumnBuilder() + .setType(valueType) + .setHasMultipleValues(hasMultipleValues); + + for (ColumnPartSerde part : parts) { + part.read(buffer, builder); + } + + return builder.build(); + } + + public static class Builder + { + private ValueType valueType = null; + private Boolean hasMultipleValues = null; + + private final List parts = Lists.newArrayList(); + + public Builder setValueType(ValueType valueType) + { + if (this.valueType != null && this.valueType != valueType) { + throw new IAE("valueType[%s] is already set, cannot change to[%s]", this.valueType, valueType); + } + this.valueType = valueType; + return this; + } + + public Builder setHasMultipleValues(boolean hasMultipleValues) + { + if (this.hasMultipleValues != null && this.hasMultipleValues != hasMultipleValues) { + throw new IAE( + "hasMultipleValues[%s] is already set, cannot change to[%s]", this.hasMultipleValues, hasMultipleValues + ); + } + this.hasMultipleValues = hasMultipleValues; + return this; + } + + public Builder addSerde(ColumnPartSerde serde) + { + parts.add(serde); + return this; + } + + public ColumnDescriptor build() + { + Preconditions.checkNotNull(valueType, "must specify a valueType"); + return new 
ColumnDescriptor(valueType, hasMultipleValues == null ? false : hasMultipleValues, parts); + } + } +} diff --git a/client/src/main/java/com/metamx/druid/client/cache/CacheBroker.java b/index-common/src/main/java/com/metamx/druid/index/column/ColumnSelector.java similarity index 84% rename from client/src/main/java/com/metamx/druid/client/cache/CacheBroker.java rename to index-common/src/main/java/com/metamx/druid/index/column/ColumnSelector.java index 5f028c06e72..0ed66850dcb 100644 --- a/client/src/main/java/com/metamx/druid/client/cache/CacheBroker.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/ColumnSelector.java @@ -17,12 +17,12 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package com.metamx.druid.client.cache; +package com.metamx.druid.index.column; /** */ -public interface CacheBroker +public interface ColumnSelector { - public CacheStats getStats(); - public Cache provideCache(String identifier); + public Column getTimeColumn(); + public Column getColumn(String columnName); } diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java new file mode 100644 index 00000000000..f7cfb706e44 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumn.java @@ -0,0 +1,31 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import java.io.Closeable; + +/** + */ +public interface ComplexColumn extends Closeable +{ + public Class getClazz(); + public String getTypeName(); + public Object getRowValue(int rowNum); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java new file mode 100644 index 00000000000..46f665c57c6 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ComplexColumnImpl.java @@ -0,0 +1,57 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
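/*
 * Illustrative sketch, not part of this patch: the ColumnDescriptor lifecycle just
 * defined — build a descriptor from ColumnPartSerde parts, write the parts in
 * order, then rebuild the Column by replaying them with read(). The part passed in
 * is assumed to be one of the serdes registered later in this patch (float, long,
 * complex, stringDictionary).
 */
import com.metamx.druid.index.column.Column;
import com.metamx.druid.index.column.ColumnDescriptor;
import com.metamx.druid.index.column.ValueType;
import com.metamx.druid.index.serde.ColumnPartSerde;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;

class ColumnDescriptorSketch
{
  static Column roundTrip(ValueType type, ColumnPartSerde part) throws IOException
  {
    ColumnDescriptor descriptor = ColumnDescriptor.builder()
        .setValueType(type)
        .setHasMultipleValues(false)
        .addSerde(part)
        .build();

    ByteArrayOutputStream out = new ByteArrayOutputStream(descriptor.numBytes());
    descriptor.write(Channels.newChannel(out));   // each part appends its bytes in order

    ByteBuffer buffer = ByteBuffer.wrap(out.toByteArray());
    return descriptor.read(buffer);               // parts replay into a ColumnBuilder
  }
}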
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.Indexed; + +/** + */ +public class ComplexColumnImpl extends AbstractColumn +{ + private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() + .setType(ValueType.COMPLEX); + + private final Indexed column; + private final String typeName; + + public ComplexColumnImpl(String typeName, Indexed column) + { + this.column = column; + this.typeName = typeName; + } + + @Override + public ColumnCapabilities getCapabilities() + { + return CAPABILITIES; + } + + @Override + public int getLength() + { + return column.size(); + } + + @Override + public ComplexColumn getComplexColumn() + { + return new IndexedComplexColumn(typeName, column); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java new file mode 100644 index 00000000000..62057cb8e10 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/DictionaryEncodedColumn.java @@ -0,0 +1,35 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.IndexedInts; + +/** + */ +public interface DictionaryEncodedColumn +{ + public int length(); + public boolean hasMultipleValues(); + public int getSingleValueRow(int rowNum); + public IndexedInts getMultiValueRow(int rowNum); + public String lookupName(int id); + public int lookupId(String name); + public int getCardinality(); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java new file mode 100644 index 00000000000..44ffd7e970a --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/FloatColumn.java @@ -0,0 +1,55 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
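/*
 * Illustrative sketch, not part of this patch: resolving a row of the
 * DictionaryEncodedColumn interface above back into dimension values. It assumes
 * IndexedInts exposes size()/get(int), which this patch does not show.
 */
import com.google.common.collect.Lists;
import com.metamx.druid.index.column.DictionaryEncodedColumn;
import com.metamx.druid.kv.IndexedInts;

import java.util.List;

class DictionaryColumnSketch
{
  static List<String> valuesAt(DictionaryEncodedColumn column, int rowNum)
  {
    List<String> values = Lists.newArrayList();
    if (column.hasMultipleValues()) {
      IndexedInts ids = column.getMultiValueRow(rowNum);
      for (int i = 0; i < ids.size(); i++) {
        values.add(column.lookupName(ids.get(i)));
      }
    } else {
      values.add(column.lookupName(column.getSingleValueRow(rowNum)));
    }
    return values;
  }
}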
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.index.v1.CompressedFloatsIndexedSupplier; + +/** + */ +public class FloatColumn extends AbstractColumn +{ + private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() + .setType(ValueType.FLOAT); + + private final CompressedFloatsIndexedSupplier column; + + public FloatColumn(CompressedFloatsIndexedSupplier column) + { + this.column = column; + } + + @Override + public ColumnCapabilities getCapabilities() + { + return CAPABILITIES; + } + + @Override + public int getLength() + { + return column.size(); + } + + @Override + public GenericColumn getGenericColumn() + { + return new IndexedFloatsGenericColumn(column.get()); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java new file mode 100644 index 00000000000..530eb2fc232 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/GenericColumn.java @@ -0,0 +1,42 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedFloats; +import com.metamx.druid.kv.IndexedLongs; + +import java.io.Closeable; + +/** + */ +public interface GenericColumn extends Closeable +{ + public int length(); + public ValueType getType(); + public boolean hasMultipleValues(); + + public String getStringSingleValueRow(int rowNum); + public Indexed getStringMultiValueRow(int rowNum); + public float getFloatSingleValueRow(int rowNum); + public IndexedFloats getFloatMultiValueRow(int rowNum); + public long getLongSingleValueRow(int rowNum); + public IndexedLongs getLongMultiValueRow(int rowNum); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java new file mode 100644 index 00000000000..bafb6977dd0 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/IndexedComplexColumn.java @@ -0,0 +1,62 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
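/*
 * Illustrative sketch, not part of this patch: GenericColumn is Closeable because
 * implementations such as IndexedFloatsGenericColumn hold an open resource, so
 * each accessor call should be paired with a close() — the same pattern
 * SimpleColumn.getLength() uses later in this patch. Summing a FLOAT column:
 */
import com.google.common.io.Closeables;
import com.metamx.druid.index.column.Column;
import com.metamx.druid.index.column.GenericColumn;

class GenericColumnSketch
{
  static double sumFloats(Column column)
  {
    GenericColumn floats = column.getGenericColumn();
    try {
      double sum = 0;
      for (int i = 0; i < floats.length(); i++) {
        sum += floats.getFloatSingleValueRow(i);
      }
      return sum;
    }
    finally {
      Closeables.closeQuietly(floats);
    }
  }
}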
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.Indexed; + +import java.io.IOException; + +/** +*/ +public class IndexedComplexColumn implements ComplexColumn +{ + private final Indexed column; + private final String typeName; + + public IndexedComplexColumn( + String typeName, Indexed column + ) + { + this.column = column; + this.typeName = typeName; + } + @Override + public Class getClazz() + { + return column.getClazz(); + } + + @Override + public String getTypeName() + { + return typeName; + } + + @Override + public Object getRowValue(int rowNum) + { + return column.get(rowNum); + } + + @Override + public void close() throws IOException + { + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java new file mode 100644 index 00000000000..5df228435c1 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/IndexedFloatsGenericColumn.java @@ -0,0 +1,99 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedFloats; +import com.metamx.druid.kv.IndexedLongs; + +import java.io.IOException; + +/** +*/ +public class IndexedFloatsGenericColumn implements GenericColumn +{ + private final IndexedFloats column; + + public IndexedFloatsGenericColumn( + final IndexedFloats column + ) { + this.column = column; + } + + @Override + public int length() + { + return column.size(); + } + + @Override + public ValueType getType() + { + return ValueType.FLOAT; + } + + @Override + public boolean hasMultipleValues() + { + return false; + } + + @Override + public String getStringSingleValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public Indexed getStringMultiValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public float getFloatSingleValueRow(int rowNum) + { + return column.get(rowNum); + } + + @Override + public IndexedFloats getFloatMultiValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public long getLongSingleValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public IndexedLongs getLongMultiValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException + { + column.close(); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java new file mode 100644 index 00000000000..211dab35a19 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/IndexedLongsGenericColumn.java @@ -0,0 +1,99 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedFloats; +import com.metamx.druid.kv.IndexedLongs; + +import java.io.IOException; + +/** +*/ +public class IndexedLongsGenericColumn implements GenericColumn +{ + private final IndexedLongs column; + + public IndexedLongsGenericColumn( + final IndexedLongs column + ) { + this.column = column; + } + + @Override + public int length() + { + return column.size(); + } + + @Override + public ValueType getType() + { + return ValueType.LONG; + } + + @Override + public boolean hasMultipleValues() + { + return false; + } + + @Override + public String getStringSingleValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public Indexed getStringMultiValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public float getFloatSingleValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public IndexedFloats getFloatMultiValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public long getLongSingleValueRow(int rowNum) + { + return column.get(rowNum); + } + + @Override + public IndexedLongs getLongMultiValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException + { + column.close(); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java new file mode 100644 index 00000000000..1ec297ea61c --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/LongColumn.java @@ -0,0 +1,55 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.index.v1.CompressedLongsIndexedSupplier; + +/** + */ +public class LongColumn extends AbstractColumn +{ + private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() + .setType(ValueType.LONG); + + private final CompressedLongsIndexedSupplier column; + + public LongColumn(CompressedLongsIndexedSupplier column) + { + this.column = column; + } + + @Override + public ColumnCapabilities getCapabilities() + { + return CAPABILITIES; + } + + @Override + public int getLength() + { + return column.size(); + } + + @Override + public GenericColumn getGenericColumn() + { + return new IndexedLongsGenericColumn(column.get()); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/RunLengthColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/RunLengthColumn.java new file mode 100644 index 00000000000..5242e994008 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/RunLengthColumn.java @@ -0,0 +1,27 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.column; + +/** + */ +public interface RunLengthColumn +{ + public void thisIsAFictionalInterfaceThatWillHopefullyMeanSomethingSometime(); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java new file mode 100644 index 00000000000..93825a8e9f0 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java @@ -0,0 +1,101 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.column; + +import com.google.common.base.Supplier; +import com.google.common.io.Closeables; + +/** + */ +class SimpleColumn implements Column +{ + private final ColumnCapabilities capabilities; + private final Supplier dictionaryEncodedColumn; + private final Supplier runLengthColumn; + private final Supplier genericColumn; + private final Supplier complexColumn; + private final Supplier bitmapIndex; + + SimpleColumn( + ColumnCapabilities capabilities, + Supplier dictionaryEncodedColumn, + Supplier runLengthColumn, + Supplier genericColumn, + Supplier complexColumn, + Supplier bitmapIndex + ) + { + this.capabilities = capabilities; + this.dictionaryEncodedColumn = dictionaryEncodedColumn; + this.runLengthColumn = runLengthColumn; + this.genericColumn = genericColumn; + this.complexColumn = complexColumn; + this.bitmapIndex = bitmapIndex; + } + + @Override + public ColumnCapabilities getCapabilities() + { + return capabilities; + } + + @Override + public int getLength() + { + GenericColumn column = null; + try { + column = genericColumn.get(); + return column.length(); + } + finally { + Closeables.closeQuietly(column); + } + } + + @Override + public DictionaryEncodedColumn getDictionaryEncoding() + { + return dictionaryEncodedColumn.get(); + } + + @Override + public RunLengthColumn getRunLengthColumn() + { + return runLengthColumn.get(); + } + + @Override + public GenericColumn getGenericColumn() + { + return genericColumn.get(); + } + + @Override + public ComplexColumn getComplexColumn() + { + return complexColumn.get(); + } + + @Override + public BitmapIndex getBitmapIndex() + { + return bitmapIndex.get(); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java new file mode 100644 index 00000000000..fbc3877c5fe --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/SimpleDictionaryEncodedColumn.java @@ -0,0 +1,87 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.GenericIndexed; +import com.metamx.druid.kv.IndexedInts; +import com.metamx.druid.kv.VSizeIndexed; +import com.metamx.druid.kv.VSizeIndexedInts; + +/** +*/ +public class SimpleDictionaryEncodedColumn implements DictionaryEncodedColumn +{ + private final VSizeIndexedInts column; + private final VSizeIndexed multiValueColumn; + private final GenericIndexed lookups; + + public SimpleDictionaryEncodedColumn( + VSizeIndexedInts singleValueColumn, + VSizeIndexed multiValueColumn, + GenericIndexed lookups + ) + { + this.column = singleValueColumn; + this.multiValueColumn = multiValueColumn; + this.lookups = lookups; + } + + @Override + public int length() + { + return hasMultipleValues() ? multiValueColumn.size() : column.size(); + } + + @Override + public boolean hasMultipleValues() + { + return column == null; + } + + @Override + public int getSingleValueRow(int rowNum) + { + return column.get(rowNum); + } + + @Override + public IndexedInts getMultiValueRow(int rowNum) + { + return multiValueColumn.get(rowNum); + } + + @Override + public String lookupName(int id) + { + return lookups.get(id); + } + + @Override + public int lookupId(String name) + { + return lookups.indexOf(name); + } + + @Override + public int getCardinality() + { + return lookups.size(); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java new file mode 100644 index 00000000000..79327cb8400 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/StringMultiValueColumn.java @@ -0,0 +1,118 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
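/*
 * Illustrative sketch, not part of this patch: with the dictionary-encoded column
 * above, an equality filter can be evaluated on integer ids rather than strings —
 * look the value up once, then compare ids per row. Single-valued path only;
 * lookupId() is negative for unknown values (GenericIndexed.indexOf semantics).
 */
import com.google.common.collect.Lists;
import com.metamx.druid.index.column.DictionaryEncodedColumn;

import java.util.List;

class DictionaryFilterSketch
{
  static List<Integer> rowsMatching(DictionaryEncodedColumn column, String value)
  {
    List<Integer> rows = Lists.newArrayList();
    int id = column.lookupId(value);
    if (id < 0) {
      return rows; // value never occurs in this column
    }
    for (int rowNum = 0; rowNum < column.length(); rowNum++) {
      if (column.getSingleValueRow(rowNum) == id) {
        rows.add(rowNum);
      }
    }
    return rows;
  }
}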
+ */ + +package com.metamx.druid.index.column; + +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedInts; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; + +/** + */ +public class StringMultiValueColumn extends AbstractColumn +{ + private static final ImmutableConciseSet emptySet = new ImmutableConciseSet(); + private static final ColumnCapabilitiesImpl CAPABILITIES = new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setHasBitmapIndexes(true) + .setHasMultipleValues(true); + + private final Indexed lookups; + private final Indexed column; + private final Indexed bitmapIndexes; + + public StringMultiValueColumn( + Indexed lookups, + Indexed column, + Indexed bitmapIndexes + ) + { + this.lookups = lookups; + this.column = column; + this.bitmapIndexes = bitmapIndexes; + } + + @Override + public ColumnCapabilities getCapabilities() + { + return CAPABILITIES; + } + + @Override + public int getLength() + { + return column.size(); + } + + @Override + public DictionaryEncodedColumn getDictionaryEncoding() + { + return new DictionaryEncodedColumn() + { + @Override + public int length() + { + return column.size(); + } + + @Override + public boolean hasMultipleValues() + { + return true; + } + + @Override + public int getSingleValueRow(int rowNum) + { + throw new UnsupportedOperationException(); + } + + @Override + public IndexedInts getMultiValueRow(int rowNum) + { + return column.get(rowNum); + } + + @Override + public String lookupName(int id) + { + return lookups.get(id); + } + + @Override + public int lookupId(String name) + { + return lookups.indexOf(name); + } + + @Override + public int getCardinality() + { + return lookups.size(); + } + }; + } + + @Override + public BitmapIndex getBitmapIndex() + { + throw new UnsupportedOperationException(); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/column/ValueType.java b/index-common/src/main/java/com/metamx/druid/index/column/ValueType.java new file mode 100644 index 00000000000..245ff682bd7 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/column/ValueType.java @@ -0,0 +1,11 @@ +package com.metamx.druid.index.column; + +/** +*/ +public enum ValueType +{ + FLOAT, + LONG, + STRING, + COMPLEX +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java new file mode 100644 index 00000000000..2381639a0a8 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/BitmapIndexColumnPartSupplier.java @@ -0,0 +1,76 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.serde; + +import com.google.common.base.Supplier; +import com.metamx.druid.index.column.BitmapIndex; +import com.metamx.druid.kv.GenericIndexed; +import com.metamx.druid.kv.Indexed; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; + +/** +*/ +public class BitmapIndexColumnPartSupplier implements Supplier +{ + private static final ImmutableConciseSet EMPTY_SET = new ImmutableConciseSet(); + + private final GenericIndexed bitmaps; + private final GenericIndexed dictionary; + + public BitmapIndexColumnPartSupplier( + GenericIndexed bitmaps, + GenericIndexed dictionary + ) { + this.bitmaps = bitmaps; + this.dictionary = dictionary; + } + + @Override + public BitmapIndex get() + { + return new BitmapIndex() + { + @Override + public int getCardinality() + { + return dictionary.size(); + } + + @Override + public String getValue(int index) + { + return dictionary.get(index); + } + + @Override + public ImmutableConciseSet getConciseSet(String value) + { + final int index = dictionary.indexOf(value); + + if (index < 0) { + return EMPTY_SET; + } + + final ImmutableConciseSet bitmap = bitmaps.get(index); + return bitmap == null ? EMPTY_SET : bitmap; + } + }; + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/ColumnPartSerde.java b/index-common/src/main/java/com/metamx/druid/index/serde/ColumnPartSerde.java new file mode 100644 index 00000000000..d60428fbe3e --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/ColumnPartSerde.java @@ -0,0 +1,44 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
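/*
 * Illustrative sketch, not part of this patch: the supplier above packages a
 * (bitmaps, dictionary) pair as the bitmap-index part of a column, which plugs
 * straight into the ColumnBuilder from earlier in this patch. The GenericIndexed
 * type parameters are assumptions; they are not shown in this rendering.
 */
import com.metamx.druid.index.column.Column;
import com.metamx.druid.index.column.ColumnBuilder;
import com.metamx.druid.index.column.ValueType;
import com.metamx.druid.index.serde.BitmapIndexColumnPartSupplier;
import com.metamx.druid.kv.GenericIndexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;

class BitmapPartSketch
{
  static Column stringColumnWithBitmaps(
      GenericIndexed<String> dictionary,
      GenericIndexed<ImmutableConciseSet> bitmaps
  )
  {
    return new ColumnBuilder()
        .setType(ValueType.STRING)
        .setBitmapIndex(new BitmapIndexColumnPartSupplier(bitmaps, dictionary))
        .build();
  }
}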
+ */ + +package com.metamx.druid.index.serde; + +import com.metamx.druid.index.column.ColumnBuilder; +import org.codehaus.jackson.annotate.JsonSubTypes; +import org.codehaus.jackson.annotate.JsonTypeInfo; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; + +/** + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = "complex", value = ComplexColumnPartSerde.class), + @JsonSubTypes.Type(name = "float", value = FloatGenericColumnPartSerde.class), + @JsonSubTypes.Type(name = "long", value = LongGenericColumnPartSerde.class), + @JsonSubTypes.Type(name = "stringDictionary", value = DictionaryEncodedColumnPartSerde.class) +}) +public interface ColumnPartSerde +{ + public int numBytes(); + public void write(WritableByteChannel channel) throws IOException; + public ColumnPartSerde read(ByteBuffer buffer, ColumnBuilder builder); +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java new file mode 100644 index 00000000000..760fcbb3f20 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSerde.java @@ -0,0 +1,80 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
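/*
 * Illustrative sketch, not part of this patch: ColumnPartSerde is polymorphic via
 * the Jackson "type" property declared above, so a serialized ColumnDescriptor
 * names its parts with the registered short names. The JSON below is illustrative
 * only — real parts may carry extra fields that this patch does not show.
 */
class ColumnDescriptorJsonShape
{
  static final String EXAMPLE =
      "{\n"
      + "  \"valueType\": \"FLOAT\",\n"              // ColumnDescriptor.valueType
      + "  \"hasMultipleValues\": false,\n"
      + "  \"parts\": [ { \"type\": \"float\" } ]\n" // resolves to FloatGenericColumnPartSerde
      + "}";

  public static void main(String[] args)
  {
    System.out.println(EXAMPLE);
  }
}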
+ */ + +package com.metamx.druid.index.serde; + +import com.metamx.druid.index.column.ColumnBuilder; +import com.metamx.druid.index.v1.serde.ComplexMetricSerde; +import com.metamx.druid.index.v1.serde.ComplexMetrics; +import com.metamx.druid.kv.GenericIndexed; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; + +/** +*/ +public class ComplexColumnPartSerde implements ColumnPartSerde +{ + @JsonCreator + public static ComplexColumnPartSerde createDeserializer( + @JsonProperty("typeName") String complexType + ) + { + return new ComplexColumnPartSerde(null, complexType); + } + + private final GenericIndexed column; + private final String typeName; + + private final ComplexMetricSerde serde; + + public ComplexColumnPartSerde(GenericIndexed column, String typeName) + { + this.column = column; + this.typeName = typeName; + serde = ComplexMetrics.getSerdeForType(typeName); + } + + @JsonProperty + public String getTypeName() + { + return typeName; + } + + @Override + public int numBytes() + { + return column.getSerializedSize(); + } + + @Override + public void write(WritableByteChannel channel) throws IOException + { + column.writeToChannel(channel); + } + + @Override + public ColumnPartSerde read(ByteBuffer buffer, ColumnBuilder builder) + { + return serde.deserializeColumn(buffer, builder); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java new file mode 100644 index 00000000000..4a4cfce4f77 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/ComplexColumnPartSupplier.java @@ -0,0 +1,46 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.serde; + +import com.google.common.base.Supplier; +import com.metamx.druid.index.column.ComplexColumn; +import com.metamx.druid.index.column.IndexedComplexColumn; +import com.metamx.druid.kv.GenericIndexed; + +/** +*/ +public class ComplexColumnPartSupplier implements Supplier +{ + private final GenericIndexed complexType; + private final String typeName; + + public ComplexColumnPartSupplier( + final String typeName, final GenericIndexed complexType + ) { + this.complexType = complexType; + this.typeName = typeName; + } + + @Override + public ComplexColumn get() + { + return new IndexedComplexColumn(typeName, complexType); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/DictionaryEncodedColumnPartSerde.java b/index-common/src/main/java/com/metamx/druid/index/serde/DictionaryEncodedColumnPartSerde.java new file mode 100644 index 00000000000..d30c6c6bbc0 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/DictionaryEncodedColumnPartSerde.java @@ -0,0 +1,147 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.serde; + +import com.metamx.common.IAE; +import com.metamx.druid.index.column.ColumnBuilder; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.kv.ConciseCompressedIndexedInts; +import com.metamx.druid.kv.GenericIndexed; +import com.metamx.druid.kv.VSizeIndexed; +import com.metamx.druid.kv.VSizeIndexedInts; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; + +/** +*/ +public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde +{ + @JsonCreator + public static DictionaryEncodedColumnPartSerde createDeserializer(boolean singleValued) + { + return new DictionaryEncodedColumnPartSerde(); + } + + private final GenericIndexed dictionary; + private final VSizeIndexedInts singleValuedColumn; + private final VSizeIndexed multiValuedColumn; + private final GenericIndexed bitmaps; + + private final int size; + + public DictionaryEncodedColumnPartSerde( + GenericIndexed dictionary, + VSizeIndexedInts singleValCol, + VSizeIndexed multiValCol, + GenericIndexed bitmaps + ) + { + this.dictionary = dictionary; + this.singleValuedColumn = singleValCol; + this.multiValuedColumn = multiValCol; + this.bitmaps = bitmaps; + + int size = dictionary.getSerializedSize(); + if (singleValCol != null && multiValCol == null) { + size += singleValCol.getSerializedSize(); + } + else if (singleValCol == null && multiValCol != null) { + size += multiValCol.getSerializedSize(); + } + else { + throw new IAE("Either singleValCol[%s] or multiValCol[%s] must be set", singleValCol, multiValCol); + } + size += bitmaps.getSerializedSize(); + + this.size = size; + } + + private DictionaryEncodedColumnPartSerde() + { + dictionary = null; + singleValuedColumn = null; + multiValuedColumn = null; + bitmaps = null; + size = 0; + } + + @JsonProperty + private boolean isSingleValued() + { + return singleValuedColumn != null; + } + + @Override + public int numBytes() + { + return 1 + size; + } + + @Override + public void write(WritableByteChannel channel) throws IOException + { + channel.write(ByteBuffer.wrap(new byte[]{(byte) (isSingleValued() ? 
0x0 : 0x1)})); + dictionary.writeToChannel(channel); + if (isSingleValued()) { + singleValuedColumn.writeToChannel(channel); + } + else { + multiValuedColumn.writeToChannel(channel); + } + bitmaps.writeToChannel(channel); + } + + @Override + public ColumnPartSerde read(ByteBuffer buffer, ColumnBuilder builder) + { + final boolean isSingleValued = buffer.get() == 0x0; + final GenericIndexed dictionary = GenericIndexed.read(buffer, GenericIndexed.stringStrategy); + final VSizeIndexedInts singleValuedColumn; + final VSizeIndexed multiValuedColumn; + + builder.setType(ValueType.STRING); + + if (isSingleValued) { + singleValuedColumn = VSizeIndexedInts.readFromByteBuffer(buffer); + multiValuedColumn = null; + builder.setHasMultipleValues(false) + .setDictionaryEncodedColumn(new DictionaryEncodedColumnSupplier(dictionary, singleValuedColumn, null)); + } + else { + singleValuedColumn = null; + multiValuedColumn = VSizeIndexed.readFromByteBuffer(buffer); + builder.setHasMultipleValues(true) + .setDictionaryEncodedColumn(new DictionaryEncodedColumnSupplier(dictionary, null, multiValuedColumn)); + } + + GenericIndexed bitmaps = GenericIndexed.read( + buffer, ConciseCompressedIndexedInts.objectStrategy + ); + + builder.setBitmapIndex(new BitmapIndexColumnPartSupplier(bitmaps, dictionary)); + + return new DictionaryEncodedColumnPartSerde(dictionary, singleValuedColumn, multiValuedColumn, bitmaps); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/DictionaryEncodedColumnSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/DictionaryEncodedColumnSupplier.java new file mode 100644 index 00000000000..40285cf550d --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/DictionaryEncodedColumnSupplier.java @@ -0,0 +1,53 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
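// --- Illustrative sketch, not part of this patch ---
// DictionaryEncodedColumnPartSerde (above) writes one flag byte (0x0 = single-valued,
// 0x1 = multi-valued), then the dictionary, then the row values, then the bitmap indexes,
// and read() consumes them in the same order. Using only factory methods that appear
// elsewhere in this patch, a tiny single-valued round trip (inside a method declared to
// throw IOException) could look roughly like this; the example values are made up:
GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
    Arrays.asList("bar", "foo"), GenericIndexed.stringStrategy
);
VSizeIndexedInts rows = VSizeIndexedInts.fromArray(new int[]{0, 1, 0});
ConciseSet barRows = new ConciseSet();
barRows.add(0);
barRows.add(2);
ConciseSet fooRows = new ConciseSet();
fooRows.add(1);
GenericIndexed<ImmutableConciseSet> bitmaps = GenericIndexed.fromIterable(
    Arrays.asList(
        ImmutableConciseSet.newImmutableFromMutable(barRows),
        ImmutableConciseSet.newImmutableFromMutable(fooRows)
    ),
    ConciseCompressedIndexedInts.objectStrategy
);
DictionaryEncodedColumnPartSerde serde =
    new DictionaryEncodedColumnPartSerde(dictionary, rows, null, bitmaps);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serde.write(Channels.newChannel(baos));
// numBytes() counts the flag byte plus the serialized parts, so it should equal baos.toByteArray().length.
// --- end sketch ---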
+ */ + +package com.metamx.druid.index.serde; + +import com.google.common.base.Supplier; +import com.metamx.druid.index.column.DictionaryEncodedColumn; +import com.metamx.druid.index.column.SimpleDictionaryEncodedColumn; +import com.metamx.druid.kv.GenericIndexed; +import com.metamx.druid.kv.VSizeIndexed; +import com.metamx.druid.kv.VSizeIndexedInts; + +/** +*/ +public class DictionaryEncodedColumnSupplier implements Supplier +{ + private final GenericIndexed dictionary; + private final VSizeIndexedInts singleValuedColumn; + private final VSizeIndexed multiValuedColumn; + + public DictionaryEncodedColumnSupplier( + GenericIndexed dictionary, + VSizeIndexedInts singleValuedColumn, + VSizeIndexed multiValuedColumn + ) + { + this.dictionary = dictionary; + this.singleValuedColumn = singleValuedColumn; + this.multiValuedColumn = multiValuedColumn; + } + + @Override + public DictionaryEncodedColumn get() + { + return new SimpleDictionaryEncodedColumn(singleValuedColumn, multiValuedColumn, dictionary); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/FloatGenericColumnPartSerde.java b/index-common/src/main/java/com/metamx/druid/index/serde/FloatGenericColumnPartSerde.java new file mode 100644 index 00000000000..87cb95758db --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/FloatGenericColumnPartSerde.java @@ -0,0 +1,83 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.serde; + +import com.metamx.druid.index.column.ColumnBuilder; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.index.v1.CompressedFloatsIndexedSupplier; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +/** +*/ +public class FloatGenericColumnPartSerde implements ColumnPartSerde +{ + @JsonCreator + public static FloatGenericColumnPartSerde createDeserializer( + @JsonProperty("byteOrder") ByteOrder byteOrder + ) + { + return new FloatGenericColumnPartSerde(null, byteOrder); + } + + private final CompressedFloatsIndexedSupplier compressedFloats; + private final ByteOrder byteOrder; + + public FloatGenericColumnPartSerde(CompressedFloatsIndexedSupplier compressedFloats, ByteOrder byteOrder) + { + this.compressedFloats = compressedFloats; + this.byteOrder = byteOrder; + } + + @JsonProperty + public ByteOrder getByteOrder() + { + return byteOrder; + } + + @Override + public int numBytes() + { + return compressedFloats.getSerializedSize(); + } + + @Override + public void write(WritableByteChannel channel) throws IOException + { + compressedFloats.writeToChannel(channel); + } + + @Override + public ColumnPartSerde read(ByteBuffer buffer, ColumnBuilder builder) + { + final CompressedFloatsIndexedSupplier column = CompressedFloatsIndexedSupplier.fromByteBuffer(buffer, byteOrder); + + builder.setType(ValueType.FLOAT) + .setHasMultipleValues(false) + .setGenericColumn(new FloatGenericColumnSupplier(column, byteOrder)); + + return new FloatGenericColumnPartSerde(column, byteOrder); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/FloatGenericColumnSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/FloatGenericColumnSupplier.java new file mode 100644 index 00000000000..c13a5c11942 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/FloatGenericColumnSupplier.java @@ -0,0 +1,49 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
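// --- Illustrative sketch, not part of this patch ---
// FloatGenericColumnPartSerde (above) delegates the actual bytes to CompressedFloatsIndexedSupplier.
// Mirroring the serde tests added later in this patch, a round trip (inside a method declared to
// throw IOException) looks roughly like this, with made-up sample values:
CompressedFloatsIndexedSupplier floats = CompressedFloatsIndexedSupplier.fromFloatBuffer(
    FloatBuffer.wrap(new float[]{0.1f, 0.2f, 0.3f}), 5, ByteOrder.nativeOrder()
);
ByteArrayOutputStream out = new ByteArrayOutputStream();
floats.writeToChannel(Channels.newChannel(out));
// getSerializedSize() is exactly the number of bytes writeToChannel() produces, which is
// what lets numBytes() above report the column's size up front.
byte[] bytes = out.toByteArray();
CompressedFloatsIndexedSupplier read =
    CompressedFloatsIndexedSupplier.fromByteBuffer(ByteBuffer.wrap(bytes), ByteOrder.nativeOrder());
// --- end sketch ---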
+ */ + +package com.metamx.druid.index.serde; + +import com.google.common.base.Supplier; +import com.metamx.druid.index.column.GenericColumn; +import com.metamx.druid.index.column.IndexedFloatsGenericColumn; +import com.metamx.druid.index.v1.CompressedFloatsIndexedSupplier; + +import java.nio.ByteOrder; + +/** +*/ +public class FloatGenericColumnSupplier implements Supplier +{ + private final CompressedFloatsIndexedSupplier column; + private final ByteOrder byteOrder; + + public FloatGenericColumnSupplier( + CompressedFloatsIndexedSupplier column, + ByteOrder byteOrder + ) { + this.column = column; + this.byteOrder = byteOrder; + } + + @Override + public GenericColumn get() + { + return new IndexedFloatsGenericColumn(column.get()); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/LongGenericColumnPartSerde.java b/index-common/src/main/java/com/metamx/druid/index/serde/LongGenericColumnPartSerde.java new file mode 100644 index 00000000000..0d140751e4c --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/LongGenericColumnPartSerde.java @@ -0,0 +1,83 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.serde; + +import com.metamx.druid.index.column.ColumnBuilder; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.index.v1.CompressedLongsIndexedSupplier; +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.WritableByteChannel; + +/** +*/ +public class LongGenericColumnPartSerde implements ColumnPartSerde +{ + @JsonCreator + public static LongGenericColumnPartSerde createDeserializer( + @JsonProperty("byteOrder") ByteOrder byteOrder + ) + { + return new LongGenericColumnPartSerde(null, byteOrder); + } + + private final CompressedLongsIndexedSupplier compressedLongs; + private final ByteOrder byteOrder; + + public LongGenericColumnPartSerde(CompressedLongsIndexedSupplier compressedLongs, ByteOrder byteOrder) + { + this.compressedLongs = compressedLongs; + this.byteOrder = byteOrder; + } + + @JsonProperty + public ByteOrder getByteOrder() + { + return byteOrder; + } + + @Override + public int numBytes() + { + return compressedLongs.getSerializedSize(); + } + + @Override + public void write(WritableByteChannel channel) throws IOException + { + compressedLongs.writeToChannel(channel); + } + + @Override + public ColumnPartSerde read(ByteBuffer buffer, ColumnBuilder builder) + { + final CompressedLongsIndexedSupplier column = CompressedLongsIndexedSupplier.fromByteBuffer(buffer, byteOrder); + + builder.setType(ValueType.LONG) + .setHasMultipleValues(false) + .setGenericColumn(new LongGenericColumnSupplier(column)); + + return new LongGenericColumnPartSerde(column, byteOrder); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/serde/LongGenericColumnSupplier.java b/index-common/src/main/java/com/metamx/druid/index/serde/LongGenericColumnSupplier.java new file mode 100644 index 00000000000..babdf7949f6 --- /dev/null +++ b/index-common/src/main/java/com/metamx/druid/index/serde/LongGenericColumnSupplier.java @@ -0,0 +1,46 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index.serde; + +import com.google.common.base.Supplier; +import com.metamx.druid.index.column.GenericColumn; +import com.metamx.druid.index.column.IndexedLongsGenericColumn; +import com.metamx.druid.index.v1.CompressedLongsIndexedSupplier; + +import java.nio.ByteOrder; + +/** +*/ +public class LongGenericColumnSupplier implements Supplier +{ + private final CompressedLongsIndexedSupplier column; + + public LongGenericColumnSupplier( + CompressedLongsIndexedSupplier column + ) { + this.column = column; + } + + @Override + public GenericColumn get() + { + return new IndexedLongsGenericColumn(column.get()); + } +} diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java b/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java index 12751ebebe8..1def2af031f 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplier.java @@ -29,7 +29,6 @@ import com.metamx.druid.collect.ResourceHolder; import com.metamx.druid.collect.StupidResourceHolder; import com.metamx.druid.kv.GenericIndexed; import com.metamx.druid.kv.IndexedFloats; -import com.metamx.druid.kv.IndexedIterable; import java.io.IOException; import java.nio.ByteBuffer; @@ -60,6 +59,11 @@ public class CompressedFloatsIndexedSupplier implements Supplier this.baseFloatBuffers = baseFloatBuffers; } + public int size() + { + return totalSize; + } + @Override public IndexedFloats get() { @@ -146,6 +150,11 @@ public class CompressedFloatsIndexedSupplier implements Supplier }; } + public int getSerializedSize() + { + return baseFloatBuffers.getSerializedSize() + 1 + 4 + 4; + } + public void writeToChannel(WritableByteChannel channel) throws IOException { channel.write(ByteBuffer.wrap(new byte[]{version})); @@ -178,7 +187,7 @@ public class CompressedFloatsIndexedSupplier implements Supplier return MAX_FLOATS_IN_BUFFER - (MAX_FLOATS_IN_BUFFER % numFloatsInChunk); } - public static CompressedFloatsIndexedSupplier fromByteBuffer(ByteBuffer buffer, ByteOrder order) throws IOException + public static CompressedFloatsIndexedSupplier fromByteBuffer(ByteBuffer buffer, ByteOrder order) { byte versionFromBuffer = buffer.get(); @@ -186,7 +195,7 @@ public class CompressedFloatsIndexedSupplier implements Supplier return new CompressedFloatsIndexedSupplier( buffer.getInt(), buffer.getInt(), - GenericIndexed.readFromByteBuffer(buffer, CompressedFloatBufferObjectStrategy.getBufferForOrder(order)) + GenericIndexed.read(buffer, CompressedFloatBufferObjectStrategy.getBufferForOrder(order)) ); } diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplier.java b/index-common/src/main/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplier.java index 2f37d393763..d66b68a0c3d 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplier.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplier.java @@ -28,7 +28,6 @@ import com.metamx.common.IAE; import com.metamx.druid.collect.ResourceHolder; import com.metamx.druid.collect.StupidResourceHolder; import com.metamx.druid.kv.GenericIndexed; -import com.metamx.druid.kv.IndexedIterable; import com.metamx.druid.kv.IndexedLongs; import java.io.IOException; @@ -162,6 +161,11 @@ public class CompressedLongsIndexedSupplier implements Supplier }; } + public int 
getSerializedSize() + { + return baseLongBuffers.getSerializedSize() + 1 + 4 + 4; + } + public void writeToChannel(WritableByteChannel channel) throws IOException { channel.write(ByteBuffer.wrap(new byte[]{version})); @@ -188,7 +192,7 @@ public class CompressedLongsIndexedSupplier implements Supplier return baseLongBuffers; } - public static CompressedLongsIndexedSupplier fromByteBuffer(ByteBuffer buffer, ByteOrder order) throws IOException + public static CompressedLongsIndexedSupplier fromByteBuffer(ByteBuffer buffer, ByteOrder order) { byte versionFromBuffer = buffer.get(); @@ -196,7 +200,7 @@ public class CompressedLongsIndexedSupplier implements Supplier return new CompressedLongsIndexedSupplier( buffer.getInt(), buffer.getInt(), - GenericIndexed.readFromByteBuffer(buffer, CompressedLongBufferObjectStrategy.getBufferForOrder(order)) + GenericIndexed.read(buffer, CompressedLongBufferObjectStrategy.getBufferForOrder(order)) ); } diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java b/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java index 31ed9efbbed..624d6d4b375 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/IncrementalIndex.java @@ -524,7 +524,7 @@ public class IncrementalIndex implements Iterable public String get(String value) { - return poorMansInterning.get(value); + return value == null ? null : poorMansInterning.get(value); } public int getId(String value) diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java index 4b5512e5ee7..34a8e06962d 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java @@ -20,35 +20,96 @@ package com.metamx.druid.index.v1; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.common.io.ByteStreams; import com.google.common.io.Closeables; +import com.google.common.io.Files; +import com.google.common.primitives.Ints; +import com.metamx.common.IAE; import com.metamx.common.ISE; +import com.metamx.common.io.smoosh.FileSmoosher; import com.metamx.common.io.smoosh.Smoosh; import com.metamx.common.io.smoosh.SmooshedFileMapper; +import com.metamx.common.io.smoosh.SmooshedWriter; import com.metamx.common.logger.Logger; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.SimpleQueryableIndex; +import com.metamx.druid.index.column.Column; +import com.metamx.druid.index.column.ColumnBuilder; +import com.metamx.druid.index.column.ColumnDescriptor; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.index.serde.BitmapIndexColumnPartSupplier; +import com.metamx.druid.index.serde.ComplexColumnPartSerde; +import com.metamx.druid.index.serde.ComplexColumnPartSupplier; +import com.metamx.druid.index.serde.DictionaryEncodedColumnPartSerde; +import com.metamx.druid.index.serde.DictionaryEncodedColumnSupplier; +import com.metamx.druid.index.serde.FloatGenericColumnPartSerde; +import com.metamx.druid.index.serde.FloatGenericColumnSupplier; +import com.metamx.druid.index.serde.LongGenericColumnPartSerde; +import com.metamx.druid.index.serde.LongGenericColumnSupplier; 
+import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.kv.ArrayIndexed; import com.metamx.druid.kv.ConciseCompressedIndexedInts; import com.metamx.druid.kv.GenericIndexed; import com.metamx.druid.kv.IndexedIterable; import com.metamx.druid.kv.VSizeIndexed; +import com.metamx.druid.kv.VSizeIndexedInts; import com.metamx.druid.utils.SerializerUtils; +import it.uniroma3.mat.extendedset.intset.ConciseSet; import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import org.codehaus.jackson.map.ObjectMapper; import org.joda.time.Interval; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.AbstractList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; +import java.util.Set; /** * This should be changed to use DI instead of a static reference... */ public class IndexIO { + public static final byte V8_VERSION = 0x8; + public static final byte V9_VERSION = 0x9; + + private static final Map indexLoaders = + ImmutableMap.builder() + .put(0, new LegacyIndexLoader()) + .put(1, new LegacyIndexLoader()) + .put(2, new LegacyIndexLoader()) + .put(3, new LegacyIndexLoader()) + .put(4, new LegacyIndexLoader()) + .put(5, new LegacyIndexLoader()) + .put(6, new LegacyIndexLoader()) + .put(7, new LegacyIndexLoader()) + .put(8, new LegacyIndexLoader()) + .put(9, new V9IndexLoader()) + .build(); + + private static final Logger log = new Logger(IndexIO.class); + private static final SerializerUtils serializerUtils = new SerializerUtils(); + public static final ByteOrder BYTE_ORDER = ByteOrder.nativeOrder(); + + // This should really be provided by DI, should be changed once we switch around to using a DI framework + private static final ObjectMapper mapper = new DefaultObjectMapper(); + + private static volatile IndexIOHandler handler = null; - public static final byte CURRENT_VERSION_ID = 0x8; + public static final int CURRENT_VERSION_ID = V9_VERSION; public static Index readIndex(File inDir) throws IOException { @@ -62,12 +123,27 @@ public class IndexIO return handler.canBeMapped(inDir); } + @Deprecated public static MMappedIndex mapDir(final File inDir) throws IOException { init(); return handler.mapDir(inDir); } + public static QueryableIndex loadIndex(File inDir) throws IOException + { + init(); + final int version = getVersionFromDir(inDir); + + final IndexLoader loader = indexLoaders.get(version); + + if (loader != null) { + return loader.load(inDir); + } else { + throw new ISE("Unknown index version[%s]", version); + } + } + public static void storeLatest(Index index, File file) throws IOException { handler.storeLatest(index, file); @@ -82,8 +158,7 @@ public class IndexIO { if (IndexIO.handler == null) { IndexIO.handler = handler; - } - else { + } else { throw new ISE("Already have a handler[%s], cannot register another[%s]", IndexIO.handler, handler); } } @@ -95,6 +170,26 @@ public class IndexIO } } + public static int getVersionFromDir(File inDir) throws IOException + { + File versionFile = new File(inDir, "version.bin"); + if (versionFile.exists()) { + return Ints.fromByteArray(Files.toByteArray(versionFile)); + } + + final File indexFile = new File(inDir, "index.drd"); + InputStream in = null; + int version; + try { + in = new FileInputStream(indexFile); + version = in.read(); + } + finally { + Closeables.closeQuietly(in); + } + return version; + } + 
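// --- Illustrative sketch, not part of this patch ---
// With the loader table above, callers no longer need to care which on-disk generation a
// segment is: getVersionFromDir() prefers the 4-byte "version.bin" written with
// Ints.toByteArray(9) by the v8-to-v9 converter below, and falls back to the first byte of
// the legacy "index.drd". Versions 0-8 go through LegacyIndexLoader; version 9 is read by
// V9IndexLoader. The directory path here is hypothetical, for illustration only.
File segmentDir = new File("/tmp/example-segment");
QueryableIndex queryable = IndexIO.loadIndex(segmentDir);
// --- end sketch ---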
public static void checkFileSize(File indexFile) throws IOException { final long fileSize = indexFile.length(); @@ -128,7 +223,6 @@ public class IndexIO * This only exists for some legacy compatibility reasons, Metamarkets is working on getting rid of it in * future versions. Normal persisting of indexes is done via IndexMerger. * - * * @param file */ public void storeLatest(Index index, File file) throws IOException; @@ -137,9 +231,6 @@ public class IndexIO static class DefaultIndexIOHandler implements IndexIOHandler { private static final Logger log = new Logger(DefaultIndexIOHandler.class); - private static final SerializerUtils serializerUtils = new SerializerUtils(); - private static final ByteOrder BYTE_ORDER = ByteOrder.nativeOrder(); - @Override public Index readIndex(File inDir) { @@ -152,8 +243,6 @@ public class IndexIO return true; } - public static final byte VERSION_ID = 0x8; - @Override public MMappedIndex mapDir(File inDir) throws IOException { @@ -164,7 +253,7 @@ public class IndexIO try { indexIn = new FileInputStream(new File(inDir, "index.drd")); byte theVersion = (byte) indexIn.read(); - if (theVersion != VERSION_ID) { + if (theVersion != V8_VERSION) { throw new IllegalArgumentException(String.format("Unknown version[%s]", theVersion)); } } @@ -176,10 +265,10 @@ public class IndexIO ByteBuffer indexBuffer = smooshedFiles.mapFile("index.drd"); indexBuffer.get(); // Skip the version byte - final GenericIndexed availableDimensions = GenericIndexed.readFromByteBuffer( + final GenericIndexed availableDimensions = GenericIndexed.read( indexBuffer, GenericIndexed.stringStrategy ); - final GenericIndexed availableMetrics = GenericIndexed.readFromByteBuffer( + final GenericIndexed availableMetrics = GenericIndexed.read( indexBuffer, GenericIndexed.stringStrategy ); final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer)); @@ -213,7 +302,7 @@ public class IndexIO fileDimensionName ); - dimValueLookups.put(dimension, GenericIndexed.readFromByteBuffer(dimBuffer, GenericIndexed.stringStrategy)); + dimValueLookups.put(dimension, GenericIndexed.read(dimBuffer, GenericIndexed.stringStrategy)); dimColumns.put(dimension, VSizeIndexed.readFromByteBuffer(dimBuffer)); } @@ -221,7 +310,7 @@ public class IndexIO for (int i = 0; i < availableDimensions.size(); ++i) { invertedIndexed.put( serializerUtils.readString(invertedBuffer), - GenericIndexed.readFromByteBuffer(invertedBuffer, ConciseCompressedIndexedInts.objectStrategy) + GenericIndexed.read(invertedBuffer, ConciseCompressedIndexedInts.objectStrategy) ); } @@ -246,6 +335,386 @@ public class IndexIO { throw new UnsupportedOperationException("Shouldn't ever happen in a cluster that is not owned by MMX."); } + + public static void convertV8toV9(File v8Dir, File v9Dir) throws IOException + { + log.info("Converting v8[%s] to v9[%s]", v8Dir, v9Dir); + + InputStream indexIn = null; + try { + indexIn = new FileInputStream(new File(v8Dir, "index.drd")); + byte theVersion = (byte) indexIn.read(); + if (theVersion != V8_VERSION) { + throw new IAE("Unknown version[%s]", theVersion); + } + } + finally { + Closeables.close(indexIn, false); + } + + SmooshedFileMapper v8SmooshedFiles = Smoosh.map(v8Dir); + + v9Dir.mkdirs(); + final FileSmoosher v9Smoosher = new FileSmoosher(v9Dir); + + ByteStreams.write(Ints.toByteArray(9), Files.newOutputStreamSupplier(new File(v9Dir, "version.bin"))); + Map> bitmapIndexes = Maps.newHashMap(); + + final ByteBuffer invertedBuffer = v8SmooshedFiles.mapFile("inverted.drd"); + while 
(invertedBuffer.hasRemaining()) { + bitmapIndexes.put( + serializerUtils.readString(invertedBuffer), + GenericIndexed.read(invertedBuffer, ConciseCompressedIndexedInts.objectStrategy) + ); + } + + LinkedHashSet skippedFiles = Sets.newLinkedHashSet(); + for (String filename : v8SmooshedFiles.getInternalFilenames()) { + log.info("Processing file[%s]", filename); + if (filename.startsWith("dim_")) { + final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); + builder.setValueType(ValueType.STRING); + + final List outParts = Lists.newArrayList(); + + ByteBuffer dimBuffer = v8SmooshedFiles.mapFile(filename); + String dimension = serializerUtils.readString(dimBuffer); + if (!filename.equals(String.format("dim_%s.drd", dimension))) { + throw new ISE("loaded dimension[%s] from file[%s]", dimension, filename); + } + + ByteArrayOutputStream nameBAOS = new ByteArrayOutputStream(); + serializerUtils.writeString(nameBAOS, dimension); + outParts.add(ByteBuffer.wrap(nameBAOS.toByteArray())); + + GenericIndexed dictionary = GenericIndexed.read( + dimBuffer, GenericIndexed.stringStrategy + ); + + VSizeIndexedInts singleValCol = null; + VSizeIndexed multiValCol = VSizeIndexed.readFromByteBuffer(dimBuffer.asReadOnlyBuffer()); + GenericIndexed bitmaps = bitmapIndexes.get(dimension); + + boolean onlyOneValue = true; + ConciseSet nullsSet = null; + for (int i = 0; i < multiValCol.size(); ++i) { + VSizeIndexedInts rowValue = multiValCol.get(i); + if (!onlyOneValue) { + break; + } + if (rowValue.size() > 1) { + onlyOneValue = false; + } + if (rowValue.size() == 0) { + if (nullsSet == null) { + nullsSet = new ConciseSet(); + } + nullsSet.add(i); + } + } + + if (onlyOneValue) { + log.info("Dimension[%s] is single value, converting...", dimension); + final boolean bumpedDictionary; + if (nullsSet != null) { + log.info("Dimension[%s] has null rows.", dimension); + final ImmutableConciseSet theNullSet = ImmutableConciseSet.newImmutableFromMutable(nullsSet); + + if (dictionary.get(0) != null) { + log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension); + bumpedDictionary = true; + final List nullList = Lists.newArrayList(); + nullList.add(null); + + dictionary = GenericIndexed.fromIterable( + Iterables.concat(nullList, dictionary), + GenericIndexed.stringStrategy + ); + + bitmaps = GenericIndexed.fromIterable( + Iterables.concat(Arrays.asList(theNullSet), bitmaps), + ConciseCompressedIndexedInts.objectStrategy + ); + } + else { + bumpedDictionary = false; + bitmaps = GenericIndexed.fromIterable( + Iterables.concat( + Arrays.asList(ImmutableConciseSet.union(theNullSet, bitmaps.get(0))), + Iterables.skip(bitmaps, 1) + ), + ConciseCompressedIndexedInts.objectStrategy + ); + } + } + else { + bumpedDictionary = false; + } + + final VSizeIndexed finalMultiValCol = multiValCol; + singleValCol = VSizeIndexedInts.fromList( + new AbstractList() + { + @Override + public Integer get(int index) + { + final VSizeIndexedInts ints = finalMultiValCol.get(index); + return ints.size() == 0 ? 0 : ints.get(0) + (bumpedDictionary ? 
1 : 0); + } + + @Override + public int size() + { + return finalMultiValCol.size(); + } + }, + dictionary.size() + ); + multiValCol = null; + } else { + builder.setHasMultipleValues(true); + } + + builder.addSerde( + new DictionaryEncodedColumnPartSerde(dictionary, singleValCol, multiValCol, bitmaps) + ); + + final ColumnDescriptor serdeficator = builder.build(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator)); + byte[] specBytes = baos.toByteArray(); + + final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter( + dimension, serdeficator.numBytes() + specBytes.length + ); + channel.write(ByteBuffer.wrap(specBytes)); + serdeficator.write(channel); + channel.close(); + } else if (filename.startsWith("met_")) { + if (!filename.endsWith(String.format("%s.drd", BYTE_ORDER))) { + skippedFiles.add(filename); + continue; + } + + MetricHolder holder = MetricHolder.fromByteBuffer(v8SmooshedFiles.mapFile(filename)); + final String metric = holder.getName(); + + final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); + + switch (holder.getType()) { + case FLOAT: + builder.setValueType(ValueType.FLOAT); + builder.addSerde(new FloatGenericColumnPartSerde(holder.floatType, BYTE_ORDER)); + break; + case COMPLEX: + if (!(holder.complexType instanceof GenericIndexed)) { + throw new ISE("Serialized complex types must be GenericIndexed objects."); + } + final GenericIndexed column = (GenericIndexed) holder.complexType; + final String complexType = holder.getTypeName(); + + builder.setValueType(ValueType.COMPLEX); + builder.addSerde(new ComplexColumnPartSerde(column, complexType)); + break; + default: + throw new ISE("Unknown type[%s]", holder.getType()); + } + + final ColumnDescriptor serdeficator = builder.build(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator)); + byte[] specBytes = baos.toByteArray(); + + final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter( + metric, serdeficator.numBytes() + specBytes.length + ); + channel.write(ByteBuffer.wrap(specBytes)); + serdeficator.write(channel); + channel.close(); + } else if (String.format("time_%s.drd", BYTE_ORDER).equals(filename)) { + CompressedLongsIndexedSupplier timestamps = CompressedLongsIndexedSupplier.fromByteBuffer( + v8SmooshedFiles.mapFile(filename), BYTE_ORDER + ); + + final ColumnDescriptor.Builder builder = ColumnDescriptor.builder(); + builder.setValueType(ValueType.LONG); + builder.addSerde(new LongGenericColumnPartSerde(timestamps, BYTE_ORDER)); + + final ColumnDescriptor serdeficator = builder.build(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator)); + byte[] specBytes = baos.toByteArray(); + + final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter( + "__time", serdeficator.numBytes() + specBytes.length + ); + channel.write(ByteBuffer.wrap(specBytes)); + serdeficator.write(channel); + channel.close(); + } else if ("index.drd".equals(filename)) { + final ByteBuffer indexBuffer = v8SmooshedFiles.mapFile(filename); + + indexBuffer.get(); // Skip the version byte + final GenericIndexed dims = GenericIndexed.read( + indexBuffer, GenericIndexed.stringStrategy + ); + final GenericIndexed availableMetrics = GenericIndexed.read( + indexBuffer, GenericIndexed.stringStrategy + ); + final Interval dataInterval = new 
Interval(serializerUtils.readString(indexBuffer)); + + Set columns = Sets.newTreeSet(); + columns.addAll(Lists.newArrayList(dims)); + columns.addAll(Lists.newArrayList(availableMetrics)); + + GenericIndexed cols = GenericIndexed.fromIterable(columns, GenericIndexed.stringStrategy); + + final int numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16; + final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes); + cols.writeToChannel(writer); + dims.writeToChannel(writer); + serializerUtils.writeLong(writer, dataInterval.getStartMillis()); + serializerUtils.writeLong(writer, dataInterval.getEndMillis()); + writer.close(); + } else { + skippedFiles.add(filename); + } + } + log.info("Skipped files[%s]", skippedFiles); + + v9Smoosher.close(); + } + } + + static interface IndexLoader + { + public QueryableIndex load(File inDir) throws IOException; + } + + static class LegacyIndexLoader implements IndexLoader + { + @Override + public QueryableIndex load(File inDir) throws IOException + { + MMappedIndex index = IndexIO.mapDir(inDir); + + Map columns = Maps.newHashMap(); + + for (String dimension : index.getAvailableDimensions()) { + columns.put( + dimension.toLowerCase(), + new ColumnBuilder() + .setType(ValueType.STRING) + .setHasMultipleValues(true) + .setDictionaryEncodedColumn( + new DictionaryEncodedColumnSupplier( + index.getDimValueLookup(dimension), null, (index.getDimColumn(dimension)) + ) + ) + .setBitmapIndex( + new BitmapIndexColumnPartSupplier( + index.getInvertedIndexes().get(dimension), index.getDimValueLookup(dimension) + ) + ) + .build() + ); + } + + for (String metric : index.getAvailableMetrics()) { + final MetricHolder metricHolder = index.getMetricHolder(metric); + if (metricHolder.getType() == MetricHolder.MetricType.FLOAT) { + columns.put( + metric.toLowerCase(), + new ColumnBuilder() + .setType(ValueType.FLOAT) + .setGenericColumn(new FloatGenericColumnSupplier(metricHolder.floatType, BYTE_ORDER)) + .build() + ); + } else if (metricHolder.getType() == MetricHolder.MetricType.COMPLEX) { + columns.put( + metric.toLowerCase(), + new ColumnBuilder() + .setType(ValueType.COMPLEX) + .setComplexColumn( + new ComplexColumnPartSupplier( + metricHolder.getTypeName(), (GenericIndexed) metricHolder.complexType + ) + ) + .build() + ); + } + } + + Set colSet = Sets.newTreeSet(); + for (String dimension : index.getAvailableDimensions()) { + colSet.add(dimension.toLowerCase()); + } + for (String metric : index.getAvailableMetrics()) { + colSet.add(metric.toLowerCase()); + } + + String[] cols = colSet.toArray(new String[colSet.size()]); + + return new SimpleQueryableIndex( + index.getDataInterval(), + new ArrayIndexed(cols, String.class), + index.getAvailableDimensions(), + new ColumnBuilder() + .setType(ValueType.LONG) + .setGenericColumn(new LongGenericColumnSupplier(index.timestamps)) + .build(), + columns + ); + } + } + + static class V9IndexLoader implements IndexLoader + { + @Override + public QueryableIndex load(File inDir) throws IOException + { + log.debug("Mapping v9 index[%s]", inDir); + long startTime = System.currentTimeMillis(); + + final int theVersion = Ints.fromByteArray(Files.toByteArray(new File(inDir, "version.bin"))); + if (theVersion != V9_VERSION) { + throw new IllegalArgumentException(String.format("Expected version[9], got[%s]", theVersion)); + } + + SmooshedFileMapper smooshedFiles = Smoosh.map(inDir); + + ByteBuffer indexBuffer = smooshedFiles.mapFile("index.drd"); + final GenericIndexed cols = 
GenericIndexed.read(indexBuffer, GenericIndexed.stringStrategy); + final GenericIndexed dims = GenericIndexed.read(indexBuffer, GenericIndexed.stringStrategy); + final Interval dataInterval = new Interval(indexBuffer.getLong(), indexBuffer.getLong()); + + Map columns = Maps.newHashMap(); + + ObjectMapper mapper = new DefaultObjectMapper(); + + for (String columnName : cols) { + columns.put(columnName, deserializeColumn(mapper, smooshedFiles.mapFile(columnName))); + } + + final QueryableIndex index = new SimpleQueryableIndex( + dataInterval, cols, dims, deserializeColumn(mapper, smooshedFiles.mapFile("__time")), columns + ); + + log.debug("Mapped v9 index[%s] in %,d millis", inDir, System.currentTimeMillis() - startTime); + + return index; + } + + private Column deserializeColumn(ObjectMapper mapper, ByteBuffer byteBuffer) throws IOException + { + ColumnDescriptor serde = mapper.readValue( + serializerUtils.readString(byteBuffer), ColumnDescriptor.class + ); + return serde.read(byteBuffer); + } } public static File makeDimFile(File dir, String dimension) diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/MetricHolder.java b/index-common/src/main/java/com/metamx/druid/index/v1/MetricHolder.java index 91cbf4f6818..075b9dbfe52 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/MetricHolder.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/MetricHolder.java @@ -39,7 +39,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.channels.Channels; import java.nio.channels.WritableByteChannel; /** @@ -140,7 +139,7 @@ public class MetricHolder break; case COMPLEX: if (strategy != null) { - holder.complexType = GenericIndexed.readFromByteBuffer(buf, strategy); + holder.complexType = GenericIndexed.read(buf, strategy); } else { final ComplexMetricSerde serdeForType = ComplexMetrics.getSerdeForType(holder.getTypeName()); @@ -148,7 +147,7 @@ public class MetricHolder throw new ISE("Unknown type[%s], cannot load.", holder.getTypeName()); } - holder.complexType = GenericIndexed.readFromByteBuffer(buf, serdeForType.getObjectStrategy()); + holder.complexType = GenericIndexed.read(buf, serdeForType.getObjectStrategy()); } break; } @@ -174,8 +173,8 @@ public class MetricHolder } } - private CompressedFloatsIndexedSupplier floatType = null; - private Indexed complexType = null; + CompressedFloatsIndexedSupplier floatType = null; + Indexed complexType = null; private MetricHolder( String name, diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/serde/ComplexMetricSerde.java b/index-common/src/main/java/com/metamx/druid/index/v1/serde/ComplexMetricSerde.java index e2657dc60d1..e54fb1cceca 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/serde/ComplexMetricSerde.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/serde/ComplexMetricSerde.java @@ -19,13 +19,57 @@ package com.metamx.druid.index.v1.serde; +import com.google.common.base.Function; +import com.metamx.druid.index.column.ColumnBuilder; +import com.metamx.druid.index.serde.ColumnPartSerde; import com.metamx.druid.kv.ObjectStrategy; +import java.nio.ByteBuffer; + /** */ -public interface ComplexMetricSerde +public abstract class ComplexMetricSerde { - public String getTypeName(); - public ComplexMetricExtractor getExtractor(); - public ObjectStrategy getObjectStrategy(); + public abstract String getTypeName(); + public abstract ComplexMetricExtractor getExtractor(); + + /** + * Deserializes a 
ByteBuffer and adds it to the ColumnBuilder. This method allows for the ComplexMetricSerde + * to implement it's own versioning scheme to allow for changes of binary format in a forward-compatible manner. + * + * The method is also in charge of returning a ColumnPartSerde that knows how to serialize out the object it + * added to the builder. + * + * @param buffer the buffer to deserialize + * @return a ColumnPartSerde that can serialize out the object that was read from the buffer to the builder + */ + public abstract ColumnPartSerde deserializeColumn(ByteBuffer buffer, ColumnBuilder builder); + + /** + * This is deprecated because its usage is going to be removed from the code. + * + * It was introduced before deserializeColumn() existed. This method creates the assumption that Druid knows + * how to interpret the actual column representation of the data, but I would much prefer that the ComplexMetricSerde + * objects be in charge of creating and interpreting the whole column, which is what deserializeColumn lets + * them do. + * + * @return an ObjectStrategy as used by GenericIndexed + */ + @Deprecated + public abstract ObjectStrategy getObjectStrategy(); + + + /** + * Returns a function that can convert the Object provided by the ComplexColumn created through deserializeColumn + * into a number of expected input bytes to produce that object. + * + * This is used to approximate the size of the input data via the SegmentMetadataQuery and does not need to be + * overridden if you do not care about the query. + * + * @return A function that can compute the size of the complex object or null if you cannot/do not want to compute it + */ + public Function inputSizeFn() + { + return null; + } } diff --git a/index-common/src/main/java/com/metamx/druid/indexer/data/StringInputRowParser.java b/index-common/src/main/java/com/metamx/druid/indexer/data/StringInputRowParser.java index 4c5e2dcd9bb..3721f2c14f7 100644 --- a/index-common/src/main/java/com/metamx/druid/indexer/data/StringInputRowParser.java +++ b/index-common/src/main/java/com/metamx/druid/indexer/data/StringInputRowParser.java @@ -19,6 +19,7 @@ package com.metamx.druid.indexer.data; +import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.metamx.common.exception.FormattedException; @@ -56,7 +57,18 @@ public class StringInputRowParser this.dimensionExclusions = Sets.newHashSet(); if (dimensionExclusions != null) { - this.dimensionExclusions.addAll(dimensionExclusions); + this.dimensionExclusions.addAll( + Lists.transform( + dimensionExclusions, new Function() + { + @Override + public String apply(String s) + { + return s.toLowerCase(); + } + } + ) + ); } this.dimensionExclusions.add(timestampSpec.getTimestampColumn()); diff --git a/index-common/src/main/java/com/metamx/druid/kv/GenericIndexed.java b/index-common/src/main/java/com/metamx/druid/kv/GenericIndexed.java index d98abc1b670..5fed0c13b1f 100644 --- a/index-common/src/main/java/com/metamx/druid/kv/GenericIndexed.java +++ b/index-common/src/main/java/com/metamx/druid/kv/GenericIndexed.java @@ -150,8 +150,11 @@ public class GenericIndexed implements Indexed @Override public T get(int index) { + if (index < 0) { + throw new IAE("Index[%s] < 0", index); + } if (index >= size) { - throw new IllegalArgumentException(String.format("Index[%s] >= size[%s]", index, size)); + throw new IAE(String.format("Index[%s] >= size[%s]", index, size)); } ByteBuffer myBuffer = theBuffer.asReadOnlyBuffer(); @@ -204,6 +207,11 @@ 
public class GenericIndexed implements Indexed return -(minIndex + 1); } + public int getSerializedSize() + { + return theBuffer.remaining() + 2 + 4 + 4; + } + public void writeToChannel(WritableByteChannel channel) throws IOException { channel.write(ByteBuffer.wrap(new byte[]{version, allowReverseLookup ? (byte) 0x1 : (byte) 0x0})); @@ -212,8 +220,7 @@ public class GenericIndexed implements Indexed channel.write(theBuffer.asReadOnlyBuffer()); } - public static GenericIndexed readFromByteBuffer(ByteBuffer buffer, ObjectStrategy strategy) - throws IOException + public static GenericIndexed read(ByteBuffer buffer, ObjectStrategy strategy) { byte versionFromBuffer = buffer.get(); diff --git a/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexed.java b/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexed.java index b77ab3ba4db..9ef63f39ac0 100644 --- a/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexed.java +++ b/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexed.java @@ -153,6 +153,11 @@ public class VSizeIndexed implements Indexed throw new UnsupportedOperationException("Reverse lookup not allowed."); } + public int getSerializedSize() + { + return theBuffer.remaining() + 4 + 4 + 2; + } + public void writeToChannel(WritableByteChannel channel) throws IOException { channel.write(ByteBuffer.wrap(new byte[]{version, (byte) numBytes})); @@ -161,10 +166,7 @@ public class VSizeIndexed implements Indexed channel.write(theBuffer.asReadOnlyBuffer()); } - public static VSizeIndexed readFromByteBuffer( - ByteBuffer buffer - ) - throws IOException + public static VSizeIndexed readFromByteBuffer(ByteBuffer buffer) { byte versionFromBuffer = buffer.get(); @@ -175,10 +177,7 @@ public class VSizeIndexed implements Indexed bufferToUse.limit(bufferToUse.position() + size); buffer.position(bufferToUse.limit()); - return new VSizeIndexed( - bufferToUse, - numBytes - ); + return new VSizeIndexed(bufferToUse, numBytes); } throw new IAE("Unknown version[%s]", versionFromBuffer); diff --git a/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexedInts.java b/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexedInts.java index c4bd1adad2f..2b880f96f97 100644 --- a/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexedInts.java +++ b/index-common/src/main/java/com/metamx/druid/kv/VSizeIndexedInts.java @@ -22,7 +22,9 @@ package com.metamx.druid.kv; import com.google.common.primitives.Ints; import com.metamx.common.IAE; +import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; import java.util.Iterator; import java.util.List; @@ -30,6 +32,8 @@ import java.util.List; */ public class VSizeIndexedInts implements IndexedInts, Comparable { + private static final byte version = 0x0; + public static VSizeIndexedInts fromArray(int[] array) { return fromArray(array, Ints.max(array)); @@ -143,9 +147,43 @@ public class VSizeIndexedInts implements IndexedInts, Comparable iterator() { return new IndexedIntsIterator(this); } + + public void writeToChannel(WritableByteChannel channel) throws IOException + { + channel.write(ByteBuffer.wrap(new byte[]{version, (byte) numBytes})); + channel.write(ByteBuffer.wrap(Ints.toByteArray(buffer.remaining()))); + channel.write(buffer.asReadOnlyBuffer()); + } + + public static VSizeIndexedInts readFromByteBuffer(ByteBuffer buffer) + { + byte versionFromBuffer = buffer.get(); + + if (version == versionFromBuffer) { + int numBytes = buffer.get(); + int size = buffer.getInt(); + ByteBuffer bufferToUse = 
buffer.asReadOnlyBuffer(); + bufferToUse.limit(bufferToUse.position() + size); + buffer.position(bufferToUse.limit()); + + return new VSizeIndexedInts( + bufferToUse, + numBytes + ); + } + + throw new IAE("Unknown version[%s]", versionFromBuffer); + } + } diff --git a/index-common/src/test/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplierTest.java b/index-common/src/test/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplierTest.java index 850217c1f0d..125aa7618f8 100644 --- a/index-common/src/test/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplierTest.java +++ b/index-common/src/test/java/com/metamx/druid/index/v1/CompressedFloatsIndexedSupplierTest.java @@ -27,8 +27,12 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.FloatBuffer; +import java.nio.channels.Channels; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -71,6 +75,25 @@ public class CompressedFloatsIndexedSupplierTest indexed = supplier.get(); } + private void setupSimpleWithSerde() throws IOException + { + vals = new float[]{ + 0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 0.10f, 0.11f, 0.12f, 0.13f, 0.14f, 0.15f + }; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final CompressedFloatsIndexedSupplier theSupplier = CompressedFloatsIndexedSupplier.fromFloatBuffer( + FloatBuffer.wrap(vals), 5, ByteOrder.nativeOrder() + ); + theSupplier.writeToChannel(Channels.newChannel(baos)); + + final byte[] bytes = baos.toByteArray(); + Assert.assertEquals(theSupplier.getSerializedSize(), bytes.length); + + supplier = CompressedFloatsIndexedSupplier.fromByteBuffer(ByteBuffer.wrap(bytes), ByteOrder.nativeOrder()); + indexed = supplier.get(); + } + @Test public void testSanity() throws Exception { @@ -102,6 +125,37 @@ public class CompressedFloatsIndexedSupplierTest tryFill(7, 10); } + @Test + public void testSanityWithSerde() throws Exception + { + setupSimpleWithSerde(); + + Assert.assertEquals(4, supplier.getBaseFloatBuffers().size()); + + Assert.assertEquals(vals.length, indexed.size()); + for (int i = 0; i < indexed.size(); ++i) { + Assert.assertEquals(vals[i], indexed.get(i), 0.0); + } + } + + @Test + public void testBulkFillWithSerde() throws Exception + { + setupSimpleWithSerde(); + + tryFill(0, 15); + tryFill(3, 6); + tryFill(7, 7); + tryFill(7, 9); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void testBulkFillTooMuchWithSerde() throws Exception + { + setupSimpleWithSerde(); + tryFill(7, 10); + } + // This test attempts to cause a race condition with the DirectByteBuffers, it's non-deterministic in causing it, // which sucks but I can't think of a way to deterministically cause it... 
@Test diff --git a/index-common/src/test/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplierTest.java b/index-common/src/test/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplierTest.java index f14fcd29f86..0f06cff2b64 100644 --- a/index-common/src/test/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplierTest.java +++ b/index-common/src/test/java/com/metamx/druid/index/v1/CompressedLongsIndexedSupplierTest.java @@ -27,8 +27,13 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.FloatBuffer; import java.nio.LongBuffer; +import java.nio.channels.Channels; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -69,6 +74,23 @@ public class CompressedLongsIndexedSupplierTest indexed = supplier.get(); } + private void setupSimpleWithSerde() throws IOException + { + vals = new long[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16}; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final CompressedLongsIndexedSupplier theSupplier = CompressedLongsIndexedSupplier.fromLongBuffer( + LongBuffer.wrap(vals), 5, ByteOrder.nativeOrder() + ); + theSupplier.writeToChannel(Channels.newChannel(baos)); + + final byte[] bytes = baos.toByteArray(); + Assert.assertEquals(theSupplier.getSerializedSize(), bytes.length); + + supplier = CompressedLongsIndexedSupplier.fromByteBuffer(ByteBuffer.wrap(bytes), ByteOrder.nativeOrder()); + indexed = supplier.get(); + } + @Test public void testSanity() throws Exception { @@ -100,6 +122,37 @@ public class CompressedLongsIndexedSupplierTest tryFill(7, 10); } + @Test + public void testSanityWithSerde() throws Exception + { + setupSimpleWithSerde(); + + Assert.assertEquals(4, supplier.getBaseLongBuffers().size()); + + Assert.assertEquals(vals.length, indexed.size()); + for (int i = 0; i < indexed.size(); ++i) { + Assert.assertEquals(vals[i], indexed.get(i)); + } + } + + @Test + public void testBulkFillWithSerde() throws Exception + { + setupSimpleWithSerde(); + + tryFill(0, 15); + tryFill(3, 6); + tryFill(7, 7); + tryFill(7, 9); + } + + @Test(expected = IndexOutOfBoundsException.class) + public void testBulkFillTooMuchWithSerde() throws Exception + { + setupSimpleWithSerde(); + tryFill(7, 10); + } + // This test attempts to cause a race condition with the DirectByteBuffers, it's non-deterministic in causing it, // which sucks but I can't think of a way to deterministically cause it... 
@Test diff --git a/index-common/src/test/java/com/metamx/druid/kv/GenericIndexedTest.java b/index-common/src/test/java/com/metamx/druid/kv/GenericIndexedTest.java index 41e6a65ebde..fad903abc96 100644 --- a/index-common/src/test/java/com/metamx/druid/kv/GenericIndexedTest.java +++ b/index-common/src/test/java/com/metamx/druid/kv/GenericIndexedTest.java @@ -114,7 +114,8 @@ public class GenericIndexedTest channel.close(); final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); - GenericIndexed deserialized = GenericIndexed.readFromByteBuffer( + Assert.assertEquals(indexed.getSerializedSize(), byteBuffer.remaining()); + GenericIndexed deserialized = GenericIndexed.read( byteBuffer, GenericIndexed.stringStrategy ); Assert.assertEquals(0, byteBuffer.remaining()); diff --git a/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedIntsTest.java b/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedIntsTest.java index 2c080e91469..d953958116b 100644 --- a/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedIntsTest.java +++ b/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedIntsTest.java @@ -22,6 +22,10 @@ package com.metamx.druid.kv; import org.junit.Assert; import org.junit.Test; +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; + /** */ public class VSizeIndexedIntsTest @@ -38,4 +42,25 @@ public class VSizeIndexedIntsTest Assert.assertEquals(array[i], ints.get(i)); } } + + @Test + public void testSerialization() throws Exception + { + final int[] array = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + VSizeIndexedInts ints = VSizeIndexedInts.fromArray(array); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ints.writeToChannel(Channels.newChannel(baos)); + + final byte[] bytes = baos.toByteArray(); + Assert.assertEquals(ints.getSerializedSize(), bytes.length); + VSizeIndexedInts deserialized = VSizeIndexedInts.readFromByteBuffer(ByteBuffer.wrap(bytes)); + + Assert.assertEquals(1, deserialized.getNumBytes()); + Assert.assertEquals(array.length, deserialized.size()); + for (int i = 0; i < array.length; i++) { + Assert.assertEquals(array[i], deserialized.get(i)); + } + } + } diff --git a/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedTest.java b/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedTest.java index 39ce52a5133..631ec03869f 100644 --- a/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedTest.java +++ b/index-common/src/test/java/com/metamx/druid/kv/VSizeIndexedTest.java @@ -61,10 +61,11 @@ public class VSizeIndexedTest assertSame(someInts, indexed); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - WritableByteChannel byteChannel = Channels.newChannel(baos); - indexed.writeToChannel(byteChannel); + indexed.writeToChannel(Channels.newChannel(baos)); - VSizeIndexed deserializedIndexed = VSizeIndexed.readFromByteBuffer(ByteBuffer.wrap(baos.toByteArray())); + final byte[] bytes = baos.toByteArray(); + Assert.assertEquals(indexed.getSerializedSize(), bytes.length); + VSizeIndexed deserializedIndexed = VSizeIndexed.readFromByteBuffer(ByteBuffer.wrap(bytes)); assertSame(someInts, deserializedIndexed); } diff --git a/indexer/pom.xml b/indexer/pom.xml index ff29c24a67c..13293775b2f 100644 --- a/indexer/pom.xml +++ b/indexer/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT @@ -51,7 +51,7 @@ com.amazonaws aws-java-sdk - 1.2.15 + 1.3.27 javax.mail diff --git 
a/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java b/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java index a40642ebec5..13490e3ef00 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java @@ -23,21 +23,23 @@ import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Optional; -import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import com.google.common.base.Throwables; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.PeekingIterator; import com.google.common.io.Closeables; -import com.metamx.common.IAE; -import com.metamx.common.Pair; +import com.metamx.common.ISE; import com.metamx.common.guava.nary.BinaryFn; import com.metamx.common.logger.Logger; -import com.metamx.common.parsers.Parser; -import com.metamx.common.parsers.ParserUtils; import com.metamx.druid.CombiningIterable; +import com.metamx.druid.QueryGranularity; +import com.metamx.druid.input.InputRow; import com.metamx.druid.shard.NoneShardSpec; import com.metamx.druid.shard.ShardSpec; import com.metamx.druid.shard.SingleDimensionShardSpec; @@ -45,7 +47,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.InvalidJobConfException; @@ -56,8 +58,11 @@ import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.codehaus.jackson.type.TypeReference; import org.joda.time.DateTime; import org.joda.time.DateTimeComparator; @@ -65,20 +70,26 @@ import org.joda.time.Interval; import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; /** + * Determines appropriate ShardSpecs for a job by determining whether or not partitioning is necessary, and if so, + * choosing the highest cardinality dimension that satisfies the criteria: + * + *
<ul>
+ * <li>Must have exactly one value per row.</li>
+ * <li>Must not generate oversized partitions. A dimension with N rows having the same value will necessarily
+ * put all those rows in the same partition, and that partition may be much larger than the target size.</li>
+ * </ul>
*/ public class DeterminePartitionsJob implements Jobby { private static final Logger log = new Logger(DeterminePartitionsJob.class); - private static final Joiner keyJoiner = Joiner.on(","); - private static final Splitter keySplitter = Splitter.on(","); private static final Joiner tabJoiner = HadoopDruidIndexerConfig.tabJoiner; private static final Splitter tabSplitter = HadoopDruidIndexerConfig.tabSplitter; @@ -91,146 +102,314 @@ public class DeterminePartitionsJob implements Jobby this.config = config; } - public boolean run() + public static void injectSystemProperties(Job job) { - try { - Job job = new Job( - new Configuration(), - String.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals()) - ); - - job.getConfiguration().set("io.sort.record.percent", "0.19"); - for (String propName : System.getProperties().stringPropertyNames()) { - Configuration conf = job.getConfiguration(); - if (propName.startsWith("hadoop.")) { - conf.set(propName.substring("hadoop.".length()), System.getProperty(propName)); - } + final Configuration conf = job.getConfiguration(); + for (String propName : System.getProperties().stringPropertyNames()) { + if (propName.startsWith("hadoop.")) { + conf.set(propName.substring("hadoop.".length()), System.getProperty(propName)); } - - job.setInputFormatClass(TextInputFormat.class); - - job.setMapperClass(DeterminePartitionsMapper.class); - job.setMapOutputValueClass(Text.class); - - SortableBytes.useSortableBytesAsKey(job); - - job.setCombinerClass(DeterminePartitionsCombiner.class); - job.setReducerClass(DeterminePartitionsReducer.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setOutputFormatClass(DeterminePartitionsJob.DeterminePartitionsOutputFormat.class); - FileOutputFormat.setOutputPath(job, config.makeIntermediatePath()); - - config.addInputPaths(job); - config.intoConfiguration(job); - - job.setJarByClass(DeterminePartitionsJob.class); - - job.submit(); - log.info("Job submitted, status available at %s", job.getTrackingURL()); - - final boolean retVal = job.waitForCompletion(true); - - if (retVal) { - log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals()); - FileSystem fileSystem = null; - Map> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance()); - int shardCount = 0; - for (Interval segmentGranularity : config.getSegmentGranularIntervals()) { - DateTime bucket = segmentGranularity.getStart(); - - final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)); - if (fileSystem == null) { - fileSystem = partitionInfoPath.getFileSystem(job.getConfiguration()); - } - if (fileSystem.exists(partitionInfoPath)) { - List specs = config.jsonMapper.readValue( - Utils.openInputStream(job, partitionInfoPath), new TypeReference>() - { - } - ); - - List actualSpecs = Lists.newArrayListWithExpectedSize(specs.size()); - for (int i = 0; i < specs.size(); ++i) { - actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++)); - log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i)); - } - - shardSpecs.put(bucket, actualSpecs); - } - else { - log.info("Path[%s] didn't exist!?", partitionInfoPath); - } - } - config.setShardSpecs(shardSpecs); - } - else { - log.info("Job completed unsuccessfully."); - } - - return retVal; - } - catch (Exception e) { - throw new RuntimeException(e); } } - public static class DeterminePartitionsMapper extends Mapper + public boolean run() { - 
private HadoopDruidIndexerConfig config; - private String partitionDimension; - private Parser parser; - private Function timestampConverter; + try { + /* + * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear + * in the final segment. + */ + + if(!config.getPartitionsSpec().isAssumeGrouped()) { + final Job groupByJob = new Job( + new Configuration(), + String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()) + ); + + injectSystemProperties(groupByJob); + groupByJob.setInputFormatClass(TextInputFormat.class); + groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class); + groupByJob.setMapOutputKeyClass(BytesWritable.class); + groupByJob.setMapOutputValueClass(NullWritable.class); + groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class); + groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class); + groupByJob.setOutputKeyClass(BytesWritable.class); + groupByJob.setOutputValueClass(NullWritable.class); + groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class); + groupByJob.setJarByClass(DeterminePartitionsJob.class); + + config.addInputPaths(groupByJob); + config.intoConfiguration(groupByJob); + FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir()); + + groupByJob.submit(); + log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL()); + + if(!groupByJob.waitForCompletion(true)) { + log.error("Job failed: %s", groupByJob.getJobID()); + return false; + } + } else { + log.info("Skipping group-by job."); + } + + /* + * Read grouped data and determine appropriate partitions. + */ + final Job dimSelectionJob = new Job( + new Configuration(), + String.format("%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()) + ); + + dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19"); + + injectSystemProperties(dimSelectionJob); + + if(!config.getPartitionsSpec().isAssumeGrouped()) { + // Read grouped data from the groupByJob. + dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class); + dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class); + FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir()); + } else { + // Directly read the source data, since we assume it's already grouped. 
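+          // Grouped here means each (timestamp, dimensions) combination appears at most once in the input,
+          // so the per-value row counts computed by the dim-selection pass are exact without the group-by pre-pass.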
+ dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class); + dimSelectionJob.setInputFormatClass(TextInputFormat.class); + config.addInputPaths(dimSelectionJob); + } + + SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob); + dimSelectionJob.setMapOutputValueClass(Text.class); + dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class); + dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class); + dimSelectionJob.setOutputKeyClass(BytesWritable.class); + dimSelectionJob.setOutputValueClass(Text.class); + dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class); + dimSelectionJob.setJarByClass(DeterminePartitionsJob.class); + + config.intoConfiguration(dimSelectionJob); + FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath()); + + dimSelectionJob.submit(); + log.info( + "Job %s submitted, status available at: %s", + dimSelectionJob.getJobName(), + dimSelectionJob.getTrackingURL() + ); + + if(!dimSelectionJob.waitForCompletion(true)) { + log.error("Job failed: %s", dimSelectionJob.getJobID().toString()); + return false; + } + + /* + * Load partitions determined by the previous job. + */ + + log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals()); + FileSystem fileSystem = null; + Map> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance()); + int shardCount = 0; + for (Interval segmentGranularity : config.getSegmentGranularIntervals()) { + DateTime bucket = segmentGranularity.getStart(); + + final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)); + if (fileSystem == null) { + fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration()); + } + if (fileSystem.exists(partitionInfoPath)) { + List specs = config.jsonMapper.readValue( + Utils.openInputStream(dimSelectionJob, partitionInfoPath), new TypeReference>() + { + } + ); + + List actualSpecs = Lists.newArrayListWithExpectedSize(specs.size()); + for (int i = 0; i < specs.size(); ++i) { + actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++)); + log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i)); + } + + shardSpecs.put(bucket, actualSpecs); + } + else { + log.info("Path[%s] didn't exist!?", partitionInfoPath); + } + } + config.setShardSpecs(shardSpecs); + + return true; + } catch(Exception e) { + throw Throwables.propagate(e); + } + } + + public static class DeterminePartitionsGroupByMapper extends HadoopDruidIndexerMapper + { + private QueryGranularity rollupGranularity = null; @Override protected void setup(Context context) throws IOException, InterruptedException { - config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration()); - partitionDimension = config.getPartitionDimension(); - parser = config.getDataSpec().getParser(); - timestampConverter = ParserUtils.createTimestampParser(config.getTimestampFormat()); + super.setup(context); + rollupGranularity = getConfig().getRollupSpec().getRollupGranularity(); + } + + @Override + protected void innerMap( + InputRow inputRow, + Text text, + Context context + ) throws IOException, InterruptedException + { + // Create group key + // TODO -- There are more efficient ways to do this + final Map> dims = Maps.newTreeMap(); + for(final String dim : inputRow.getDimensions()) { + final Set dimValues = ImmutableSortedSet.copyOf(inputRow.getDimension(dim)); + if(dimValues.size() > 0) { + 
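+            // Only dimensions that actually have values are included in the group key.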
dims.put(dim, dimValues); + } + } + final List groupKey = ImmutableList.of( + rollupGranularity.truncate(inputRow.getTimestampFromEpoch()), + dims + ); + context.write( + new BytesWritable(HadoopDruidIndexerConfig.jsonMapper.writeValueAsBytes(groupKey)), + NullWritable.get() + ); + } + } + + public static class DeterminePartitionsGroupByReducer + extends Reducer + { + @Override + protected void reduce( + BytesWritable key, + Iterable values, + Context context + ) throws IOException, InterruptedException + { + context.write(key, NullWritable.get()); + } + } + + /** + * This DimSelection mapper runs on data generated by our GroupBy job. + */ + public static class DeterminePartitionsDimSelectionPostGroupByMapper + extends Mapper + { + private DeterminePartitionsDimSelectionMapperHelper helper; + + @Override + protected void setup(Context context) + throws IOException, InterruptedException + { + final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration()); + final String partitionDimension = config.getPartitionDimension(); + helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension); } @Override protected void map( - LongWritable key, Text value, Context context + BytesWritable key, NullWritable value, Context context ) throws IOException, InterruptedException { - Map values = parser.parse(value.toString()); - final DateTime timestamp; - final String tsStr = (String) values.get(config.getTimestampColumnName()); - try { - timestamp = timestampConverter.apply(tsStr); - } - catch(IllegalArgumentException e) { - if(config.isIgnoreInvalidRows()) { - context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); - return; // we're ignoring this invalid row - } - else { - throw e; - } - } + final List timeAndDims = HadoopDruidIndexerConfig.jsonMapper.readValue(key.getBytes(), List.class); + final DateTime timestamp = new DateTime(timeAndDims.get(0)); + final Map> dims = (Map>) timeAndDims.get(1); + + helper.emitDimValueCounts(context, timestamp, dims); + } + } + + /** + * This DimSelection mapper runs on raw input data that we assume has already been grouped. + */ + public static class DeterminePartitionsDimSelectionAssumeGroupedMapper + extends HadoopDruidIndexerMapper + { + private DeterminePartitionsDimSelectionMapperHelper helper; + + @Override + protected void setup(Context context) + throws IOException, InterruptedException + { + super.setup(context); + final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration()); + final String partitionDimension = config.getPartitionDimension(); + helper = new DeterminePartitionsDimSelectionMapperHelper(config, partitionDimension); + } + + @Override + protected void innerMap( + InputRow inputRow, + Text text, + Context context + ) throws IOException, InterruptedException + { + final Map> dims = Maps.newHashMap(); + for(final String dim : inputRow.getDimensions()) { + dims.put(dim, inputRow.getDimension(dim)); + } + helper.emitDimValueCounts(context, new DateTime(inputRow.getTimestampFromEpoch()), dims); + } + } + + /** + * Since we have two slightly different DimSelectionMappers, this class encapsulates the shared logic for + * emitting dimension value counts. 
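+ * A row where a dimension has more than one value (or none) emits a DimValueCount of -1, which poisons that
+ * dimension and removes it from consideration for partitioning.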
+ */ + public static class DeterminePartitionsDimSelectionMapperHelper + { + private final HadoopDruidIndexerConfig config; + private final String partitionDimension; + + public DeterminePartitionsDimSelectionMapperHelper(HadoopDruidIndexerConfig config, String partitionDimension) + { + this.config = config; + this.partitionDimension = partitionDimension; + } + + public void emitDimValueCounts( + TaskInputOutputContext context, + DateTime timestamp, + Map> dims + ) throws IOException, InterruptedException + { final Optional maybeInterval = config.getGranularitySpec().bucketInterval(timestamp); - if(maybeInterval.isPresent()) { - final DateTime bucket = maybeInterval.get().getStart(); - final String outKey = keyJoiner.join(bucket.toString(), partitionDimension); - final Object dimValue = values.get(partitionDimension); - if (! (dimValue instanceof String)) { - throw new IAE("Cannot partition on a tag-style dimension[%s], line was[%s]", partitionDimension, value); + if(!maybeInterval.isPresent()) { + throw new ISE("WTF?! No bucket found for timestamp: %s", timestamp); + } + + final Interval interval = maybeInterval.get(); + final byte[] groupKey = interval.getStart().toString().getBytes(Charsets.UTF_8); + + for(final Map.Entry> dimAndValues : dims.entrySet()) { + final String dim = dimAndValues.getKey(); + + if(partitionDimension == null || partitionDimension.equals(dim)) { + final Iterable dimValues = dimAndValues.getValue(); + + if(Iterables.size(dimValues) == 1) { + // Emit this value. + write(context, groupKey, new DimValueCount(dim, Iterables.getOnlyElement(dimValues), 1)); + } else { + // This dimension is unsuitable for partitioning. Poison it by emitting a negative value. + write(context, groupKey, new DimValueCount(dim, "", -1)); + } } - - final byte[] groupKey = outKey.getBytes(Charsets.UTF_8); - write(context, groupKey, "", 1); - write(context, groupKey, (String) dimValue, 1); } } } - private static abstract class DeterminePartitionsBaseReducer extends Reducer + private static abstract class DeterminePartitionsDimSelectionBaseReducer + extends Reducer { protected static volatile HadoopDruidIndexerConfig config = null; @@ -240,7 +419,7 @@ public class DeterminePartitionsJob implements Jobby throws IOException, InterruptedException { if (config == null) { - synchronized (DeterminePartitionsBaseReducer.class) { + synchronized (DeterminePartitionsDimSelectionBaseReducer.class) { if (config == null) { config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration()); } @@ -255,166 +434,275 @@ public class DeterminePartitionsJob implements Jobby { SortableBytes keyBytes = SortableBytes.fromBytesWritable(key); - final Iterable> combinedIterable = combineRows(values); + final Iterable combinedIterable = combineRows(values); innerReduce(context, keyBytes, combinedIterable); } protected abstract void innerReduce( - Context context, SortableBytes keyBytes, Iterable> combinedIterable + Context context, SortableBytes keyBytes, Iterable combinedIterable ) throws IOException, InterruptedException; - private Iterable> combineRows(Iterable input) + private Iterable combineRows(Iterable input) { - return new CombiningIterable>( + return new CombiningIterable( Iterables.transform( input, - new Function>() + new Function() { @Override - public Pair apply(Text input) + public DimValueCount apply(Text input) { - Iterator splits = tabSplitter.split(input.toString()).iterator(); - return new Pair(splits.next(), Long.parseLong(splits.next())); + return DimValueCount.fromText(input); } } ), - 
new Comparator>() + new Comparator() { @Override - public int compare(Pair o1, Pair o2) + public int compare(DimValueCount o1, DimValueCount o2) { - return o1.lhs.compareTo(o2.lhs); + return ComparisonChain.start().compare(o1.dim, o2.dim).compare(o1.value, o2.value).result(); } }, - new BinaryFn, Pair, Pair>() + new BinaryFn() { @Override - public Pair apply(Pair arg1, Pair arg2) + public DimValueCount apply(DimValueCount arg1, DimValueCount arg2) { if (arg2 == null) { return arg1; } - return new Pair(arg1.lhs, arg1.rhs + arg2.rhs); + // Respect "poisoning" (negative values mean we can't use this dimension) + final int newNumRows = (arg1.numRows >= 0 && arg2.numRows >= 0 ? arg1.numRows + arg2.numRows : -1); + return new DimValueCount(arg1.dim, arg1.value, newNumRows); } } ); } } - public static class DeterminePartitionsCombiner extends DeterminePartitionsBaseReducer + public static class DeterminePartitionsDimSelectionCombiner extends DeterminePartitionsDimSelectionBaseReducer { @Override protected void innerReduce( - Context context, SortableBytes keyBytes, Iterable> combinedIterable + Context context, SortableBytes keyBytes, Iterable combinedIterable ) throws IOException, InterruptedException { - for (Pair pair : combinedIterable) { - write(context, keyBytes.getGroupKey(), pair.lhs, pair.rhs); + for (DimValueCount dvc : combinedIterable) { + write(context, keyBytes.getGroupKey(), dvc); } } } - public static class DeterminePartitionsReducer extends DeterminePartitionsBaseReducer + public static class DeterminePartitionsDimSelectionReducer extends DeterminePartitionsDimSelectionBaseReducer { - String previousBoundary; - long runningTotal; + private static final double SHARD_COMBINE_THRESHOLD = 0.25; + private static final double SHARD_OVERSIZE_THRESHOLD = 1.5; @Override protected void innerReduce( - Context context, SortableBytes keyBytes, Iterable> combinedIterable + Context context, SortableBytes keyBytes, Iterable combinedIterable ) throws IOException, InterruptedException { - PeekingIterator> iterator = Iterators.peekingIterator(combinedIterable.iterator()); - Pair totalPair = iterator.next(); + PeekingIterator iterator = Iterators.peekingIterator(combinedIterable.iterator()); - Preconditions.checkState(totalPair.lhs.equals(""), "Total pair value was[%s]!?", totalPair.lhs); - long totalRows = totalPair.rhs; + // "iterator" will take us over many candidate dimensions + DimPartitions currentDimPartitions = null; + DimPartition currentDimPartition = null; + String currentDimPartitionStart = null; + boolean currentDimSkip = false; - long numPartitions = Math.max(totalRows / config.getTargetPartitionSize(), 1); - long expectedRowsPerPartition = totalRows / numPartitions; + // We'll store possible partitions in here + final Map dimPartitionss = Maps.newHashMap(); - class PartitionsList extends ArrayList - { - } - List partitions = new PartitionsList(); + while(iterator.hasNext()) { + final DimValueCount dvc = iterator.next(); - runningTotal = 0; - Pair prev = null; - previousBoundary = null; - while (iterator.hasNext()) { - Pair curr = iterator.next(); - - if (runningTotal > expectedRowsPerPartition) { - Preconditions.checkNotNull( - prev, "Prev[null] while runningTotal[%s] was > expectedRows[%s]!?", runningTotal, expectedRowsPerPartition - ); - - addPartition(partitions, curr.lhs); + if(currentDimPartitions == null || !currentDimPartitions.dim.equals(dvc.dim)) { + // Starting a new dimension! Exciting! 
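+            // Reset per-dimension state: the partitions accumulated so far, the partition being built,
+            // its starting value (null means an open start), and the multi-value poison flag.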
+ currentDimPartitions = new DimPartitions(dvc.dim); + currentDimPartition = new DimPartition(); + currentDimPartitionStart = null; + currentDimSkip = false; } - runningTotal += curr.rhs; - prev = curr; + // Respect poisoning + if(!currentDimSkip && dvc.numRows < 0) { + log.info("Cannot partition on multi-valued dimension: %s", dvc.dim); + currentDimSkip = true; + } + + if(currentDimSkip) { + continue; + } + + // See if we need to cut a new partition ending immediately before this dimension value + if(currentDimPartition.rows > 0 && currentDimPartition.rows + dvc.numRows >= config.getTargetPartitionSize()) { + final ShardSpec shardSpec = new SingleDimensionShardSpec( + currentDimPartitions.dim, + currentDimPartitionStart, + dvc.value, + currentDimPartitions.partitions.size() + ); + + log.info( + "Adding possible shard with %,d rows and %,d unique values: %s", + currentDimPartition.rows, + currentDimPartition.cardinality, + shardSpec + ); + + currentDimPartition.shardSpec = shardSpec; + currentDimPartitions.partitions.add(currentDimPartition); + currentDimPartition = new DimPartition(); + currentDimPartitionStart = dvc.value; + } + + // Update counters + currentDimPartition.cardinality ++; + currentDimPartition.rows += dvc.numRows; + + if(!iterator.hasNext() || !currentDimPartitions.dim.equals(iterator.peek().dim)) { + // Finalize the current dimension + + if(currentDimPartition.rows > 0) { + // One more shard to go + final ShardSpec shardSpec; + + if (currentDimPartitions.partitions.isEmpty()) { + shardSpec = new NoneShardSpec(); + } else { + if(currentDimPartition.rows < config.getTargetPartitionSize() * SHARD_COMBINE_THRESHOLD) { + // Combine with previous shard + final DimPartition previousDimPartition = currentDimPartitions.partitions.remove( + currentDimPartitions.partitions.size() - 1 + ); + + final SingleDimensionShardSpec previousShardSpec = (SingleDimensionShardSpec) previousDimPartition.shardSpec; + + shardSpec = new SingleDimensionShardSpec( + currentDimPartitions.dim, + previousShardSpec.getStart(), + null, + previousShardSpec.getPartitionNum() + ); + + log.info("Removing possible shard: %s", previousShardSpec); + + currentDimPartition.rows += previousDimPartition.rows; + currentDimPartition.cardinality += previousDimPartition.cardinality; + } else { + // Create new shard + shardSpec = new SingleDimensionShardSpec( + currentDimPartitions.dim, + currentDimPartitionStart, + null, + currentDimPartitions.partitions.size() + ); + } + } + + log.info( + "Adding possible shard with %,d rows and %,d unique values: %s", + currentDimPartition.rows, + currentDimPartition.cardinality, + shardSpec + ); + + currentDimPartition.shardSpec = shardSpec; + currentDimPartitions.partitions.add(currentDimPartition); + } + + log.info( + "Completed dimension[%s]: %,d possible shards with %,d unique values", + currentDimPartitions.dim, + currentDimPartitions.partitions.size(), + currentDimPartitions.getCardinality() + ); + + // Add ourselves to the partitions map + dimPartitionss.put(currentDimPartitions.dim, currentDimPartitions); + } } - if (partitions.isEmpty()) { - partitions.add(new NoneShardSpec()); - } else if (((double) runningTotal / (double) expectedRowsPerPartition) < 0.25) { - final SingleDimensionShardSpec lastSpec = (SingleDimensionShardSpec) partitions.remove(partitions.size() - 1); - partitions.add( - new SingleDimensionShardSpec( - config.getPartitionDimension(), - lastSpec.getStart(), - null, - lastSpec.getPartitionNum() - ) - ); - } else { - partitions.add( - new 
SingleDimensionShardSpec( - config.getPartitionDimension(), - previousBoundary, - null, - partitions.size() - ) - ); + // Choose best dimension + if(dimPartitionss.isEmpty()) { + throw new ISE("No suitable partitioning dimension found!"); } - DateTime bucket = new DateTime( - Iterables.get(keySplitter.split(new String(keyBytes.getGroupKey(), Charsets.UTF_8)), 0) - ); - OutputStream out = Utils.makePathAndOutputStream( + final int totalRows = dimPartitionss.values().iterator().next().getRows(); + + int maxCardinality = -1; + DimPartitions maxCardinalityPartitions = null; + + for(final DimPartitions dimPartitions : dimPartitionss.values()) { + if(dimPartitions.getRows() != totalRows) { + throw new ISE( + "WTF?! Dimension[%s] row count %,d != expected row count %,d", + dimPartitions.dim, + dimPartitions.getRows(), + totalRows + ); + } + + // Make sure none of these shards are oversized + boolean oversized = false; + for(final DimPartition partition : dimPartitions.partitions) { + if(partition.rows > config.getTargetPartitionSize() * SHARD_OVERSIZE_THRESHOLD) { + log.info("Dimension[%s] has an oversized shard: %s", dimPartitions.dim, partition.shardSpec); + oversized = true; + } + } + + if(oversized) { + continue; + } + + if(dimPartitions.getCardinality() > maxCardinality) { + maxCardinality = dimPartitions.getCardinality(); + maxCardinalityPartitions = dimPartitions; + } + } + + if(maxCardinalityPartitions == null) { + throw new ISE("No suitable partitioning dimension found!"); + } + + final DateTime bucket = new DateTime(new String(keyBytes.getGroupKey(), Charsets.UTF_8)); + final OutputStream out = Utils.makePathAndOutputStream( context, config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)), config.isOverwriteFiles() ); - for (ShardSpec partition : partitions) { - log.info("%s", partition); + final List chosenShardSpecs = Lists.transform( + maxCardinalityPartitions.partitions, new Function() + { + @Override + public ShardSpec apply(DimPartition dimPartition) + { + return dimPartition.shardSpec; + } + } + ); + + log.info("Chosen partitions:"); + for (ShardSpec shardSpec : chosenShardSpecs) { + log.info(" %s", shardSpec); } try { - config.jsonMapper.writeValue(out, partitions); + HadoopDruidIndexerConfig.jsonMapper.writerWithType(new TypeReference>() {}).writeValue( + out, + chosenShardSpecs + ); } finally { Closeables.close(out, false); } } - - private void addPartition(List partitions, String boundary) - { - partitions.add( - new SingleDimensionShardSpec( - config.getPartitionDimension(), - previousBoundary, - boundary, - partitions.size() - ) - ); - previousBoundary = boundary; - runningTotal = 0; - } } - public static class DeterminePartitionsOutputFormat extends FileOutputFormat + public static class DeterminePartitionsDimSelectionOutputFormat extends FileOutputFormat { @Override public RecordWriter getRecordWriter(final TaskAttemptContext job) throws IOException, InterruptedException @@ -444,17 +732,81 @@ public class DeterminePartitionsJob implements Jobby } } + private static class DimPartitions + { + public final String dim; + public final List partitions = Lists.newArrayList(); + + private DimPartitions(String dim) + { + this.dim = dim; + } + + public int getCardinality() + { + int sum = 0; + for(final DimPartition dimPartition : partitions) { + sum += dimPartition.cardinality; + } + return sum; + } + + public int getRows() + { + int sum = 0; + for(final DimPartition dimPartition : partitions) { + sum += dimPartition.rows; + } + return sum; + } + } + + private static class 
DimPartition + { + public ShardSpec shardSpec = null; + public int cardinality = 0; + public int rows = 0; + } + + private static class DimValueCount + { + public final String dim; + public final String value; + public final int numRows; + + private DimValueCount(String dim, String value, int numRows) + { + this.dim = dim; + this.value = value; + this.numRows = numRows; + } + + public Text toText() + { + return new Text(tabJoiner.join(dim, String.valueOf(numRows), value)); + } + + public static DimValueCount fromText(Text text) + { + final Iterator splits = tabSplitter.limit(3).split(text.toString()).iterator(); + final String dim = splits.next(); + final int numRows = Integer.parseInt(splits.next()); + final String value = splits.next(); + + return new DimValueCount(dim, value, numRows); + } + } + private static void write( TaskInputOutputContext context, final byte[] groupKey, - String value, - long numRows + DimValueCount dimValueCount ) throws IOException, InterruptedException { context.write( - new SortableBytes(groupKey, value.getBytes(HadoopDruidIndexerConfig.javaNativeCharset)).toBytesWritable(), - new Text(tabJoiner.join(value, numRows)) + new SortableBytes(groupKey, tabJoiner.join(dimValueCount.dim, dimValueCount.value).getBytes(HadoopDruidIndexerConfig.javaNativeCharset)).toBytesWritable(), + dimValueCount.toText() ); } } diff --git a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java index 403484b9c61..3d682dadce0 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java @@ -34,15 +34,20 @@ import com.metamx.common.MapUtils; import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.logger.Logger; import com.metamx.druid.RegisteringNode; +import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.v1.serde.Registererer; import com.metamx.druid.indexer.data.DataSpec; +import com.metamx.druid.indexer.data.StringInputRowParser; +import com.metamx.druid.indexer.data.TimestampSpec; import com.metamx.druid.indexer.data.ToLowercaseDataSpec; import com.metamx.druid.indexer.granularity.GranularitySpec; import com.metamx.druid.indexer.granularity.UniformGranularitySpec; +import com.metamx.druid.indexer.partitions.PartitionsSpec; import com.metamx.druid.indexer.path.PathSpec; import com.metamx.druid.indexer.rollup.DataRollupSpec; import com.metamx.druid.indexer.updater.UpdaterJobSpec; +import com.metamx.druid.input.InputRow; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.shard.ShardSpec; import com.metamx.druid.utils.JodaUtils; @@ -50,6 +55,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.Job; import org.codehaus.jackson.JsonGenerator; +import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.type.TypeReference; @@ -60,8 +66,6 @@ import org.joda.time.format.ISODateTimeFormat; import javax.annotation.Nullable; import java.io.File; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.charset.Charset; import java.util.Arrays; import java.util.Collections; @@ -162,8 +166,6 @@ public class HadoopDruidIndexerConfig private static 
final String CONFIG_PROPERTY = "druid.indexer.config"; - @Deprecated - private volatile List intervals; private volatile String dataSource; private volatile String timestampColumnName; private volatile String timestampFormat; @@ -175,8 +177,7 @@ public class HadoopDruidIndexerConfig private volatile String jobOutputDir; private volatile String segmentOutputDir; private volatile DateTime version = new DateTime(); - private volatile String partitionDimension; - private volatile Long targetPartitionSize; + private volatile PartitionsSpec partitionsSpec; private volatile boolean leaveIntermediate = false; private volatile boolean cleanupOnFailure = true; private volatile Map> shardSpecs = ImmutableMap.of(); @@ -186,22 +187,97 @@ public class HadoopDruidIndexerConfig private volatile boolean ignoreInvalidRows = false; private volatile List registererers = Lists.newArrayList(); + @JsonCreator + public HadoopDruidIndexerConfig( + final @JsonProperty("intervals") List intervals, + final @JsonProperty("dataSource") String dataSource, + final @JsonProperty("timestampColumnName") String timestampColumnName, + final @JsonProperty("timestampFormat") String timestampFormat, + final @JsonProperty("dataSpec") DataSpec dataSpec, + final @JsonProperty("segmentGranularity") Granularity segmentGranularity, + final @JsonProperty("granularitySpec") GranularitySpec granularitySpec, + final @JsonProperty("pathSpec") PathSpec pathSpec, + final @JsonProperty("jobOutputDir") String jobOutputDir, + final @JsonProperty("segmentOutputDir") String segmentOutputDir, + final @JsonProperty("version") DateTime version, + final @JsonProperty("partitionDimension") String partitionDimension, + final @JsonProperty("targetPartitionSize") Long targetPartitionSize, + final @JsonProperty("partitionsSpec") PartitionsSpec partitionsSpec, + final @JsonProperty("leaveIntermediate") boolean leaveIntermediate, + final @JsonProperty("cleanupOnFailure") boolean cleanupOnFailure, + final @JsonProperty("shardSpecs") Map> shardSpecs, + final @JsonProperty("overwriteFiles") boolean overwriteFiles, + final @JsonProperty("rollupSpec") DataRollupSpec rollupSpec, + final @JsonProperty("updaterJobSpec") UpdaterJobSpec updaterJobSpec, + final @JsonProperty("ignoreInvalidRows") boolean ignoreInvalidRows, + final @JsonProperty("registererers") List registererers + ) + { + this.dataSource = dataSource; + this.timestampColumnName = timestampColumnName; + this.timestampFormat = timestampFormat; + this.dataSpec = dataSpec; + this.granularitySpec = granularitySpec; + this.pathSpec = pathSpec; + this.jobOutputDir = jobOutputDir; + this.segmentOutputDir = segmentOutputDir; + this.version = version; + this.partitionsSpec = partitionsSpec; + this.leaveIntermediate = leaveIntermediate; + this.cleanupOnFailure = cleanupOnFailure; + this.shardSpecs = shardSpecs; + this.overwriteFiles = overwriteFiles; + this.rollupSpec = rollupSpec; + this.updaterJobSpec = updaterJobSpec; + this.ignoreInvalidRows = ignoreInvalidRows; + this.registererers = registererers; + + if(partitionsSpec != null) { + Preconditions.checkArgument( + partitionDimension == null && targetPartitionSize == null, + "Cannot mix partitionsSpec with partitionDimension/targetPartitionSize" + ); + + this.partitionsSpec = partitionsSpec; + } else { + // Backwards compatibility + this.partitionsSpec = new PartitionsSpec(partitionDimension, targetPartitionSize, false); + } + + if(granularitySpec != null) { + Preconditions.checkArgument( + segmentGranularity == null && intervals == null, + "Cannot mix 
granularitySpec with segmentGranularity/intervals" + ); + } else { + // Backwards compatibility + this.segmentGranularity = segmentGranularity; + if(segmentGranularity != null && intervals != null) { + this.granularitySpec = new UniformGranularitySpec(segmentGranularity, intervals); + } + } + } + + /** + * Default constructor does nothing. The caller is expected to use the various setX methods. + */ + public HadoopDruidIndexerConfig() + { + } + public List getIntervals() { return JodaUtils.condenseIntervals(getGranularitySpec().bucketIntervals()); } @Deprecated - @JsonProperty public void setIntervals(List intervals) { - Preconditions.checkState(this.granularitySpec == null, "Use setGranularitySpec"); + Preconditions.checkState(this.granularitySpec == null, "Cannot mix setIntervals with granularitySpec"); + Preconditions.checkState(this.segmentGranularity != null, "Cannot use setIntervals without segmentGranularity"); // For backwards compatibility - this.intervals = intervals; - if (this.segmentGranularity != null) { - this.granularitySpec = new UniformGranularitySpec(this.segmentGranularity, this.intervals); - } + this.granularitySpec = new UniformGranularitySpec(this.segmentGranularity, intervals); } @JsonProperty @@ -237,6 +313,11 @@ public class HadoopDruidIndexerConfig this.timestampFormat = timestampFormat; } + public TimestampSpec getTimestampSpec() + { + return new TimestampSpec(timestampColumnName, timestampFormat); + } + @JsonProperty public DataSpec getDataSpec() { @@ -248,17 +329,30 @@ public class HadoopDruidIndexerConfig this.dataSpec = new ToLowercaseDataSpec(dataSpec); } - @Deprecated - @JsonProperty - public void setSegmentGranularity(Granularity segmentGranularity) + public StringInputRowParser getParser() { - Preconditions.checkState(this.granularitySpec == null, "Use setGranularitySpec"); + final List dimensionExclusions; - // For backwards compatibility - this.segmentGranularity = segmentGranularity; - if (this.intervals != null) { - this.granularitySpec = new UniformGranularitySpec(this.segmentGranularity, this.intervals); + if(getDataSpec().hasCustomDimensions()) { + dimensionExclusions = null; + } else { + dimensionExclusions = Lists.newArrayList(); + dimensionExclusions.add(getTimestampColumnName()); + dimensionExclusions.addAll( + Lists.transform( + getRollupSpec().getAggs(), new Function() + { + @Override + public String apply(AggregatorFactory aggregatorFactory) + { + return aggregatorFactory.getName(); + } + } + ) + ); } + + return new StringInputRowParser(getTimestampSpec(), getDataSpec(), dimensionExclusions); } @JsonProperty @@ -269,15 +363,20 @@ public class HadoopDruidIndexerConfig public void setGranularitySpec(GranularitySpec granularitySpec) { - Preconditions.checkState(this.intervals == null, "Use setGranularitySpec instead of setIntervals"); - Preconditions.checkState( - this.segmentGranularity == null, - "Use setGranularitySpec instead of setSegmentGranularity" - ); - this.granularitySpec = granularitySpec; } + @JsonProperty + public PartitionsSpec getPartitionsSpec() + { + return partitionsSpec; + } + + public void setPartitionsSpec(PartitionsSpec partitionsSpec) + { + this.partitionsSpec = partitionsSpec; + } + @JsonProperty public PathSpec getPathSpec() { @@ -322,31 +421,19 @@ public class HadoopDruidIndexerConfig this.version = version; } - @JsonProperty public String getPartitionDimension() { - return partitionDimension; - } - - public void setPartitionDimension(String partitionDimension) - { - this.partitionDimension = (partitionDimension == 
null) ? partitionDimension : partitionDimension; + return partitionsSpec.getPartitionDimension(); } public boolean partitionByDimension() { - return partitionDimension != null; + return partitionsSpec.isDeterminingPartitions(); } - @JsonProperty public Long getTargetPartitionSize() { - return targetPartitionSize; - } - - public void setTargetPartitionSize(Long targetPartitionSize) - { - this.targetPartitionSize = targetPartitionSize; + return partitionsSpec.getTargetPartitionSize(); } public boolean isUpdaterJobSpecSet() @@ -447,21 +534,15 @@ public class HadoopDruidIndexerConfig ********************************************/ /** - * Get the proper bucket for this "row" + * Get the proper bucket for some input row. * - * @param theMap a Map that represents a "row", keys are column names, values are, well, values + * @param inputRow an InputRow * * @return the Bucket that this row belongs to */ - public Optional getBucket(Map theMap) + public Optional getBucket(InputRow inputRow) { - final Optional timeBucket = getGranularitySpec().bucketInterval( - new DateTime( - theMap.get( - getTimestampColumnName() - ) - ) - ); + final Optional timeBucket = getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch())); if (!timeBucket.isPresent()) { return Optional.absent(); } @@ -473,7 +554,7 @@ public class HadoopDruidIndexerConfig for (final HadoopyShardSpec hadoopyShardSpec : shards) { final ShardSpec actualSpec = hadoopyShardSpec.getActualSpec(); - if (actualSpec.isInChunk(theMap)) { + if (actualSpec.isInChunk(inputRow)) { return Optional.of( new Bucket( hadoopyShardSpec.getShardNum(), @@ -484,7 +565,7 @@ public class HadoopDruidIndexerConfig } } - throw new ISE("row[%s] doesn't fit in any shard[%s]", theMap, shards); + throw new ISE("row[%s] doesn't fit in any shard[%s]", inputRow, shards); } public Set getSegmentGranularIntervals() @@ -566,6 +647,11 @@ public class HadoopDruidIndexerConfig return new Path(makeIntermediatePath(), "segmentDescriptorInfo"); } + public Path makeGroupedDataDir() + { + return new Path(makeIntermediatePath(), "groupedData"); + } + public Path makeDescriptorInfoPath(DataSegment segment) { return new Path(makeDescriptorInfoDir(), String.format("%s.json", segment.getIdentifier().replace(":", ""))); @@ -626,10 +712,5 @@ public class HadoopDruidIndexerConfig final int nIntervals = getIntervals().size(); Preconditions.checkArgument(nIntervals > 0, "intervals.size()[%s] <= 0", nIntervals); - - if (partitionByDimension()) { - Preconditions.checkNotNull(partitionDimension); - Preconditions.checkNotNull(targetPartitionSize); - } } } diff --git a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerMapper.java b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerMapper.java new file mode 100644 index 00000000000..651cb757023 --- /dev/null +++ b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerMapper.java @@ -0,0 +1,66 @@ +package com.metamx.druid.indexer; + +import com.metamx.common.RE; +import com.metamx.druid.indexer.data.StringInputRowParser; +import com.metamx.druid.input.InputRow; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.joda.time.DateTime; + +import java.io.IOException; + +public abstract class HadoopDruidIndexerMapper extends Mapper +{ + private HadoopDruidIndexerConfig config; + private StringInputRowParser parser; + + @Override + protected void setup(Context context) + throws IOException, InterruptedException + { + 
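+    // Recover the indexer config from the serialized Hadoop job configuration and build the shared row parser from it.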
config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration()); + parser = config.getParser(); + } + + public HadoopDruidIndexerConfig getConfig() + { + return config; + } + + public StringInputRowParser getParser() + { + return parser; + } + + @Override + protected void map( + LongWritable key, Text value, Context context + ) throws IOException, InterruptedException + { + try { + final InputRow inputRow; + try { + inputRow = parser.parse(value.toString()); + } + catch (IllegalArgumentException e) { + if (config.isIgnoreInvalidRows()) { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); + return; // we're ignoring this invalid row + } else { + throw e; + } + } + + if(config.getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch())).isPresent()) { + innerMap(inputRow, value, context); + } + } + catch (RuntimeException e) { + throw new RE(e, "Failure on row[%s]", value); + } + } + + abstract protected void innerMap(InputRow inputRow, Text text, Context context) + throws IOException, InterruptedException; +} diff --git a/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java b/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java index 34d743fc9be..d8eba264c11 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java @@ -19,31 +19,25 @@ package com.metamx.druid.indexer; -import com.google.common.base.Function; import com.google.common.base.Optional; -import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Closeables; import com.google.common.primitives.Longs; import com.metamx.common.ISE; -import com.metamx.common.RE; -import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.logger.Logger; -import com.metamx.common.parsers.Parser; -import com.metamx.common.parsers.ParserUtils; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IncrementalIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; +import com.metamx.druid.indexer.data.StringInputRowParser; import com.metamx.druid.indexer.rollup.DataRollupSpec; -import com.metamx.druid.input.MapBasedInputRow; +import com.metamx.druid.input.InputRow; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -53,13 +47,11 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3native.NativeS3FileSystem; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InvalidJobConfException; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; @@ -68,7 
+60,6 @@ import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.joda.time.DateTime; import org.joda.time.Interval; -import javax.annotation.Nullable; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; @@ -78,7 +69,6 @@ import java.net.URI; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -127,7 +117,7 @@ public class IndexGeneratorJob implements Jobby job.setMapperClass(IndexGeneratorMapper.class); job.setMapOutputValueClass(Text.class); - SortableBytes.useSortableBytesAsKey(job); + SortableBytes.useSortableBytesAsMapOutputKey(job); job.setNumReduceTasks(Iterables.size(config.getAllBuckets())); job.setPartitionerClass(IndexGeneratorPartitioner.class); @@ -144,7 +134,7 @@ public class IndexGeneratorJob implements Jobby job.setJarByClass(IndexGeneratorJob.class); job.submit(); - log.info("Job submitted, status available at %s", job.getTrackingURL()); + log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL()); boolean success = job.waitForCompletion(true); @@ -159,75 +149,29 @@ public class IndexGeneratorJob implements Jobby } } - public static class IndexGeneratorMapper extends Mapper + public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper { - private HadoopDruidIndexerConfig config; - private Parser parser; - private Function timestampConverter; - @Override - protected void setup(Context context) - throws IOException, InterruptedException - { - config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration()); - parser = config.getDataSpec().getParser(); - timestampConverter = ParserUtils.createTimestampParser(config.getTimestampFormat()); - } - - @Override - protected void map( - LongWritable key, Text value, Context context + protected void innerMap( + InputRow inputRow, + Text text, + Context context ) throws IOException, InterruptedException { + // Group by bucket, sort by timestamp + final Optional bucket = getConfig().getBucket(inputRow); - try { - final Map values = parser.parse(value.toString()); - - final String tsStr = (String) values.get(config.getTimestampColumnName()); - final DateTime timestamp; - try { - timestamp = timestampConverter.apply(tsStr); - } - catch (IllegalArgumentException e) { - if (config.isIgnoreInvalidRows()) { - context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); - return; // we're ignoring this invalid row - } else { - throw e; - } - } - - Optional bucket = config.getBucket( - Maps.transformEntries( - values, - new Maps.EntryTransformer() - { - @Override - public String transformEntry(@Nullable String key, @Nullable Object value) - { - if (key.equalsIgnoreCase(config.getTimestampColumnName())) { - return timestamp.toString(); - } - return value.toString(); - } - } - ) - ); - - if (bucket.isPresent()) { - // Group by bucket, sort by timestamp - context.write( - new SortableBytes( - bucket.get().toGroupKey(), - Longs.toByteArray(timestamp.getMillis()) - ).toBytesWritable(), - value - ); - } - } - catch (RuntimeException e) { - throw new RE(e, "Failure on row[%s]", value); + if(!bucket.isPresent()) { + throw new ISE("WTF?! 
No bucket found for row: %s", inputRow); } + + context.write( + new SortableBytes( + bucket.get().toGroupKey(), + Longs.toByteArray(inputRow.getTimestampFromEpoch()) + ).toBytesWritable(), + text + ); } } @@ -253,8 +197,7 @@ public class IndexGeneratorJob implements Jobby { private HadoopDruidIndexerConfig config; private List metricNames = Lists.newArrayList(); - private Function timestampConverter; - private Parser parser; + private StringInputRowParser parser; @Override protected void setup(Context context) @@ -265,8 +208,8 @@ public class IndexGeneratorJob implements Jobby for (AggregatorFactory factory : config.getRollupSpec().getAggs()) { metricNames.add(factory.getName().toLowerCase()); } - timestampConverter = ParserUtils.createTimestampParser(config.getTimestampFormat()); - parser = config.getDataSpec().getParser(); + + parser = config.getParser(); } @Override @@ -299,32 +242,10 @@ public class IndexGeneratorJob implements Jobby for (final Text value : values) { context.progress(); - Map event = parser.parse(value.toString()); - final long timestamp = timestampConverter.apply((String) event.get(config.getTimestampColumnName())) - .getMillis(); - List dimensionNames = - config.getDataSpec().hasCustomDimensions() ? - config.getDataSpec().getDimensions() : - Lists.newArrayList( - FunctionalIterable.create(event.keySet()) - .filter( - new Predicate() - { - @Override - public boolean apply(@Nullable String input) - { - return !(metricNames.contains(input.toLowerCase()) - || config.getTimestampColumnName() - .equalsIgnoreCase(input)); - } - } - ) - ); - allDimensionNames.addAll(dimensionNames); + final InputRow inputRow = parser.parse(value.toString()); + allDimensionNames.addAll(inputRow.getDimensions()); - int numRows = index.add( - new MapBasedInputRow(timestamp, dimensionNames, event) - ); + int numRows = index.add(inputRow); ++lineCount; if (numRows >= rollupSpec.rowFlushBoundary) { @@ -359,7 +280,7 @@ public class IndexGeneratorJob implements Jobby log.info("%,d lines completed.", lineCount); - List indexes = Lists.newArrayListWithCapacity(indexCount); + List indexes = Lists.newArrayListWithCapacity(indexCount); final File mergedBase; if (toMerge.size() == 0) { @@ -389,9 +310,9 @@ public class IndexGeneratorJob implements Jobby toMerge.add(finalFile); for (File file : toMerge) { - indexes.add(IndexIO.mapDir(file)); + indexes.add(IndexIO.loadIndex(file)); } - mergedBase = IndexMerger.mergeMMapped( + mergedBase = IndexMerger.mergeQueryableIndex( indexes, aggs, new File(baseFlushFile, "merged"), new IndexMerger.ProgressIndicator() { @Override @@ -472,6 +393,7 @@ public class IndexGeneratorJob implements Jobby dimensionNames, metricNames, config.getShardSpec(bucket).getActualSpec(), + IndexIO.getVersionFromDir(mergedBase), size ); diff --git a/indexer/src/main/java/com/metamx/druid/indexer/SortableBytes.java b/indexer/src/main/java/com/metamx/druid/indexer/SortableBytes.java index 3abaa7951b8..394f9dacffb 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/SortableBytes.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/SortableBytes.java @@ -102,7 +102,7 @@ public class SortableBytes ); } - public static void useSortableBytesAsKey(Job job) + public static void useSortableBytesAsMapOutputKey(Job job) { job.setMapOutputKeyClass(BytesWritable.class); job.setGroupingComparatorClass(SortableBytesGroupingComparator.class); diff --git a/indexer/src/main/java/com/metamx/druid/indexer/granularity/UniformGranularitySpec.java 
b/indexer/src/main/java/com/metamx/druid/indexer/granularity/UniformGranularitySpec.java index a1039caba1c..51d2f37d437 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/granularity/UniformGranularitySpec.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/granularity/UniformGranularitySpec.java @@ -20,6 +20,9 @@ package com.metamx.druid.indexer.granularity; import com.google.common.base.Optional; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.metamx.common.Granularity; import com.metamx.common.guava.Comparators; @@ -35,47 +38,47 @@ import java.util.TreeSet; public class UniformGranularitySpec implements GranularitySpec { final private Granularity granularity; - final private List intervals; + final private List inputIntervals; + final private ArbitraryGranularitySpec wrappedSpec; @JsonCreator public UniformGranularitySpec( @JsonProperty("gran") Granularity granularity, - @JsonProperty("intervals") List intervals + @JsonProperty("intervals") List inputIntervals ) { + List granularIntervals = Lists.newArrayList(); + + for (Interval inputInterval : inputIntervals) { + Iterables.addAll(granularIntervals, granularity.getIterable(inputInterval)); + } + this.granularity = granularity; - this.intervals = intervals; + this.inputIntervals = ImmutableList.copyOf(inputIntervals); + this.wrappedSpec = new ArbitraryGranularitySpec(granularIntervals); } @Override public SortedSet bucketIntervals() { - final TreeSet retVal = Sets.newTreeSet(Comparators.intervals()); - - for (Interval interval : intervals) { - for (Interval segmentInterval : granularity.getIterable(interval)) { - retVal.add(segmentInterval); - } - } - - return retVal; + return wrappedSpec.bucketIntervals(); } @Override public Optional bucketInterval(DateTime dt) { - return Optional.of(granularity.bucket(dt)); + return wrappedSpec.bucketInterval(dt); } - @JsonProperty + @JsonProperty("gran") public Granularity getGranularity() { return granularity; } - @JsonProperty + @JsonProperty("intervals") public Iterable getIntervals() { - return intervals; + return inputIntervals; } } diff --git a/indexer/src/main/java/com/metamx/druid/indexer/partitions/PartitionsSpec.java b/indexer/src/main/java/com/metamx/druid/indexer/partitions/PartitionsSpec.java new file mode 100644 index 00000000000..2d00cf71f06 --- /dev/null +++ b/indexer/src/main/java/com/metamx/druid/indexer/partitions/PartitionsSpec.java @@ -0,0 +1,52 @@ +package com.metamx.druid.indexer.partitions; + +import org.codehaus.jackson.annotate.JsonIgnore; +import org.codehaus.jackson.annotate.JsonProperty; + +import javax.annotation.Nullable; + +public class PartitionsSpec +{ + @Nullable + private final String partitionDimension; + + private final long targetPartitionSize; + + private final boolean assumeGrouped; + + public PartitionsSpec( + @JsonProperty("partitionDimension") @Nullable String partitionDimension, + @JsonProperty("targetPartitionSize") @Nullable Long targetPartitionSize, + @JsonProperty("assumeGrouped") @Nullable Boolean assumeGrouped + ) + { + this.partitionDimension = partitionDimension; + this.targetPartitionSize = targetPartitionSize == null ? -1 : targetPartitionSize; + this.assumeGrouped = assumeGrouped == null ? 
false : assumeGrouped; + } + + @JsonIgnore + public boolean isDeterminingPartitions() + { + return targetPartitionSize > 0; + } + + @JsonProperty + @Nullable + public String getPartitionDimension() + { + return partitionDimension; + } + + @JsonProperty + public long getTargetPartitionSize() + { + return targetPartitionSize; + } + + @JsonProperty + public boolean isAssumeGrouped() + { + return assumeGrouped; + } +} diff --git a/indexer/src/test/java/com/metamx/druid/indexer/HadoopDruidIndexerConfigTest.java b/indexer/src/test/java/com/metamx/druid/indexer/HadoopDruidIndexerConfigTest.java index 6bb56df31f5..f4db1148327 100644 --- a/indexer/src/test/java/com/metamx/druid/indexer/HadoopDruidIndexerConfigTest.java +++ b/indexer/src/test/java/com/metamx/druid/indexer/HadoopDruidIndexerConfigTest.java @@ -22,6 +22,7 @@ package com.metamx.druid.indexer; import com.google.common.base.Throwables; import com.google.common.collect.Lists; import com.metamx.druid.indexer.granularity.UniformGranularitySpec; +import com.metamx.druid.indexer.partitions.PartitionsSpec; import com.metamx.druid.jackson.DefaultObjectMapper; import org.codehaus.jackson.map.ObjectMapper; import org.joda.time.Interval; @@ -67,7 +68,7 @@ public class HadoopDruidIndexerConfigTest } @Test - public void testIntervalsAndSegmentGranularity() { + public void testGranularitySpecLegacy() { // Deprecated and replaced by granularitySpec, but still supported final HadoopDruidIndexerConfig cfg; @@ -98,9 +99,8 @@ public class HadoopDruidIndexerConfigTest ); } - @Test - public void testCmdlineAndSegmentGranularity() { + public void testGranularitySpecPostConstructorIntervals() { // Deprecated and replaced by granularitySpec, but still supported final HadoopDruidIndexerConfig cfg; @@ -133,7 +133,7 @@ public class HadoopDruidIndexerConfigTest } @Test - public void testInvalidCombination() { + public void testInvalidGranularityCombination() { boolean thrown = false; try { final HadoopDruidIndexerConfig cfg = jsonMapper.readValue( @@ -154,4 +154,160 @@ public class HadoopDruidIndexerConfigTest Assert.assertTrue("Exception thrown", thrown); } + + @Test + public void testPartitionsSpecNoPartitioning() { + final HadoopDruidIndexerConfig cfg; + + try { + cfg = jsonMapper.readValue( + "{}", + HadoopDruidIndexerConfig.class + ); + } catch(Exception e) { + throw Throwables.propagate(e); + } + + final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec(); + + Assert.assertEquals( + "isDeterminingPartitions", + partitionsSpec.isDeterminingPartitions(), + false + ); + } + + @Test + public void testPartitionsSpecAutoDimension() { + final HadoopDruidIndexerConfig cfg; + + try { + cfg = jsonMapper.readValue( + "{" + + "\"partitionsSpec\":{" + + " \"targetPartitionSize\":100" + + " }" + + "}", + HadoopDruidIndexerConfig.class + ); + } catch(Exception e) { + throw Throwables.propagate(e); + } + + final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec(); + + Assert.assertEquals( + "isDeterminingPartitions", + partitionsSpec.isDeterminingPartitions(), + true + ); + + Assert.assertEquals( + "getTargetPartitionSize", + partitionsSpec.getTargetPartitionSize(), + 100 + ); + + Assert.assertEquals( + "getPartitionDimension", + partitionsSpec.getPartitionDimension(), + null + ); + } + + @Test + public void testPartitionsSpecSpecificDimension() { + final HadoopDruidIndexerConfig cfg; + + try { + cfg = jsonMapper.readValue( + "{" + + "\"partitionsSpec\":{" + + " \"targetPartitionSize\":100," + + " \"partitionDimension\":\"foo\"" + + " }" + + "}", + 
HadoopDruidIndexerConfig.class + ); + } catch(Exception e) { + throw Throwables.propagate(e); + } + + final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec(); + + Assert.assertEquals( + "isDeterminingPartitions", + partitionsSpec.isDeterminingPartitions(), + true + ); + + Assert.assertEquals( + "getTargetPartitionSize", + partitionsSpec.getTargetPartitionSize(), + 100 + ); + + Assert.assertEquals( + "getPartitionDimension", + partitionsSpec.getPartitionDimension(), + "foo" + ); + } + + @Test + public void testPartitionsSpecLegacy() { + final HadoopDruidIndexerConfig cfg; + + try { + cfg = jsonMapper.readValue( + "{" + + "\"targetPartitionSize\":100," + + "\"partitionDimension\":\"foo\"" + + "}", + HadoopDruidIndexerConfig.class + ); + } catch(Exception e) { + throw Throwables.propagate(e); + } + + final PartitionsSpec partitionsSpec = cfg.getPartitionsSpec(); + + Assert.assertEquals( + "isDeterminingPartitions", + partitionsSpec.isDeterminingPartitions(), + true + ); + + Assert.assertEquals( + "getTargetPartitionSize", + partitionsSpec.getTargetPartitionSize(), + 100 + ); + + Assert.assertEquals( + "getPartitionDimension", + partitionsSpec.getPartitionDimension(), + "foo" + ); + } + + @Test + public void testInvalidPartitionsCombination() { + boolean thrown = false; + try { + final HadoopDruidIndexerConfig cfg = jsonMapper.readValue( + "{" + + "\"targetPartitionSize\":100," + + "\"partitionsSpec\":{" + + " \"targetPartitionSize\":100" + + " }" + + "}", + HadoopDruidIndexerConfig.class + ); + } catch(Exception e) { + thrown = true; + } + + Assert.assertTrue("Exception thrown", thrown); + } } diff --git a/indexer/src/test/java/com/metamx/druid/indexer/granularity/ArbitraryGranularityTest.java b/indexer/src/test/java/com/metamx/druid/indexer/granularity/ArbitraryGranularityTest.java index efc8113ee1b..0044d7d13e6 100644 --- a/indexer/src/test/java/com/metamx/druid/indexer/granularity/ArbitraryGranularityTest.java +++ b/indexer/src/test/java/com/metamx/druid/indexer/granularity/ArbitraryGranularityTest.java @@ -69,6 +69,12 @@ public class ArbitraryGranularityTest spec.bucketInterval(new DateTime("2012-01-03T01Z")) ); + Assert.assertEquals( + "2012-01-04T01Z", + Optional.absent(), + spec.bucketInterval(new DateTime("2012-01-04T01Z")) + ); + Assert.assertEquals( "2012-01-07T23:59:59.999Z", Optional.of(new Interval("2012-01-07T00Z/2012-01-08T00Z")), diff --git a/indexer/src/test/java/com/metamx/druid/indexer/granularity/UniformGranularityTest.java b/indexer/src/test/java/com/metamx/druid/indexer/granularity/UniformGranularityTest.java index 1f37da56de0..ab21be5f9f5 100644 --- a/indexer/src/test/java/com/metamx/druid/indexer/granularity/UniformGranularityTest.java +++ b/indexer/src/test/java/com/metamx/druid/indexer/granularity/UniformGranularityTest.java @@ -72,6 +72,12 @@ public class UniformGranularityTest spec.bucketInterval(new DateTime("2012-01-03T01Z")) ); + Assert.assertEquals( + "2012-01-04T01Z", + Optional.absent(), + spec.bucketInterval(new DateTime("2012-01-04T01Z")) + ); + Assert.assertEquals( "2012-01-07T23:59:59.999Z", Optional.of(new Interval("2012-01-07T00Z/2012-01-08T00Z")), diff --git a/merger/pom.xml b/merger/pom.xml index eb191b90701..80d5f0095c5 100644 --- a/merger/pom.xml +++ b/merger/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT diff --git a/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java b/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java index e631db0eea2..74a546cf696 100644 --- 
a/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java @@ -20,13 +20,13 @@ package com.metamx.druid.merger.common; import com.google.common.collect.ImmutableMap; -import com.metamx.druid.loading.S3SegmentGetter; +import com.metamx.druid.loading.S3SegmentPuller; import com.metamx.druid.loading.S3SegmentGetterConfig; -import com.metamx.druid.loading.S3ZippedSegmentGetter; -import com.metamx.druid.loading.SegmentGetter; +import com.metamx.druid.loading.S3ZippedSegmentPuller; +import com.metamx.druid.loading.SegmentPuller; import com.metamx.druid.merger.common.task.Task; import com.metamx.druid.merger.coordinator.config.IndexerCoordinatorConfig; -import com.metamx.druid.realtime.SegmentPusher; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.emitter.service.ServiceEmitter; import org.codehaus.jackson.map.ObjectMapper; import org.jets3t.service.impl.rest.httpclient.RestS3Service; @@ -85,7 +85,7 @@ public class TaskToolbox return objectMapper; } - public Map getSegmentGetters(final Task task) + public Map getSegmentGetters(final Task task) { final S3SegmentGetterConfig getterConfig = new S3SegmentGetterConfig() { @@ -96,10 +96,10 @@ public class TaskToolbox } }; - return ImmutableMap.builder() - .put("s3", new S3SegmentGetter(s3Client, getterConfig)) - .put("s3_union", new S3SegmentGetter(s3Client, getterConfig)) - .put("s3_zip", new S3ZippedSegmentGetter(s3Client, getterConfig)) + return ImmutableMap.builder() + .put("s3", new S3SegmentPuller(s3Client, getterConfig)) + .put("s3_union", new S3SegmentPuller(s3Client, getterConfig)) + .put("s3_zip", new S3ZippedSegmentPuller(s3Client, getterConfig)) .build(); } } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java index c77dfbd9e80..634fe65ebaf 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java @@ -21,22 +21,23 @@ package com.metamx.druid.merger.common.index; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.metamx.common.logger.Logger; import com.metamx.druid.Query; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.realtime.FireDepartmentMetrics; import com.metamx.druid.realtime.FireHydrant; import com.metamx.druid.realtime.Plumber; import com.metamx.druid.realtime.PlumberSchool; import com.metamx.druid.realtime.Schema; -import com.metamx.druid.realtime.SegmentPusher; import com.metamx.druid.realtime.Sink; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -82,9 +83,7 @@ public class YeOldePlumberSchool implements PlumberSchool final Sink theSink = new Sink(interval, schema); // Temporary directory to hold spilled segments. 
- final File persistDir = new File( - tmpSegmentDir, theSink.getSegment().withVersion(version).getIdentifier() - ); + final File persistDir = new File(tmpSegmentDir, theSink.getSegment().withVersion(version).getIdentifier()); // Set of spilled segments. Will be merged at the end. final Set spilled = Sets.newHashSet(); @@ -129,16 +128,23 @@ public class YeOldePlumberSchool implements PlumberSchool } else if(spilled.size() == 1) { fileToUpload = Iterables.getOnlyElement(spilled); } else { - List indexes = Lists.newArrayList(); + List indexes = Lists.newArrayList(); for (final File oneSpill : spilled) { - indexes.add(IndexIO.mapDir(oneSpill)); + indexes.add(IndexIO.loadIndex(oneSpill)); } fileToUpload = new File(tmpSegmentDir, "merged"); - IndexMerger.mergeMMapped(indexes, schema.getAggregators(), fileToUpload); + IndexMerger.mergeQueryableIndex(indexes, schema.getAggregators(), fileToUpload); } - final DataSegment segmentToUpload = theSink.getSegment().withVersion(version); + // Map merged segment so we can extract dimensions + final QueryableIndex mappedSegment = IndexIO.loadIndex(fileToUpload); + + final DataSegment segmentToUpload = theSink.getSegment() + .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())) + .withVersion(version) + .withBinaryVersion(IndexIO.getVersionFromDir(fileToUpload)); + segmentPusher.push(fileToUpload, segmentToUpload); log.info( @@ -168,7 +174,7 @@ public class YeOldePlumberSchool implements PlumberSchool dirToPersist ); - indexToPersist.swapAdapter(null); + indexToPersist.swapSegment(null); metrics.incrementRowOutputCount(rowsToPersist); diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java index f9898070aa1..f1153e5c43c 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/AppendTask.java @@ -30,7 +30,7 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; import com.metamx.druid.index.v1.IndexableAdapter; -import com.metamx.druid.index.v1.MMappedIndexAdapter; +import com.metamx.druid.index.v1.QueryableIndexIndexableAdapter; import com.metamx.druid.index.v1.Rowboat; import com.metamx.druid.index.v1.RowboatFilteringIndexAdapter; import org.codehaus.jackson.annotate.JsonCreator; @@ -90,8 +90,8 @@ public class AppendTask extends MergeTask for (final SegmentToMergeHolder holder : segmentsToMerge) { adapters.add( new RowboatFilteringIndexAdapter( - new MMappedIndexAdapter( - IndexIO.mapDir(holder.getFile()) + new QueryableIndexIndexableAdapter( + IndexIO.loadIndex(holder.getFile()) ), new Predicate() { diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java index 6dfc95c2271..e17db4b980e 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/DefaultMergeTask.java @@ -25,9 +25,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; import 
org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; @@ -57,16 +57,16 @@ public class DefaultMergeTask extends MergeTask public File merge(final Map segments, final File outDir) throws Exception { - return IndexMerger.mergeMMapped( + return IndexMerger.mergeQueryableIndex( Lists.transform( ImmutableList.copyOf(segments.values()), - new Function() + new Function() { @Override - public MMappedIndex apply(@Nullable File input) + public QueryableIndex apply(@Nullable File input) { try { - return IndexIO.mapDir(input); + return IndexIO.loadIndex(input); } catch (Exception e) { throw Throwables.propagate(e); diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java index 37187904ace..b89142ef19a 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java @@ -22,7 +22,6 @@ package com.metamx.druid.merger.common.task; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; -import com.metamx.common.exception.FormattedException; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import com.metamx.druid.input.InputRow; @@ -35,7 +34,7 @@ import com.metamx.druid.realtime.Firehose; import com.metamx.druid.realtime.FirehoseFactory; import com.metamx.druid.realtime.Plumber; import com.metamx.druid.realtime.Schema; -import com.metamx.druid.realtime.SegmentPusher; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.realtime.Sink; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java index 859352c2a7d..20fefa0014f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java @@ -34,7 +34,7 @@ import com.google.common.collect.Sets; import com.metamx.common.ISE; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; -import com.metamx.druid.loading.SegmentGetter; +import com.metamx.druid.loading.SegmentPuller; import com.metamx.druid.merger.common.TaskStatus; import com.metamx.druid.merger.common.TaskToolbox; import com.metamx.druid.merger.coordinator.TaskContext; @@ -127,27 +127,29 @@ public abstract class MergeTask extends AbstractTask final long startTime = System.currentTimeMillis(); log.info( - "Starting merge of id[%s], segments: %s", getId(), Lists.transform( - segments, - new Function() - { - @Override - public String apply(@Nullable DataSegment input) - { - return input.getIdentifier(); - } - } - ) + "Starting merge of id[%s], segments: %s", + getId(), + Lists.transform( + segments, + new Function() + { + @Override + public String apply(@Nullable DataSegment input) + { + return input.getIdentifier(); + } + } + ) ); // download segments to merge - final Map segmentGetters = toolbox.getSegmentGetters(this); + final Map segmentGetters = toolbox.getSegmentGetters(this); final Map gettedSegments = Maps.newHashMap(); for (final DataSegment segment : segments) { Map loadSpec = segment.getLoadSpec(); - SegmentGetter segmentGetter = segmentGetters.get(loadSpec.get("type")); - 
gettedSegments.put(segment, segmentGetter.getSegmentFiles(loadSpec)); + SegmentPuller segmentPuller = segmentGetters.get(loadSpec.get("type")); + gettedSegments.put(segment, segmentPuller.getSegmentFiles(segment)); } // merge files together diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java b/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java index b2059210b58..807172d11ae 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java @@ -64,7 +64,7 @@ public interface Task /** * Execute preflight checks for a task. This typically runs on the coordinator, and will be run while - * holding a lock on our dataSouce and interval. If this method throws an exception, the task should be + * holding a lock on our dataSource and interval. If this method throws an exception, the task should be * considered a failure. * * @param context Context for this task, gathered under indexer lock diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/V8toV9UpgradeTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/V8toV9UpgradeTask.java new file mode 100644 index 00000000000..1a718fdaf20 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/V8toV9UpgradeTask.java @@ -0,0 +1,39 @@ +package com.metamx.druid.merger.common.task; + +import com.metamx.druid.merger.common.TaskStatus; +import com.metamx.druid.merger.common.TaskToolbox; +import com.metamx.druid.merger.coordinator.TaskContext; +import org.codehaus.jackson.annotate.JsonProperty; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +/** + */ +public class V8toV9UpgradeTask extends AbstractTask +{ + public V8toV9UpgradeTask( + @JsonProperty("dataSource") String dataSource, + @JsonProperty("interval") Interval interval + ) + { + super( + String.format("v8tov9_%s_%s_%s", dataSource, interval.toString().replace("/", "_"), new DateTime()), + dataSource, + interval + ); + } + + @Override + public Type getType() + { + throw new UnsupportedOperationException("Do we really need to return a Type?"); + } + + @Override + public TaskStatus run( + TaskContext context, TaskToolbox toolbox + ) throws Exception + { + throw new UnsupportedOperationException(); + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java index 2a235b88d86..a95f64cb623 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java @@ -39,6 +39,7 @@ import com.metamx.druid.merger.common.task.Task; import com.metamx.druid.merger.coordinator.config.RemoteTaskRunnerConfig; import com.metamx.druid.merger.coordinator.scaling.AutoScalingData; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.merger.worker.Worker; import com.metamx.emitter.EmittingLogger; import com.netflix.curator.framework.CuratorFramework; @@ -52,7 +53,7 @@ import org.joda.time.DateTime; import org.joda.time.Duration; import org.joda.time.Period; -import javax.annotation.Nullable; +import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -88,6 +89,7 @@ public class RemoteTaskRunner implements TaskRunner private final ScheduledExecutorService 
scheduledExec; private final RetryPolicyFactory retryPolicyFactory; private final ScalingStrategy strategy; + private final WorkerSetupManager workerSetupManager; // all workers that exist in ZK private final Map zkWorkers = new ConcurrentHashMap(); @@ -109,7 +111,8 @@ public class RemoteTaskRunner implements TaskRunner PathChildrenCache workerPathCache, ScheduledExecutorService scheduledExec, RetryPolicyFactory retryPolicyFactory, - ScalingStrategy strategy + ScalingStrategy strategy, + WorkerSetupManager workerSetupManager ) { this.jsonMapper = jsonMapper; @@ -119,6 +122,7 @@ public class RemoteTaskRunner implements TaskRunner this.scheduledExec = scheduledExec; this.retryPolicyFactory = retryPolicyFactory; this.strategy = strategy; + this.workerSetupManager = workerSetupManager; } @LifecycleStart @@ -144,7 +148,7 @@ public class RemoteTaskRunner implements TaskRunner Worker.class ); log.info("Worker[%s] removed!", worker.getHost()); - removeWorker(worker.getHost()); + removeWorker(worker); } } } @@ -169,26 +173,23 @@ public class RemoteTaskRunner implements TaskRunner public void run() { if (currentlyTerminating.isEmpty()) { - if (zkWorkers.size() <= config.getMinNumWorkers()) { + if (zkWorkers.size() <= workerSetupManager.getWorkerSetupData().getMinNumWorkers()) { return; } - List thoseLazyWorkers = Lists.newArrayList( - FunctionalIterable - .create(zkWorkers.values()) - .filter( - new Predicate() - { - @Override - public boolean apply(@Nullable WorkerWrapper input) - { - return input.getRunningTasks().isEmpty() - && System.currentTimeMillis() - input.getLastCompletedTaskTime().getMillis() - > config.getMaxWorkerIdleTimeMillisBeforeDeletion(); - } - } - ) - ); + int workerCount = 0; + List thoseLazyWorkers = Lists.newArrayList(); + for (WorkerWrapper workerWrapper : zkWorkers.values()) { + workerCount++; + + if (workerCount > workerSetupManager.getWorkerSetupData().getMinNumWorkers() && + workerWrapper.getRunningTasks().isEmpty() && + System.currentTimeMillis() - workerWrapper.getLastCompletedTaskTime().getMillis() + > config.getMaxWorkerIdleTimeMillisBeforeDeletion() + ) { + thoseLazyWorkers.add(workerWrapper); + } + } AutoScalingData terminated = strategy.terminate( Lists.transform( @@ -196,9 +197,9 @@ public class RemoteTaskRunner implements TaskRunner new Function() { @Override - public String apply(@Nullable WorkerWrapper input) + public String apply(WorkerWrapper input) { - return input.getWorker().getHost(); + return input.getWorker().getIp(); } } ) @@ -218,7 +219,7 @@ public class RemoteTaskRunner implements TaskRunner } log.info( - "[%s] still terminating. Wait for all nodes to terminate before trying again.", + "%s still terminating. 
Wait for all nodes to terminate before trying again.", currentlyTerminating ); } @@ -368,7 +369,7 @@ public class RemoteTaskRunner implements TaskRunner private void addWorker(final Worker worker) { try { - currentlyProvisioning.remove(worker.getHost()); + currentlyProvisioning.removeAll(strategy.ipLookup(Arrays.asList(worker.getIp()))); final String workerStatusPath = JOINER.join(config.getStatusPath(), worker.getHost()); final PathChildrenCache statusCache = new PathChildrenCache(cf, workerStatusPath, true); @@ -388,8 +389,7 @@ public class RemoteTaskRunner implements TaskRunner synchronized (statusLock) { try { if (event.getType().equals(PathChildrenCacheEvent.Type.CHILD_ADDED) || - event.getType().equals(PathChildrenCacheEvent.Type.CHILD_UPDATED)) - { + event.getType().equals(PathChildrenCacheEvent.Type.CHILD_UPDATED)) { final String taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); final TaskStatus taskStatus; @@ -399,7 +399,7 @@ public class RemoteTaskRunner implements TaskRunner event.getData().getData(), TaskStatus.class ); - if(!taskStatus.getId().equals(taskId)) { + if (!taskStatus.getId().equals(taskId)) { // Sanity check throw new ISE( "Worker[%s] status id does not match payload id: %s != %s", @@ -408,7 +408,8 @@ public class RemoteTaskRunner implements TaskRunner taskStatus.getId() ); } - } catch (Exception e) { + } + catch (Exception e) { log.warn(e, "Worker[%s] wrote bogus status for task: %s", worker.getHost(), taskId); retryTask(new CleanupPaths(worker.getHost(), taskId), tasks.get(taskId)); throw Throwables.propagate(e); @@ -446,7 +447,8 @@ public class RemoteTaskRunner implements TaskRunner } } } - } catch(Exception e) { + } + catch (Exception e) { log.makeAlert(e, "Failed to handle new worker status") .addData("worker", worker.getHost()) .addData("znode", event.getData().getPath()) @@ -478,22 +480,22 @@ public class RemoteTaskRunner implements TaskRunner * When a ephemeral worker node disappears from ZK, we have to make sure there are no tasks still assigned * to the worker. If tasks remain, they are retried. 
* - * @param workerId - id of the removed worker + * @param worker - the removed worker */ - private void removeWorker(final String workerId) + private void removeWorker(final Worker worker) { - currentlyTerminating.remove(workerId); + currentlyTerminating.remove(worker.getHost()); - WorkerWrapper workerWrapper = zkWorkers.get(workerId); + WorkerWrapper workerWrapper = zkWorkers.get(worker.getHost()); if (workerWrapper != null) { try { Set tasksToRetry = Sets.newHashSet(workerWrapper.getRunningTasks()); - tasksToRetry.addAll(cf.getChildren().forPath(JOINER.join(config.getTaskPath(), workerId))); + tasksToRetry.addAll(cf.getChildren().forPath(JOINER.join(config.getTaskPath(), worker.getHost()))); for (String taskId : tasksToRetry) { TaskWrapper taskWrapper = tasks.get(taskId); if (taskWrapper != null) { - retryTask(new CleanupPaths(workerId, taskId), tasks.get(taskId)); + retryTask(new CleanupPaths(worker.getHost(), taskId), tasks.get(taskId)); } } @@ -503,7 +505,7 @@ public class RemoteTaskRunner implements TaskRunner log.error(e, "Failed to cleanly remove worker[%s]"); } } - zkWorkers.remove(workerId); + zkWorkers.remove(worker.getHost()); } private WorkerWrapper findWorkerForTask() @@ -526,7 +528,9 @@ public class RemoteTaskRunner implements TaskRunner public boolean apply(WorkerWrapper input) { return (!input.isAtCapacity() && - input.getWorker().getVersion().compareTo(config.getMinWorkerVersion()) >= 0); + input.getWorker() + .getVersion() + .compareTo(workerSetupManager.getWorkerSetupData().getMinVersion()) >= 0); } } ) @@ -551,7 +555,7 @@ public class RemoteTaskRunner implements TaskRunner } log.info( - "[%s] still provisioning. Wait for all provisioned nodes to complete before requesting new worker.", + "%s still provisioning. Wait for all provisioned nodes to complete before requesting new worker.", currentlyProvisioning ); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java index c364070e313..a8cfcf8df22 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/EC2AutoScalingStrategyConfig.java @@ -26,24 +26,7 @@ import org.skife.config.Default; */ public abstract class EC2AutoScalingStrategyConfig { - @Config("druid.indexer.amiId") - public abstract String getAmiId(); - @Config("druid.indexer.worker.port") @Default("8080") public abstract String getWorkerPort(); - - @Config("druid.indexer.instanceType") - public abstract String getInstanceType(); - - @Config("druid.indexer.minNumInstancesToProvision") - @Default("1") - public abstract int getMinNumInstancesToProvision(); - - @Config("druid.indexer.maxNumInstancesToProvision") - @Default("1") - public abstract int getMaxNumInstancesToProvision(); - - @Config("druid.indexer.userDataFile") - public abstract String getUserDataFile(); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java index 00b869ea6da..44b3a1d4c8c 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/RemoteTaskRunnerConfig.java @@ -37,15 +37,8 @@ public abstract class RemoteTaskRunnerConfig extends IndexerZkConfig 
@Default("2012-01-01T00:55:00.000Z") public abstract DateTime getTerminateResourcesOriginDateTime(); - @Config("druid.indexer.minWorkerVersion") - public abstract String getMinWorkerVersion(); - - @Config("druid.indexer.minNumWorkers") - @Default("1") - public abstract int getMinNumWorkers(); - @Config("druid.indexer.maxWorkerIdleTimeMillisBeforeDeletion") - @Default("1") + @Default("600000") public abstract int getMaxWorkerIdleTimeMillisBeforeDeletion(); @Config("druid.indexer.maxScalingDuration") diff --git a/server/src/main/java/com/metamx/druid/loading/MMappedStorageAdapterFactory.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java similarity index 63% rename from server/src/main/java/com/metamx/druid/loading/MMappedStorageAdapterFactory.java rename to merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java index 13c65ad5c4d..16eeb1c3439 100644 --- a/server/src/main/java/com/metamx/druid/loading/MMappedStorageAdapterFactory.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/config/WorkerSetupManagerConfig.java @@ -17,22 +17,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package com.metamx.druid.loading; +package com.metamx.druid.merger.coordinator.config; -import com.metamx.druid.StorageAdapter; -import com.metamx.druid.index.v1.IndexIO; -import com.metamx.druid.index.v1.MMappedIndexStorageAdapter; - -import java.io.File; -import java.io.IOException; +import org.joda.time.Duration; +import org.skife.config.Config; +import org.skife.config.Default; /** */ -public class MMappedStorageAdapterFactory extends ConvertingBaseQueryableFactory +public abstract class WorkerSetupManagerConfig { - @Override - protected StorageAdapter factorizeConverted(File parentDir) throws IOException - { - return new MMappedIndexStorageAdapter(IndexIO.mapDir(parentDir)); - } + @Config("druid.indexer.configTable") + public abstract String getConfigTable(); + + @Config("druid.indexer.workerSetupConfigName") + public abstract String getWorkerSetupConfigName(); + + @Config("druid.indexer.poll.duration") + @Default("PT1M") + public abstract Duration getPollDuration(); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java index ed7ac9f3f25..78326d3a3cc 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java @@ -106,7 +106,7 @@ public class TaskConsumer implements Runnable .addData("interval", task.getInterval()) .emit(); - // TODO - Retry would be nice, but only after we have a way to throttle and limit them + // Retry would be nice, but only after we have a way to throttle and limit them. Just fail for now. 
if(!shutdown) { queue.done(task, TaskStatus.failure(task.getId())); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java index 1c11c62cb7b..3dae4046764 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java @@ -47,6 +47,9 @@ import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerConfig; import com.metamx.druid.initialization.ServiceDiscoveryConfig; import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.loading.S3SegmentPusher; +import com.metamx.druid.loading.S3SegmentPusherConfig; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.merger.common.TaskToolbox; import com.metamx.druid.merger.common.config.IndexerZkConfig; import com.metamx.druid.merger.common.index.StaticS3FirehoseFactory; @@ -66,12 +69,11 @@ import com.metamx.druid.merger.coordinator.config.IndexerCoordinatorConfig; import com.metamx.druid.merger.coordinator.config.IndexerDbConnectorConfig; import com.metamx.druid.merger.coordinator.config.RemoteTaskRunnerConfig; import com.metamx.druid.merger.coordinator.config.RetryPolicyConfig; +import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; import com.metamx.druid.merger.coordinator.scaling.EC2AutoScalingStrategy; import com.metamx.druid.merger.coordinator.scaling.NoopScalingStrategy; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; -import com.metamx.druid.realtime.S3SegmentPusher; -import com.metamx.druid.realtime.S3SegmentPusherConfig; -import com.metamx.druid.realtime.SegmentPusher; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.utils.PropUtils; import com.metamx.emitter.EmittingLogger; import com.metamx.emitter.core.Emitters; @@ -98,6 +100,7 @@ import org.mortbay.jetty.servlet.DefaultServlet; import org.mortbay.jetty.servlet.FilterHolder; import org.mortbay.jetty.servlet.ServletHolder; import org.skife.config.ConfigurationObjectFactory; +import org.skife.jdbi.v2.DBI; import java.net.URL; import java.util.Arrays; @@ -133,6 +136,7 @@ public class IndexerCoordinatorNode extends RegisteringNode private CuratorFramework curatorFramework = null; private ScheduledExecutorFactory scheduledExecutorFactory = null; private IndexerZkConfig indexerZkConfig; + private WorkerSetupManager workerSetupManager = null; private TaskRunnerFactory taskRunnerFactory = null; private TaskMaster taskMaster = null; private Server server = null; @@ -160,14 +164,16 @@ public class IndexerCoordinatorNode extends RegisteringNode return this; } - public void setMergerDBCoordinator(MergerDBCoordinator mergerDBCoordinator) + public IndexerCoordinatorNode setMergerDBCoordinator(MergerDBCoordinator mergerDBCoordinator) { this.mergerDBCoordinator = mergerDBCoordinator; + return this; } - public void setTaskQueue(TaskQueue taskQueue) + public IndexerCoordinatorNode setTaskQueue(TaskQueue taskQueue) { this.taskQueue = taskQueue; + return this; } public IndexerCoordinatorNode setMergeDbCoordinator(MergerDBCoordinator mergeDbCoordinator) @@ -182,9 +188,16 @@ public class IndexerCoordinatorNode extends RegisteringNode return this; } - public void setTaskRunnerFactory(TaskRunnerFactory taskRunnerFactory) + public IndexerCoordinatorNode 
setWorkerSetupManager(WorkerSetupManager workerSetupManager) + { + this.workerSetupManager = workerSetupManager; + return this; + } + + public IndexerCoordinatorNode setTaskRunnerFactory(TaskRunnerFactory taskRunnerFactory) { this.taskRunnerFactory = taskRunnerFactory; + return this; } public void init() throws Exception @@ -202,6 +215,7 @@ public class IndexerCoordinatorNode extends RegisteringNode initializeJacksonSubtypes(); initializeCurator(); initializeIndexerZkConfig(); + initializeWorkerSetupManager(); initializeTaskRunnerFactory(); initializeTaskMaster(); initializeServer(); @@ -220,7 +234,8 @@ public class IndexerCoordinatorNode extends RegisteringNode jsonMapper, config, emitter, - taskQueue + taskQueue, + workerSetupManager ) ); @@ -447,6 +462,27 @@ public class IndexerCoordinatorNode extends RegisteringNode } } + public void initializeWorkerSetupManager() + { + if (workerSetupManager == null) { + final DbConnectorConfig dbConnectorConfig = configFactory.build(DbConnectorConfig.class); + final DBI dbi = new DbConnector(dbConnectorConfig).getDBI(); + final WorkerSetupManagerConfig workerSetupManagerConfig = configFactory.build(WorkerSetupManagerConfig.class); + + DbConnector.createConfigTable(dbi, workerSetupManagerConfig.getConfigTable()); + workerSetupManager = new WorkerSetupManager( + dbi, Executors.newScheduledThreadPool( + 1, + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("WorkerSetupManagerExec--%d") + .build() + ), jsonMapper, workerSetupManagerConfig + ); + } + lifecycle.addManagedInstance(workerSetupManager); + } + public void initializeTaskRunnerFactory() { if (taskRunnerFactory == null) { @@ -476,7 +512,8 @@ public class IndexerCoordinatorNode extends RegisteringNode PropUtils.getProperty(props, "com.metamx.aws.secretKey") ) ), - configFactory.build(EC2AutoScalingStrategyConfig.class) + configFactory.build(EC2AutoScalingStrategyConfig.class), + workerSetupManager ); } else if (config.getStrategyImpl().equalsIgnoreCase("noop")) { strategy = new NoopScalingStrategy(); @@ -491,7 +528,8 @@ public class IndexerCoordinatorNode extends RegisteringNode new PathChildrenCache(curatorFramework, indexerZkConfig.getAnnouncementPath(), true), retryScheduledExec, new RetryPolicyFactory(configFactory.build(RetryPolicyConfig.class)), - strategy + strategy, + workerSetupManager ); } }; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java index 6cf9b0a7c16..e4acd93514f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java @@ -28,6 +28,8 @@ import com.metamx.druid.merger.common.task.MergeTask; import com.metamx.druid.merger.common.task.Task; import com.metamx.druid.merger.coordinator.TaskQueue; import com.metamx.druid.merger.coordinator.config.IndexerCoordinatorConfig; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.service.ServiceEmitter; import javax.ws.rs.Consumes; @@ -48,18 +50,21 @@ public class IndexerCoordinatorResource private final IndexerCoordinatorConfig config; private final ServiceEmitter emitter; private final TaskQueue tasks; + private final WorkerSetupManager workerSetupManager; @Inject public IndexerCoordinatorResource( 
IndexerCoordinatorConfig config, ServiceEmitter emitter, - TaskQueue tasks + TaskQueue tasks, + WorkerSetupManager workerSetupManager ) throws Exception { this.config = config; this.emitter = emitter; this.tasks = tasks; + this.workerSetupManager = workerSetupManager; } @POST @@ -115,4 +120,25 @@ public class IndexerCoordinatorResource { return Response.ok(ImmutableMap.of("task", taskid)).build(); } + + @GET + @Path("/worker/setup") + @Produces("application/json") + public Response getWorkerSetupData() + { + return Response.ok(workerSetupManager.getWorkerSetupData()).build(); + } + + @POST + @Path("/worker/setup") + @Consumes("application/json") + public Response setWorkerSetupData( + final WorkerSetupData workerSetupData + ) + { + if (!workerSetupManager.setWorkerSetupData(workerSetupData)) { + return Response.status(Response.Status.BAD_REQUEST).build(); + } + return Response.ok().build(); + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java index 9c657bdc292..4cc1df9fa6f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorServletModule.java @@ -22,6 +22,7 @@ package com.metamx.druid.merger.coordinator.http; import com.google.inject.Provides; import com.metamx.druid.merger.coordinator.TaskQueue; import com.metamx.druid.merger.coordinator.config.IndexerCoordinatorConfig; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.service.ServiceEmitter; import com.sun.jersey.guice.JerseyServletModule; import com.sun.jersey.guice.spi.container.servlet.GuiceContainer; @@ -38,18 +39,21 @@ public class IndexerCoordinatorServletModule extends JerseyServletModule private final IndexerCoordinatorConfig indexerCoordinatorConfig; private final ServiceEmitter emitter; private final TaskQueue tasks; + private final WorkerSetupManager workerSetupManager; public IndexerCoordinatorServletModule( ObjectMapper jsonMapper, IndexerCoordinatorConfig indexerCoordinatorConfig, ServiceEmitter emitter, - TaskQueue tasks + TaskQueue tasks, + WorkerSetupManager workerSetupManager ) { this.jsonMapper = jsonMapper; this.indexerCoordinatorConfig = indexerCoordinatorConfig; this.emitter = emitter; this.tasks = tasks; + this.workerSetupManager = workerSetupManager; } @Override @@ -60,6 +64,7 @@ public class IndexerCoordinatorServletModule extends JerseyServletModule bind(IndexerCoordinatorConfig.class).toInstance(indexerCoordinatorConfig); bind(ServiceEmitter.class).toInstance(emitter); bind(TaskQueue.class).toInstance(tasks); + bind(WorkerSetupManager.class).toInstance(workerSetupManager); serve("/*").with(GuiceContainer.class); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java index 6cce08f8731..5a1bb4980e5 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/AutoScalingData.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.scaling; import java.util.List; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java index 265fe62287c..8d51da61afd 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategy.java @@ -24,7 +24,6 @@ import com.amazonaws.services.ec2.model.DescribeInstancesRequest; import com.amazonaws.services.ec2.model.DescribeInstancesResult; import com.amazonaws.services.ec2.model.Filter; import com.amazonaws.services.ec2.model.Instance; -import com.amazonaws.services.ec2.model.InstanceType; import com.amazonaws.services.ec2.model.Reservation; import com.amazonaws.services.ec2.model.RunInstancesRequest; import com.amazonaws.services.ec2.model.RunInstancesResult; @@ -32,11 +31,14 @@ import com.amazonaws.services.ec2.model.TerminateInstancesRequest; import com.google.common.base.Function; import com.google.common.collect.Lists; import com.metamx.druid.merger.coordinator.config.EC2AutoScalingStrategyConfig; +import com.metamx.druid.merger.coordinator.setup.EC2NodeData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.emitter.EmittingLogger; +import org.apache.commons.codec.binary.Base64; import org.codehaus.jackson.map.ObjectMapper; import javax.annotation.Nullable; -import java.io.File; import java.util.List; /** @@ -48,31 +50,45 @@ public class EC2AutoScalingStrategy implements ScalingStrategy private final ObjectMapper jsonMapper; private final AmazonEC2Client amazonEC2Client; private final EC2AutoScalingStrategyConfig config; + private final WorkerSetupManager workerSetupManager; public EC2AutoScalingStrategy( ObjectMapper jsonMapper, AmazonEC2Client amazonEC2Client, - EC2AutoScalingStrategyConfig config + EC2AutoScalingStrategyConfig config, + WorkerSetupManager workerSetupManager ) { this.jsonMapper = jsonMapper; this.amazonEC2Client = amazonEC2Client; this.config = config; + this.workerSetupManager = workerSetupManager; } @Override public AutoScalingData provision() { try { + WorkerSetupData setupData = workerSetupManager.getWorkerSetupData(); + EC2NodeData workerConfig = setupData.getNodeData(); + log.info("Creating new instance(s)..."); RunInstancesResult result = amazonEC2Client.runInstances( new RunInstancesRequest( - config.getAmiId(), - config.getMinNumInstancesToProvision(), - config.getMaxNumInstancesToProvision() + workerConfig.getAmiId(), + workerConfig.getMinInstances(), + workerConfig.getMaxInstances() ) - 
.withInstanceType(InstanceType.fromValue(config.getInstanceType())) - .withUserData(jsonMapper.writeValueAsString(new File(config.getUserDataFile()))) + .withInstanceType(workerConfig.getInstanceType()) + .withSecurityGroupIds(workerConfig.getSecurityGroupIds()) + .withKeyName(workerConfig.getKeyName()) + .withUserData( + Base64.encodeBase64String( + jsonMapper.writeValueAsBytes( + setupData.getUserData() + ) + ) + ) ); List instanceIds = Lists.transform( @@ -80,7 +96,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { return input.getInstanceId(); } @@ -95,9 +111,9 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { - return String.format("%s:%s", input.getPrivateIpAddress(), config.getWorkerPort()); + return input.getInstanceId(); } } ), @@ -112,12 +128,12 @@ public class EC2AutoScalingStrategy implements ScalingStrategy } @Override - public AutoScalingData terminate(List nodeIds) + public AutoScalingData terminate(List ids) { DescribeInstancesResult result = amazonEC2Client.describeInstances( new DescribeInstancesRequest() .withFilters( - new Filter("private-ip-address", nodeIds) + new Filter("private-ip-address", ids) ) ); @@ -135,7 +151,7 @@ public class EC2AutoScalingStrategy implements ScalingStrategy new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(Instance input) { return input.getInstanceId(); } @@ -146,13 +162,13 @@ public class EC2AutoScalingStrategy implements ScalingStrategy return new AutoScalingData( Lists.transform( - instances, - new Function() + ids, + new Function() { @Override - public String apply(@Nullable Instance input) + public String apply(@Nullable String input) { - return String.format("%s:%s", input.getPrivateIpAddress(), config.getWorkerPort()); + return String.format("%s:%s", input, config.getWorkerPort()); } } ), @@ -165,4 +181,36 @@ public class EC2AutoScalingStrategy implements ScalingStrategy return null; } + + @Override + public List ipLookup(List ips) + { + DescribeInstancesResult result = amazonEC2Client.describeInstances( + new DescribeInstancesRequest() + .withFilters( + new Filter("private-ip-address", ips) + ) + ); + + List instances = Lists.newArrayList(); + for (Reservation reservation : result.getReservations()) { + instances.addAll(reservation.getInstances()); + } + + List retVal = Lists.transform( + instances, + new Function() + { + @Override + public String apply(Instance input) + { + return input.getInstanceId(); + } + } + ); + + log.info("Performing lookup: %s --> %s", ips, retVal); + + return retVal; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java index 67eb99293e4..2b412ca6202 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/NoopScalingStrategy.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + package com.metamx.druid.merger.coordinator.scaling; import com.metamx.emitter.EmittingLogger; @@ -24,4 +43,11 @@ public class NoopScalingStrategy implements ScalingStrategy log.info("If I were a real strategy I'd terminate %s now", nodeIds); return null; } + + @Override + public List ipLookup(List ips) + { + log.info("I'm not a real strategy so I'm returning what I got %s", ips); + return ips; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java index 9b7da8fb3a4..150de1357e0 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ScalingStrategy.java @@ -27,5 +27,12 @@ public interface ScalingStrategy { public AutoScalingData provision(); - public AutoScalingData terminate(List nodeIds); + public AutoScalingData terminate(List ids); + + /** + * Provides a lookup of ip addresses to node ids + * @param ips + * @return + */ + public List ipLookup(List ips); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java new file mode 100644 index 00000000000..8d302df25f6 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/EC2NodeData.java @@ -0,0 +1,91 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
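
(Editorial note, not part of the patch: provision() now reports EC2 instance ids rather than ip:port strings, while workers still announce themselves by host/IP, so the new ipLookup method on ScalingStrategy is the translation step RemoteTaskRunner uses to clear its "currently provisioning" set. A minimal sketch of that handshake follows; only ScalingStrategy.ipLookup(...) and Worker.getIp() come from the patch, the surrounding class and field names are invented for illustration.)

    import java.util.Arrays;
    import java.util.List;
    import java.util.Set;

    public class ProvisioningTracker
    {
      private final Set<String> currentlyProvisioning; // instance ids returned by provision()
      private final ScalingStrategy strategy;

      public ProvisioningTracker(Set<String> currentlyProvisioning, ScalingStrategy strategy)
      {
        this.currentlyProvisioning = currentlyProvisioning;
        this.strategy = strategy;
      }

      public void onWorkerAnnounced(Worker worker)
      {
        // Map the announced private IP back to instance id(s) and mark them as fully provisioned.
        List<String> instanceIds = strategy.ipLookup(Arrays.asList(worker.getIp()));
        currentlyProvisioning.removeAll(instanceIds);
      }
    }
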
+ */ + +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.util.List; + +/** + */ +public class EC2NodeData +{ + private final String amiId; + private final String instanceType; + private final int minInstances; + private final int maxInstances; + private final List securityGroupIds; + private final String keyName; + + @JsonCreator + public EC2NodeData( + @JsonProperty("amiId") String amiId, + @JsonProperty("instanceType") String instanceType, + @JsonProperty("minInstances") int minInstances, + @JsonProperty("maxInstances") int maxInstances, + @JsonProperty("securityGroupIds") List securityGroupIds, + @JsonProperty("keyName") String keyName + ) + { + this.amiId = amiId; + this.instanceType = instanceType; + this.minInstances = minInstances; + this.maxInstances = maxInstances; + this.securityGroupIds = securityGroupIds; + this.keyName = keyName; + } + + @JsonProperty + public String getAmiId() + { + return amiId; + } + + @JsonProperty + public String getInstanceType() + { + return instanceType; + } + + @JsonProperty + public int getMinInstances() + { + return minInstances; + } + + @JsonProperty + public int getMaxInstances() + { + return maxInstances; + } + + @JsonProperty + public List getSecurityGroupIds() + { + return securityGroupIds; + } + + @JsonProperty + public String getKeyName() + { + return keyName; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java new file mode 100644 index 00000000000..876a2635273 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/GalaxyUserData.java @@ -0,0 +1,62 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + */ +public class GalaxyUserData +{ + public final String env; + public final String version; + public final String type; + + @JsonCreator + public GalaxyUserData( + @JsonProperty("env") String env, + @JsonProperty("version") String version, + @JsonProperty("type") String type + ) + { + this.env = env; + this.version = version; + this.type = type; + } + + @JsonProperty + public String getEnv() + { + return env; + } + + @JsonProperty + public String getVersion() + { + return version; + } + + @JsonProperty + public String getType() + { + return type; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java new file mode 100644 index 00000000000..8395fa2d6c8 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupData.java @@ -0,0 +1,73 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.merger.coordinator.setup; + +import org.codehaus.jackson.annotate.JsonCreator; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.util.List; + +/** + */ +public class WorkerSetupData +{ + private final String minVersion; + private final int minNumWorkers; + private final EC2NodeData nodeData; + private final GalaxyUserData userData; + + @JsonCreator + public WorkerSetupData( + @JsonProperty("minVersion") String minVersion, + @JsonProperty("minNumWorkers") int minNumWorkers, + @JsonProperty("nodeData") EC2NodeData nodeData, + @JsonProperty("userData") GalaxyUserData userData + ) + { + this.minVersion = minVersion; + this.minNumWorkers = minNumWorkers; + this.nodeData = nodeData; + this.userData = userData; + } + + @JsonProperty + public String getMinVersion() + { + return minVersion; + } + + @JsonProperty + public int getMinNumWorkers() + { + return minNumWorkers; + } + + @JsonProperty + public EC2NodeData getNodeData() + { + return nodeData; + } + + @JsonProperty + public GalaxyUserData getUserData() + { + return userData; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java new file mode 100644 index 00000000000..5e43e68ae66 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/setup/WorkerSetupManager.java @@ -0,0 +1,226 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. 
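
(Editorial note, not part of the patch: the three setup beans above combine into the payload that the new GET/POST /worker/setup endpoints on IndexerCoordinatorResource exchange and that WorkerSetupManager persists to the config table. A minimal construction sketch, with every concrete value invented; the constructors match the classes added in this patch.)

    import java.util.Arrays;

    public class WorkerSetupExample
    {
      public static WorkerSetupData exampleSetupData()
      {
        EC2NodeData nodeData = new EC2NodeData(
            "ami-12345678",             // amiId (made up)
            "m1.xlarge",                // instanceType
            1,                          // minInstances per provision() call
            1,                          // maxInstances per provision() call
            Arrays.asList("sg-abcdef"), // securityGroupIds (made up)
            "indexer-keypair"           // keyName (made up)
        );
        GalaxyUserData userData = new GalaxyUserData("prod", "0.2.7", "worker");

        // minVersion gates which workers RemoteTaskRunner will assign tasks to;
        // minNumWorkers is the floor below which idle workers are not terminated.
        return new WorkerSetupData("0.2.7", 2, nodeData, userData);
      }
    }

Serialized through the Jackson annotations shown above, the same structure is what EC2AutoScalingStrategy reads when provisioning: nodeData feeds the RunInstancesRequest and userData is Base64-encoded into the instance user data.
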
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.merger.coordinator.setup; + +import com.google.common.base.Throwables; +import com.google.common.collect.Lists; +import com.metamx.common.ISE; +import com.metamx.common.concurrent.ScheduledExecutors; +import com.metamx.common.lifecycle.LifecycleStart; +import com.metamx.common.lifecycle.LifecycleStop; +import com.metamx.common.logger.Logger; +import com.metamx.druid.merger.coordinator.config.WorkerSetupManagerConfig; +import org.apache.commons.collections.MapUtils; +import org.codehaus.jackson.map.ObjectMapper; +import org.joda.time.Duration; +import org.skife.jdbi.v2.DBI; +import org.skife.jdbi.v2.FoldController; +import org.skife.jdbi.v2.Folder3; +import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.StatementContext; +import org.skife.jdbi.v2.tweak.HandleCallback; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicReference; + +/** + */ +public class WorkerSetupManager +{ + private static final Logger log = new Logger(WorkerSetupManager.class); + + private final DBI dbi; + private final ObjectMapper jsonMapper; + private final ScheduledExecutorService exec; + private final WorkerSetupManagerConfig config; + + private final Object lock = new Object(); + + private volatile AtomicReference workerSetupData = new AtomicReference(null); + private volatile boolean started = false; + + public WorkerSetupManager( + DBI dbi, + ScheduledExecutorService exec, + ObjectMapper jsonMapper, + WorkerSetupManagerConfig config + ) + { + this.dbi = dbi; + this.exec = exec; + this.jsonMapper = jsonMapper; + this.config = config; + } + + @LifecycleStart + public void start() + { + synchronized (lock) { + if (started) { + return; + } + + ScheduledExecutors.scheduleWithFixedDelay( + exec, + new Duration(0), + config.getPollDuration(), + new Runnable() + { + @Override + public void run() + { + poll(); + } + } + ); + + started = true; + } + } + + @LifecycleStop + public void stop() + { + synchronized (lock) { + if (!started) { + return; + } + + started = false; + } + } + + public void poll() + { + try { + List setupDataList = dbi.withHandle( + new HandleCallback>() + { + @Override + public List withHandle(Handle handle) throws Exception + { + return handle.createQuery( + String.format( + "SELECT payload FROM %s WHERE name = :name", + config.getConfigTable() + ) + ) + .bind("name", config.getWorkerSetupConfigName()) + .fold( + Lists.newArrayList(), + new Folder3, Map>() + { + @Override + public ArrayList fold( + ArrayList workerNodeConfigurations, + Map stringObjectMap, + FoldController foldController, + StatementContext statementContext + ) throws SQLException + { + try { + // stringObjectMap lowercases 
and jackson may fail serde + workerNodeConfigurations.add( + jsonMapper.readValue( + MapUtils.getString(stringObjectMap, "payload"), + WorkerSetupData.class + ) + ); + return workerNodeConfigurations; + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + } + ); + } + } + ); + + if (setupDataList.isEmpty()) { + throw new ISE("WTF?! No configuration found for worker nodes!"); + } else if (setupDataList.size() != 1) { + throw new ISE("WTF?! Found more than one configuration for worker nodes"); + } + + workerSetupData.set(setupDataList.get(0)); + } + catch (Exception e) { + log.error(e, "Exception while polling for worker setup data!"); + } + } + + @SuppressWarnings("unchecked") + public WorkerSetupData getWorkerSetupData() + { + synchronized (lock) { + if (!started) { + throw new ISE("Must start WorkerSetupManager first!"); + } + + return workerSetupData.get(); + } + } + + public boolean setWorkerSetupData(final WorkerSetupData value) + { + synchronized (lock) { + try { + if (!started) { + throw new ISE("Must start WorkerSetupManager first!"); + } + + dbi.withHandle( + new HandleCallback() + { + @Override + public Void withHandle(Handle handle) throws Exception + { + handle.createStatement( + String.format( + "INSERT INTO %s (name, payload) VALUES (:name, :payload) ON DUPLICATE KEY UPDATE payload = :payload", + config.getConfigTable() + ) + ) + .bind("name", config.getWorkerSetupConfigName()) + .bind("payload", jsonMapper.writeValueAsString(value)) + .execute(); + + return null; + } + } + ); + + workerSetupData.set(value); + } + catch (Exception e) { + log.error(e, "Exception updating worker config"); + return false; + } + } + + return true; + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java b/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java index 7c218d4dcdc..0799a8de37c 100644 --- a/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java @@ -30,7 +30,6 @@ import com.metamx.common.lifecycle.LifecycleStop; import com.metamx.common.logger.Logger; import com.metamx.druid.RegisteringNode; import com.metamx.druid.http.StatusServlet; -import com.metamx.druid.index.v1.serde.Registererer; import com.metamx.druid.initialization.CuratorConfig; import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerConfig; @@ -43,9 +42,9 @@ import com.metamx.druid.merger.worker.TaskMonitor; import com.metamx.druid.merger.worker.Worker; import com.metamx.druid.merger.worker.WorkerCuratorCoordinator; import com.metamx.druid.merger.worker.config.WorkerConfig; -import com.metamx.druid.realtime.S3SegmentPusher; -import com.metamx.druid.realtime.S3SegmentPusherConfig; -import com.metamx.druid.realtime.SegmentPusher; +import com.metamx.druid.loading.S3SegmentPusher; +import com.metamx.druid.loading.S3SegmentPusherConfig; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.utils.PropUtils; import com.metamx.emitter.EmittingLogger; import com.metamx.emitter.core.Emitters; diff --git a/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java b/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java index c0c8a88b798..fcecac91d5b 100644 --- a/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java @@ -32,12 +32,14 @@ import java.util.Map; public class 
MergeTaskTest { - final List segments = - ImmutableList - .builder() - .add(new DataSegment("foo", new Interval("2012-01-04/2012-01-06"), "V1", null, null, null, null, -1)) - .add(new DataSegment("foo", new Interval("2012-01-05/2012-01-07"), "V1", null, null, null, null, -1)) - .add(new DataSegment("foo", new Interval("2012-01-03/2012-01-05"), "V1", null, null, null, null, -1)) + private final DataSegment.Builder segmentBuilder = DataSegment.builder() + .dataSource("foo") + .version("V1"); + + final List segments = ImmutableList.builder() + .add(segmentBuilder.interval(new Interval("2012-01-04/2012-01-06")).build()) + .add(segmentBuilder.interval(new Interval("2012-01-05/2012-01-07")).build()) + .add(segmentBuilder.interval(new Interval("2012-01-03/2012-01-05")).build()) .build(); final MergeTask testMergeTask = new MergeTask("foo", segments) diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java index eb10731abd9..b7107ed72d2 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java @@ -17,6 +17,8 @@ import com.metamx.druid.merger.coordinator.config.RemoteTaskRunnerConfig; import com.metamx.druid.merger.coordinator.config.RetryPolicyConfig; import com.metamx.druid.merger.coordinator.scaling.AutoScalingData; import com.metamx.druid.merger.coordinator.scaling.ScalingStrategy; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.merger.worker.TaskMonitor; import com.metamx.druid.merger.worker.Worker; import com.metamx.druid.merger.worker.WorkerCuratorCoordinator; @@ -42,6 +44,7 @@ import org.junit.Test; import java.io.File; import java.util.List; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -62,14 +65,14 @@ public class RemoteTaskRunnerTest private PathChildrenCache pathChildrenCache; private RemoteTaskRunner remoteTaskRunner; private TaskMonitor taskMonitor; + private WorkerSetupManager workerSetupManager; private ScheduledExecutorService scheduledExec; - private Task task1; + private TestTask task1; private Worker worker1; - @Before public void setUp() throws Exception { @@ -109,6 +112,7 @@ public class RemoteTaskRunnerTest null, null, null, + null, 0 ) ), Lists.newArrayList() @@ -139,12 +143,28 @@ public class RemoteTaskRunnerTest @Test public void testAlreadyExecutedTask() throws Exception { - remoteTaskRunner.run(task1, new TaskContext(new DateTime().toString(), Sets.newHashSet()), null); + final CountDownLatch latch = new CountDownLatch(1); + remoteTaskRunner.run( + new TestTask(task1){ + @Override + public TaskStatus run( + TaskContext context, TaskToolbox toolbox + ) throws Exception + { + latch.await(); + return super.run(context, toolbox); + } + }, + new TaskContext(new DateTime().toString(), Sets.newHashSet()), + null + ); try { - remoteTaskRunner.run(task1, new TaskContext(new DateTime().toString(), Sets.newHashSet()), null); - fail("ISE expected"); - } catch (ISE expected) { - + remoteTaskRunner.run(task1, new TaskContext(new DateTime().toString(), Sets.newHashSet()), null); + latch.countDown(); + fail("ISE expected"); + } + catch (ISE expected) { + latch.countDown(); } } @@ -166,6 +186,7 @@ public class RemoteTaskRunnerTest null, 
null, null, + null, 0 ) ), Lists.newArrayList() @@ -333,6 +354,17 @@ public class RemoteTaskRunnerTest private void makeRemoteTaskRunner() throws Exception { scheduledExec = EasyMock.createMock(ScheduledExecutorService.class); + workerSetupManager = EasyMock.createMock(WorkerSetupManager.class); + + EasyMock.expect(workerSetupManager.getWorkerSetupData()).andReturn( + new WorkerSetupData( + "0", + 0, + null, + null + ) + ); + EasyMock.replay(workerSetupManager); remoteTaskRunner = new RemoteTaskRunner( jsonMapper, @@ -341,7 +373,8 @@ public class RemoteTaskRunnerTest pathChildrenCache, scheduledExec, new RetryPolicyFactory(new TestRetryPolicyConfig()), - new TestScalingStrategy() + new TestScalingStrategy(), + workerSetupManager ); // Create a single worker and wait for things for be ready @@ -351,7 +384,7 @@ public class RemoteTaskRunnerTest jsonMapper.writeValueAsBytes(worker1) ); while (remoteTaskRunner.getNumWorkers() == 0) { - Thread.sleep(500); + Thread.sleep(50); } } @@ -389,6 +422,12 @@ public class RemoteTaskRunnerTest { return null; } + + @Override + public List ipLookup(List ips) + { + return ips; + } } private static class TestRemoteTaskRunnerConfig extends RemoteTaskRunnerConfig @@ -405,18 +444,6 @@ public class RemoteTaskRunnerTest return null; } - @Override - public String getMinWorkerVersion() - { - return "0"; - } - - @Override - public int getMinNumWorkers() - { - return 0; - } - @Override public int getMaxWorkerIdleTimeMillisBeforeDeletion() { @@ -464,6 +491,9 @@ public class RemoteTaskRunnerTest private static class TestTask extends DefaultMergeTask { private final String id; + private final String dataSource; + private final List segments; + private final List aggregators; public TestTask( @JsonProperty("id") String id, @@ -475,6 +505,14 @@ public class RemoteTaskRunnerTest super(dataSource, segments, aggregators); this.id = id; + this.dataSource = dataSource; + this.segments = segments; + this.aggregators = aggregators; + } + + public TestTask(TestTask task) + { + this(task.id, task.dataSource, task.segments, task.aggregators); } @Override diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskQueueTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskQueueTest.java index 47913404eed..1a88f49885e 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskQueueTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskQueueTest.java @@ -327,6 +327,7 @@ public class TaskQueueTest null, null, null, + null, -1 ) ) diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java index 958a2c1d836..c3aa8378b07 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/scaling/EC2AutoScalingStrategyTest.java @@ -27,8 +27,13 @@ import com.amazonaws.services.ec2.model.Reservation; import com.amazonaws.services.ec2.model.RunInstancesRequest; import com.amazonaws.services.ec2.model.RunInstancesResult; import com.amazonaws.services.ec2.model.TerminateInstancesRequest; +import com.google.common.collect.Lists; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.merger.coordinator.config.EC2AutoScalingStrategyConfig; +import com.metamx.druid.merger.coordinator.setup.EC2NodeData; +import 
com.metamx.druid.merger.coordinator.setup.GalaxyUserData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupData; +import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import org.easymock.EasyMock; import org.junit.After; import org.junit.Assert; @@ -52,6 +57,7 @@ public class EC2AutoScalingStrategyTest private Reservation reservation; private Instance instance; private EC2AutoScalingStrategy strategy; + private WorkerSetupManager workerSetupManager; @Before public void setUp() throws Exception @@ -60,6 +66,7 @@ public class EC2AutoScalingStrategyTest runInstancesResult = EasyMock.createMock(RunInstancesResult.class); describeInstancesResult = EasyMock.createMock(DescribeInstancesResult.class); reservation = EasyMock.createMock(Reservation.class); + workerSetupManager = EasyMock.createMock(WorkerSetupManager.class); instance = new Instance() .withInstanceId(INSTANCE_ID) @@ -69,44 +76,16 @@ public class EC2AutoScalingStrategyTest strategy = new EC2AutoScalingStrategy( new DefaultObjectMapper(), - amazonEC2Client, new EC2AutoScalingStrategyConfig() - { - @Override - public String getAmiId() - { - return AMI_ID; - } - - @Override - public String getWorkerPort() - { - return "8080"; - } - - @Override - public String getInstanceType() - { - return "t1.micro"; - } - - @Override - public int getMinNumInstancesToProvision() - { - return 1; - } - - @Override - public int getMaxNumInstancesToProvision() - { - return 1; - } - - @Override - public String getUserDataFile() - { - return ""; - } - } + amazonEC2Client, + new EC2AutoScalingStrategyConfig() + { + @Override + public String getWorkerPort() + { + return "8080"; + } + }, + workerSetupManager ); } @@ -117,11 +96,22 @@ public class EC2AutoScalingStrategyTest EasyMock.verify(runInstancesResult); EasyMock.verify(describeInstancesResult); EasyMock.verify(reservation); + EasyMock.verify(workerSetupManager); } @Test public void testScale() { + EasyMock.expect(workerSetupManager.getWorkerSetupData()).andReturn( + new WorkerSetupData( + "0", + 0, + new EC2NodeData(AMI_ID, INSTANCE_ID, 1, 1, Lists.newArrayList(), "foo"), + new GalaxyUserData("env", "version", "type") + ) + ); + EasyMock.replay(workerSetupManager); + EasyMock.expect(amazonEC2Client.runInstances(EasyMock.anyObject(RunInstancesRequest.class))).andReturn( runInstancesResult ); @@ -144,9 +134,9 @@ public class EC2AutoScalingStrategyTest Assert.assertEquals(created.getNodeIds().size(), 1); Assert.assertEquals(created.getNodes().size(), 1); - Assert.assertEquals(String.format("%s:8080", IP), created.getNodeIds().get(0)); + Assert.assertEquals("theInstance", created.getNodeIds().get(0)); - AutoScalingData deleted = strategy.terminate(Arrays.asList("dummyHost")); + AutoScalingData deleted = strategy.terminate(Arrays.asList("dummyIP")); Assert.assertEquals(deleted.getNodeIds().size(), 1); Assert.assertEquals(deleted.getNodes().size(), 1); diff --git a/pom.xml b/pom.xml index 83a963f1c38..195845004a9 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ com.metamx druid pom - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT druid druid @@ -68,7 +68,7 @@ com.metamx java-util - 0.16.0 + 0.19.1 com.metamx @@ -84,7 +84,7 @@ commons-codec commons-codec - 1.3 + 1.7 commons-httpclient @@ -364,14 +364,6 @@ pub-libs-local https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local - - repo.codahale.com - http://repo.codahale.com - - - nativelibs4java - http://nativelibs4java.sourceforge.net/maven - thirdparty-uploads JBoss Thirdparty Uploads diff --git a/realtime/pom.xml b/realtime/pom.xml 
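The worker setup classes introduced above (EC2NodeData, GalaxyUserData, WorkerSetupData) are plain Jackson beans, and WorkerSetupManager round-trips them as a single JSON payload in the config table row named by config.getWorkerSetupConfigName(). The sketch below is illustrative only: the class name WorkerSetupPayloadExample and every concrete value (AMI id, instance type, security group, key name) are made-up placeholders; only the property names and constructor shapes come from the @JsonProperty annotations in the new files above.

import com.metamx.druid.merger.coordinator.setup.EC2NodeData;
import com.metamx.druid.merger.coordinator.setup.GalaxyUserData;
import com.metamx.druid.merger.coordinator.setup.WorkerSetupData;
import org.codehaus.jackson.map.ObjectMapper;

import java.util.Arrays;

public class WorkerSetupPayloadExample
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper jsonMapper = new ObjectMapper();

    // Hypothetical values; only the field layout is taken from the setup classes above.
    final WorkerSetupData setupData = new WorkerSetupData(
        "0",                                  // minVersion
        1,                                    // minNumWorkers
        new EC2NodeData(
            "ami-placeholder",                // amiId
            "m1.large",                       // instanceType
            1,                                // minInstances
            1,                                // maxInstances
            Arrays.asList("sg-placeholder"),  // securityGroupIds
            "key-placeholder"                 // keyName
        ),
        new GalaxyUserData("env", "version", "type")
    );

    // setWorkerSetupData() stores JSON along these lines in the config table,
    // and poll() reads it back with jsonMapper.readValue(payload, WorkerSetupData.class):
    // {"minVersion":"0","minNumWorkers":1,
    //  "nodeData":{"amiId":"ami-placeholder","instanceType":"m1.large","minInstances":1,
    //              "maxInstances":1,"securityGroupIds":["sg-placeholder"],"keyName":"key-placeholder"},
    //  "userData":{"env":"env","version":"version","type":"type"}}
    System.out.println(jsonMapper.writeValueAsString(setupData));
  }
}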
index 5da5645f35c..d74016fb34b 100644 --- a/realtime/pom.xml +++ b/realtime/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT @@ -139,13 +139,6 @@ zkclient - - - com.codahale - jerkson_${scala.version} - 0.5.0 - - commons-codec diff --git a/realtime/src/main/java/com/metamx/druid/realtime/FireHydrant.java b/realtime/src/main/java/com/metamx/druid/realtime/FireHydrant.java index 633f7b1ac62..366d3e51a28 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/FireHydrant.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/FireHydrant.java @@ -19,16 +19,16 @@ package com.metamx.druid.realtime; -import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.IncrementalIndexSegment; +import com.metamx.druid.index.Segment; import com.metamx.druid.index.v1.IncrementalIndex; -import com.metamx.druid.index.v1.IncrementalIndexStorageAdapter; /** */ public class FireHydrant { private volatile IncrementalIndex index; - private volatile StorageAdapter adapter; + private volatile Segment adapter; private final int count; public FireHydrant( @@ -37,12 +37,12 @@ public class FireHydrant ) { this.index = index; - this.adapter = new IncrementalIndexStorageAdapter(index); + this.adapter = new IncrementalIndexSegment(index); this.count = count; } public FireHydrant( - StorageAdapter adapter, + Segment adapter, int count ) { @@ -56,7 +56,7 @@ public class FireHydrant return index; } - public StorageAdapter getAdapter() + public Segment getSegment() { return adapter; } @@ -71,7 +71,7 @@ public class FireHydrant return index == null; } - public void swapAdapter(StorageAdapter adapter) + public void swapSegment(Segment adapter) { this.adapter = adapter; this.index = null; diff --git a/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java b/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java index 60d290992d5..f503e80ade3 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java @@ -40,6 +40,9 @@ import com.metamx.druid.http.QueryServlet; import com.metamx.druid.http.StatusServlet; import com.metamx.druid.initialization.Initialization; import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.loading.S3SegmentPusher; +import com.metamx.druid.loading.S3SegmentPusherConfig; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.query.QueryRunnerFactoryConglomerate; import com.metamx.druid.utils.PropUtils; import com.metamx.emitter.service.ServiceEmitter; diff --git a/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java b/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java index fb4713f344b..f4df5e054f8 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java @@ -33,16 +33,17 @@ import com.metamx.common.Pair; import com.metamx.common.concurrent.ScheduledExecutors; import com.metamx.common.guava.FunctionalIterable; import com.metamx.druid.Query; -import com.metamx.druid.StorageAdapter; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidServer; import com.metamx.druid.client.ServerView; import com.metamx.druid.guava.ThreadRenamingRunnable; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.QueryableIndexSegment; +import com.metamx.druid.index.Segment; import 
com.metamx.druid.index.v1.IndexGranularity; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.index.v1.MMappedIndex; -import com.metamx.druid.index.v1.MMappedIndexStorageAdapter; +import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.query.MetricsEmittingQueryRunner; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryRunnerFactory; @@ -185,7 +186,7 @@ public class RealtimePlumberSchool implements PlumberSchool log.info("Loading previously persisted segment at [%s]", segmentDir); hydrants.add( new FireHydrant( - new MMappedIndexStorageAdapter(IndexIO.mapDir(segmentDir)), + new QueryableIndexSegment(null, IndexIO.loadIndex(segmentDir)), Integer.parseInt(segmentDir.getName()) ) ); @@ -303,29 +304,21 @@ public class RealtimePlumberSchool implements PlumberSchool final File mergedFile; try { - List indexes = Lists.newArrayList(); + List indexes = Lists.newArrayList(); for (FireHydrant fireHydrant : sink) { - StorageAdapter adapter = fireHydrant.getAdapter(); - if (adapter instanceof MMappedIndexStorageAdapter) { - log.info("Adding hydrant[%s]", fireHydrant); - indexes.add(((MMappedIndexStorageAdapter) adapter).getIndex()); - } - else { - log.makeAlert("[%s] Failure to merge-n-push", schema.getDataSource()) - .addData("type", "Unknown adapter type") - .addData("adapterClass", adapter.getClass().toString()) - .emit(); - return; - } + Segment segment = fireHydrant.getSegment(); + final QueryableIndex queryableIndex = segment.asQueryableIndex(); + log.info("Adding hydrant[%s]", fireHydrant); + indexes.add(queryableIndex); } - mergedFile = IndexMerger.mergeMMapped( + mergedFile = IndexMerger.mergeQueryableIndex( indexes, schema.getAggregators(), new File(computePersistDir(schema, interval), "merged") ); - MMappedIndex index = IndexIO.mapDir(mergedFile); + QueryableIndex index = IndexIO.loadIndex(mergedFile); DataSegment segment = segmentPusher.push( mergedFile, @@ -420,7 +413,7 @@ public class RealtimePlumberSchool implements PlumberSchool @Override public QueryRunner apply(@Nullable FireHydrant input) { - return factory.createRunner(input.getAdapter()); + return factory.createRunner(input.getSegment()); } } ) @@ -442,6 +435,8 @@ public class RealtimePlumberSchool implements PlumberSchool } } + log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource()); + persistExecutor.execute( new ThreadRenamingRunnable(String.format("%s-incremental-persist", schema.getDataSource())) { @@ -495,7 +490,7 @@ public class RealtimePlumberSchool implements PlumberSchool new File(computePersistDir(schema, interval), String.valueOf(indexToPersist.getCount())) ); - indexToPersist.swapAdapter(new MMappedIndexStorageAdapter(IndexIO.mapDir(persistedFile))); + indexToPersist.swapSegment(new QueryableIndexSegment(null, IndexIO.loadIndex(persistedFile))); return numRows; } diff --git a/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java b/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java index 6ae83c5f9bf..1b22f1e3a78 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java @@ -19,133 +19,23 @@ package com.metamx.druid.realtime; -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableMap; -import com.google.common.io.Closeables; -import com.metamx.common.ISE; -import com.metamx.common.StreamUtils; -import 
com.metamx.druid.client.DataSegment; -import com.metamx.emitter.EmittingLogger; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; +import com.metamx.druid.loading.S3SegmentPusherConfig; +import com.metamx.druid.loading.SegmentPusher; import org.codehaus.jackson.map.ObjectMapper; -import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.security.NoSuchAlgorithmException; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; /** + * A placeholder class to make the move of the SegmentPushers to a new package backwards compatible */ -public class S3SegmentPusher implements SegmentPusher +@Deprecated +public class S3SegmentPusher extends com.metamx.druid.loading.S3SegmentPusher implements SegmentPusher { - private static final EmittingLogger log = new EmittingLogger(S3SegmentPusher.class); - private static final Joiner JOINER = Joiner.on("/").skipNulls(); - - private final RestS3Service s3Client; - private final S3SegmentPusherConfig config; - private final ObjectMapper jsonMapper; - public S3SegmentPusher( - RestS3Service s3Client, - S3SegmentPusherConfig config, - ObjectMapper jsonMapper + RestS3Service s3Client, + S3SegmentPusherConfig config, + ObjectMapper jsonMapper ) { - this.s3Client = s3Client; - this.config = config; - this.jsonMapper = jsonMapper; - } - - @Override - public DataSegment push(File file, DataSegment segment) throws IOException - { - log.info("Uploading [%s] to S3", file); - String outputKey = JOINER.join( - config.getBaseKey().isEmpty() ? null : config.getBaseKey(), - segment.getDataSource(), - String.format( - "%s_%s", - segment.getInterval().getStart(), - segment.getInterval().getEnd() - ), - segment.getVersion(), - segment.getShardSpec().getPartitionNum() - ); - - File indexFilesDir = file; - - long indexSize = 0; - final File zipOutFile = File.createTempFile("druid", "index.zip"); - ZipOutputStream zipOut = null; - try { - zipOut = new ZipOutputStream(new FileOutputStream(zipOutFile)); - File[] indexFiles = indexFilesDir.listFiles(); - for (File indexFile : indexFiles) { - log.info("Adding indexFile[%s] with size[%,d]. 
Total size[%,d]", indexFile, indexFile.length(), indexSize); - if (indexFile.length() >= Integer.MAX_VALUE) { - throw new ISE("indexFile[%s] too large [%,d]", indexFile, indexFile.length()); - } - zipOut.putNextEntry(new ZipEntry(indexFile.getName())); - IOUtils.copy(new FileInputStream(indexFile), zipOut); - indexSize += indexFile.length(); - } - } - finally { - Closeables.closeQuietly(zipOut); - } - - try { - S3Object toPush = new S3Object(zipOutFile); - - final String outputBucket = config.getBucket(); - toPush.setBucketName(outputBucket); - toPush.setKey(outputKey + "/index.zip"); - - log.info("Pushing %s.", toPush); - s3Client.putObject(outputBucket, toPush); - - DataSegment outputSegment = segment.withSize(indexSize) - .withLoadSpec( - ImmutableMap.of( - "type", "s3_zip", - "bucket", outputBucket, - "key", toPush.getKey() - ) - ); - - File descriptorFile = File.createTempFile("druid", "descriptor.json"); - StreamUtils.copyToFileAndClose(new ByteArrayInputStream(jsonMapper.writeValueAsBytes(segment)), descriptorFile); - S3Object descriptorObject = new S3Object(descriptorFile); - descriptorObject.setBucketName(outputBucket); - descriptorObject.setKey(outputKey + "/descriptor.json"); - - log.info("Pushing %s", descriptorObject); - s3Client.putObject(outputBucket, descriptorObject); - - log.info("Deleting Index File[%s]", indexFilesDir); - FileUtils.deleteDirectory(indexFilesDir); - - log.info("Deleting zipped index File[%s]", zipOutFile); - zipOutFile.delete(); - - log.info("Deleting descriptor file[%s]", descriptorFile); - descriptorFile.delete(); - - return outputSegment; - } - catch (NoSuchAlgorithmException e) { - throw new IOException(e); - } - catch (S3ServiceException e) { - throw new IOException(e); - } + super(s3Client, config, jsonMapper); } } diff --git a/realtime/src/main/java/com/metamx/druid/realtime/Sink.java b/realtime/src/main/java/com/metamx/druid/realtime/Sink.java index 051550e3c41..42acc191b63 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/Sink.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/Sink.java @@ -30,6 +30,7 @@ import com.metamx.common.logger.Logger; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.v1.IncrementalIndex; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.input.InputRow; import org.joda.time.Interval; @@ -134,6 +135,7 @@ public class Sink implements Iterable } }), schema.getShardSpec(), + null, 0 ); } diff --git a/server/pom.xml b/server/pom.xml index 4a1f2065059..1e33320d0df 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.1.25-SNAPSHOT + 0.2.7-SNAPSHOT @@ -209,13 +209,6 @@ easymock test - - com.metamx.druid - druid-client - ${project.parent.version} - test-jar - test - com.metamx.druid druid-index-common diff --git a/server/src/main/java/com/metamx/druid/coordination/ServerManager.java b/server/src/main/java/com/metamx/druid/coordination/ServerManager.java index 9d0e8bbdfa2..3bde07908c7 100644 --- a/server/src/main/java/com/metamx/druid/coordination/ServerManager.java +++ b/server/src/main/java/com/metamx/druid/coordination/ServerManager.java @@ -24,13 +24,12 @@ import com.google.common.collect.Ordering; import com.metamx.common.ISE; import com.metamx.common.guava.FunctionalIterable; import com.metamx.druid.Query; -import com.metamx.druid.StorageAdapter; import com.metamx.druid.TimelineObjectHolder; import com.metamx.druid.VersionedIntervalTimeline; import 
com.metamx.druid.client.DataSegment; import com.metamx.druid.collect.CountingMap; -import com.metamx.druid.index.v1.SegmentIdAttachedStorageAdapter; -import com.metamx.druid.loading.StorageAdapterLoader; +import com.metamx.druid.index.Segment; +import com.metamx.druid.loading.SegmentLoader; import com.metamx.druid.loading.StorageAdapterLoadingException; import com.metamx.druid.partition.PartitionChunk; import com.metamx.druid.partition.PartitionHolder; @@ -66,29 +65,29 @@ public class ServerManager implements QuerySegmentWalker private final Object lock = new Object(); - private final StorageAdapterLoader storageAdapterLoader; + private final SegmentLoader segmentLoader; private final QueryRunnerFactoryConglomerate conglomerate; private final ServiceEmitter emitter; private final ExecutorService exec; - private final Map> dataSources; + private final Map> dataSources; private final CountingMap dataSourceSizes = new CountingMap(); private final CountingMap dataSourceCounts = new CountingMap(); public ServerManager( - StorageAdapterLoader storageAdapterLoader, + SegmentLoader segmentLoader, QueryRunnerFactoryConglomerate conglomerate, ServiceEmitter emitter, ExecutorService exec ) { - this.storageAdapterLoader = storageAdapterLoader; + this.segmentLoader = segmentLoader; this.conglomerate = conglomerate; this.emitter = emitter; this.exec = exec; - this.dataSources = new HashMap>(); + this.dataSources = new HashMap>(); } public Map getDataSourceSizes() @@ -107,13 +106,13 @@ public class ServerManager implements QuerySegmentWalker public void loadSegment(final DataSegment segment) throws StorageAdapterLoadingException { - StorageAdapter adapter = null; + final Segment adapter; try { - adapter = storageAdapterLoader.getAdapter(segment.getLoadSpec()); + adapter = segmentLoader.getSegment(segment); } catch (StorageAdapterLoadingException e) { try { - storageAdapterLoader.cleanupAdapter(segment.getLoadSpec()); + segmentLoader.cleanup(segment); } catch (StorageAdapterLoadingException e1) { // ignore @@ -125,18 +124,16 @@ public class ServerManager implements QuerySegmentWalker throw new StorageAdapterLoadingException("Null adapter from loadSpec[%s]", segment.getLoadSpec()); } - adapter = new SegmentIdAttachedStorageAdapter(segment.getIdentifier(), adapter); - synchronized (lock) { String dataSource = segment.getDataSource(); - VersionedIntervalTimeline loadedIntervals = dataSources.get(dataSource); + VersionedIntervalTimeline loadedIntervals = dataSources.get(dataSource); if (loadedIntervals == null) { - loadedIntervals = new VersionedIntervalTimeline(Ordering.natural()); + loadedIntervals = new VersionedIntervalTimeline(Ordering.natural()); dataSources.put(dataSource, loadedIntervals); } - PartitionHolder entry = loadedIntervals.findEntry( + PartitionHolder entry = loadedIntervals.findEntry( segment.getInterval(), segment.getVersion() ); @@ -161,17 +158,17 @@ public class ServerManager implements QuerySegmentWalker { String dataSource = segment.getDataSource(); synchronized (lock) { - VersionedIntervalTimeline loadedIntervals = dataSources.get(dataSource); + VersionedIntervalTimeline loadedIntervals = dataSources.get(dataSource); if (loadedIntervals == null) { log.info("Told to delete a queryable for a dataSource[%s] that doesn't exist.", dataSource); return; } - PartitionChunk removed = loadedIntervals.remove( - segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk((StorageAdapter) null) + PartitionChunk removed = loadedIntervals.remove( + segment.getInterval(), 
segment.getVersion(), segment.getShardSpec().createChunk((Segment) null) ); - StorageAdapter oldQueryable = (removed == null) ? null : removed.getObject(); + Segment oldQueryable = (removed == null) ? null : removed.getObject(); if (oldQueryable != null) { synchronized (dataSourceSizes) { @@ -189,7 +186,7 @@ public class ServerManager implements QuerySegmentWalker ); } } - storageAdapterLoader.cleanupAdapter(segment.getLoadSpec()); + segmentLoader.cleanup(segment); } @Override @@ -202,7 +199,7 @@ public class ServerManager implements QuerySegmentWalker final QueryToolChest> toolChest = factory.getToolchest(); - final VersionedIntervalTimeline timeline = dataSources.get(query.getDataSource()); + final VersionedIntervalTimeline timeline = dataSources.get(query.getDataSource()); if (timeline == null) { return new NoopQueryRunner(); @@ -211,20 +208,20 @@ public class ServerManager implements QuerySegmentWalker FunctionalIterable> adapters = FunctionalIterable .create(intervals) .transformCat( - new Function>>() + new Function>>() { @Override - public Iterable> apply(Interval input) + public Iterable> apply(Interval input) { return timeline.lookup(input); } } ) .transformCat( - new Function, Iterable>>() + new Function, Iterable>>() { @Override - public Iterable> apply(@Nullable final TimelineObjectHolder holder) + public Iterable> apply(@Nullable final TimelineObjectHolder holder) { if (holder == null) { return null; @@ -233,10 +230,10 @@ public class ServerManager implements QuerySegmentWalker return FunctionalIterable .create(holder.getObject()) .transform( - new Function, QueryRunner>() + new Function, QueryRunner>() { @Override - public QueryRunner apply(PartitionChunk input) + public QueryRunner apply(PartitionChunk input) { return buildAndDecorateQueryRunner( factory, @@ -274,7 +271,7 @@ public class ServerManager implements QuerySegmentWalker final QueryToolChest> toolChest = factory.getToolchest(); - final VersionedIntervalTimeline timeline = dataSources.get(query.getDataSource()); + final VersionedIntervalTimeline timeline = dataSources.get(query.getDataSource()); if (timeline == null) { return new NoopQueryRunner(); @@ -289,7 +286,7 @@ public class ServerManager implements QuerySegmentWalker @SuppressWarnings("unchecked") public Iterable> apply(@Nullable SegmentDescriptor input) { - final PartitionHolder entry = timeline.findEntry( + final PartitionHolder entry = timeline.findEntry( input.getInterval(), input.getVersion() ); @@ -297,13 +294,13 @@ public class ServerManager implements QuerySegmentWalker return null; } - final PartitionChunk chunk = entry.getChunk(input.getPartitionNumber()); + final PartitionChunk chunk = entry.getChunk(input.getPartitionNumber()); if (chunk == null) { return null; } - final StorageAdapter adapter = chunk.getObject(); - return Arrays.>asList( + final Segment adapter = chunk.getObject(); + return Arrays.asList( buildAndDecorateQueryRunner(factory, toolChest, adapter, new SpecificSegmentSpec(input)) ); } @@ -316,7 +313,7 @@ public class ServerManager implements QuerySegmentWalker private QueryRunner buildAndDecorateQueryRunner( QueryRunnerFactory> factory, final QueryToolChest> toolChest, - StorageAdapter adapter, + Segment adapter, QuerySegmentSpec segmentSpec ) { @@ -332,8 +329,8 @@ public class ServerManager implements QuerySegmentWalker } }, new BySegmentQueryRunner( - adapter.getSegmentIdentifier(), - adapter.getInterval().getStart(), + adapter.getIdentifier(), + adapter.getDataInterval().getStart(), factory.createRunner(adapter) ) 
).withWaitMeasuredFromNow(), diff --git a/server/src/main/java/com/metamx/druid/http/ComputeNode.java b/server/src/main/java/com/metamx/druid/http/ComputeNode.java index 39f43bfcde2..c6d284403c0 100644 --- a/server/src/main/java/com/metamx/druid/http/ComputeNode.java +++ b/server/src/main/java/com/metamx/druid/http/ComputeNode.java @@ -38,7 +38,7 @@ import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerInit; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.loading.QueryableLoaderConfig; -import com.metamx.druid.loading.StorageAdapterLoader; +import com.metamx.druid.loading.SegmentLoader; import com.metamx.druid.metrics.ServerMonitor; import com.metamx.druid.query.MetricsEmittingExecutorService; import com.metamx.druid.query.QueryRunnerFactoryConglomerate; @@ -71,7 +71,7 @@ public class ComputeNode extends BaseServerNode } private DruidServer druidServer; - private StorageAdapterLoader adapterLoader; + private SegmentLoader segmentLoader; public ComputeNode( Properties props, @@ -84,10 +84,10 @@ public class ComputeNode extends BaseServerNode super(log, props, lifecycle, jsonMapper, smileMapper, configFactory); } - public ComputeNode setAdapterLoader(StorageAdapterLoader storageAdapterLoader) + public ComputeNode setSegmentLoader(SegmentLoader segmentLoader) { - Preconditions.checkState(this.adapterLoader == null, "Cannot set adapterLoader once it has already been set."); - this.adapterLoader = storageAdapterLoader; + Preconditions.checkState(this.segmentLoader == null, "Cannot set segmentLoader once it has already been set."); + this.segmentLoader = segmentLoader; return this; } @@ -104,10 +104,10 @@ public class ComputeNode extends BaseServerNode return druidServer; } - public StorageAdapterLoader getAdapterLoader() + public SegmentLoader getSegmentLoader() { initializeAdapterLoader(); - return adapterLoader; + return segmentLoader; } protected void doInit() throws Exception @@ -126,10 +126,12 @@ public class ComputeNode extends BaseServerNode getConfigFactory().buildWithReplacements( ExecutorServiceConfig.class, ImmutableMap.of("base_path", "druid.processing") ) - ), emitter, new ServiceMetricEvent.Builder() + ), + emitter, + new ServiceMetricEvent.Builder() ); - final ServerManager serverManager = new ServerManager(adapterLoader, conglomerate, emitter, executorService); + final ServerManager serverManager = new ServerManager(segmentLoader, conglomerate, emitter, executorService); final ZkCoordinator coordinator = new ZkCoordinator( getJsonMapper(), @@ -157,7 +159,7 @@ public class ComputeNode extends BaseServerNode private void initializeAdapterLoader() { - if (adapterLoader == null) { + if (segmentLoader == null) { final Properties props = getProps(); try { final RestS3Service s3Client = new RestS3Service( @@ -167,7 +169,7 @@ public class ComputeNode extends BaseServerNode ) ); - setAdapterLoader( + setSegmentLoader( ServerInit.makeDefaultQueryableLoader(s3Client, getConfigFactory().build(QueryableLoaderConfig.class)) ); } diff --git a/server/src/main/java/com/metamx/druid/loading/SingleStorageAdapterLoader.java b/server/src/main/java/com/metamx/druid/index/IncrementalIndexSegment.java similarity index 54% rename from server/src/main/java/com/metamx/druid/loading/SingleStorageAdapterLoader.java rename to server/src/main/java/com/metamx/druid/index/IncrementalIndexSegment.java index 8c58ac1c2bd..79c7dc069b6 100644 --- a/server/src/main/java/com/metamx/druid/loading/SingleStorageAdapterLoader.java +++ 
b/server/src/main/java/com/metamx/druid/index/IncrementalIndexSegment.java @@ -17,39 +17,47 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package com.metamx.druid.loading; +package com.metamx.druid.index; -import com.google.inject.Inject; import com.metamx.druid.StorageAdapter; - -import java.util.Map; +import com.metamx.druid.index.v1.IncrementalIndex; +import com.metamx.druid.index.v1.IncrementalIndexStorageAdapter; +import org.joda.time.Interval; /** */ -public class SingleStorageAdapterLoader implements StorageAdapterLoader +public class IncrementalIndexSegment implements Segment { - private final SegmentGetter segmentGetter; - private final StorageAdapterFactory factory; + private final IncrementalIndex index; - @Inject - public SingleStorageAdapterLoader( - SegmentGetter segmentGetter, - StorageAdapterFactory factory + public IncrementalIndexSegment( + IncrementalIndex index ) { - this.segmentGetter = segmentGetter; - this.factory = factory; + this.index = index; } @Override - public StorageAdapter getAdapter(Map loadSpec) throws StorageAdapterLoadingException + public String getIdentifier() { - return factory.factorize(segmentGetter.getSegmentFiles(loadSpec)); + throw new UnsupportedOperationException(); } @Override - public void cleanupAdapter(Map loadSpec) throws StorageAdapterLoadingException + public Interval getDataInterval() { - segmentGetter.cleanSegmentFiles(loadSpec); + return index.getInterval(); + } + + @Override + public QueryableIndex asQueryableIndex() + { + return null; + } + + @Override + public StorageAdapter asStorageAdapter() + { + return new IncrementalIndexStorageAdapter(index); } } diff --git a/server/src/main/java/com/metamx/druid/index/QueryableIndexSegment.java b/server/src/main/java/com/metamx/druid/index/QueryableIndexSegment.java new file mode 100644 index 00000000000..770eb783d3b --- /dev/null +++ b/server/src/main/java/com/metamx/druid/index/QueryableIndexSegment.java @@ -0,0 +1,62 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.index; + +import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.v1.QueryableIndexStorageAdapter; +import org.joda.time.Interval; + +/** +*/ +public class QueryableIndexSegment implements Segment +{ + private final QueryableIndex index; + private final String identifier; + + public QueryableIndexSegment(final String segmentIdentifier, QueryableIndex index) + { + this.index = index; + identifier = segmentIdentifier; + } + + @Override + public String getIdentifier() + { + return identifier; + } + + @Override + public Interval getDataInterval() + { + return index.getDataInterval(); + } + + @Override + public QueryableIndex asQueryableIndex() + { + return index; + } + + @Override + public StorageAdapter asStorageAdapter() + { + return new QueryableIndexStorageAdapter(index); + } +} diff --git a/server/src/main/java/com/metamx/druid/loading/StorageAdapterLoader.java b/server/src/main/java/com/metamx/druid/index/Segment.java similarity index 74% rename from server/src/main/java/com/metamx/druid/loading/StorageAdapterLoader.java rename to server/src/main/java/com/metamx/druid/index/Segment.java index 96afd41e2ce..b2edda90f8d 100644 --- a/server/src/main/java/com/metamx/druid/loading/StorageAdapterLoader.java +++ b/server/src/main/java/com/metamx/druid/index/Segment.java @@ -17,16 +17,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -package com.metamx.druid.loading; +package com.metamx.druid.index; import com.metamx.druid.StorageAdapter; - -import java.util.Map; +import org.joda.time.Interval; /** */ -public interface StorageAdapterLoader +public interface Segment { - public StorageAdapter getAdapter(Map loadSpec) throws StorageAdapterLoadingException; - public void cleanupAdapter(Map loadSpec) throws StorageAdapterLoadingException; + public String getIdentifier(); + public Interval getDataInterval(); + public QueryableIndex asQueryableIndex(); + public StorageAdapter asStorageAdapter(); } diff --git a/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java b/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java index 526877176a9..d09581c1c5b 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java +++ b/server/src/main/java/com/metamx/druid/index/v1/ComplexMetricColumnSerializer.java @@ -19,7 +19,6 @@ package com.metamx.druid.index.v1; -import com.google.common.io.ByteStreams; import com.google.common.io.Files; import com.metamx.druid.index.v1.serde.ComplexMetricSerde; import com.metamx.druid.kv.FlattenedArrayWriter; @@ -27,7 +26,6 @@ import com.metamx.druid.kv.IOPeon; import java.io.File; import java.io.IOException; -import java.nio.ByteOrder; /** */ @@ -75,18 +73,12 @@ public class ComplexMetricColumnSerializer implements MetricColumnSerializer { writer.close(); - final File littleEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.LITTLE_ENDIAN); - littleEndianFile.delete(); + final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + outFile.delete(); MetricHolder.writeComplexMetric( - Files.newOutputStreamSupplier(littleEndianFile, true), metricName, serde.getTypeName(), writer - ); - IndexIO.checkFileSize(littleEndianFile); - - final File bigEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.BIG_ENDIAN); - ByteStreams.copy( - Files.newInputStreamSupplier(littleEndianFile), - Files.newOutputStreamSupplier(bigEndianFile, false) + 
Files.newOutputStreamSupplier(outFile, true), metricName, serde.getTypeName(), writer ); + IndexIO.checkFileSize(outFile); writer = null; } diff --git a/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java b/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java index 8207897e502..20ec5a4d30d 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java +++ b/server/src/main/java/com/metamx/druid/index/v1/FloatMetricColumnSerializer.java @@ -24,7 +24,6 @@ import com.metamx.druid.kv.IOPeon; import java.io.File; import java.io.IOException; -import java.nio.ByteOrder; /** */ @@ -34,8 +33,7 @@ public class FloatMetricColumnSerializer implements MetricColumnSerializer private final IOPeon ioPeon; private final File outDir; - private CompressedFloatsSupplierSerializer littleMetricsWriter; - private CompressedFloatsSupplierSerializer bigEndianMetricsWriter; + private CompressedFloatsSupplierSerializer writer; public FloatMetricColumnSerializer( String metricName, @@ -51,43 +49,30 @@ public class FloatMetricColumnSerializer implements MetricColumnSerializer @Override public void open() throws IOException { - littleMetricsWriter = CompressedFloatsSupplierSerializer.create( - ioPeon, String.format("%s_little", metricName), ByteOrder.LITTLE_ENDIAN - ); - bigEndianMetricsWriter = CompressedFloatsSupplierSerializer.create( - ioPeon, String.format("%s_big", metricName), ByteOrder.BIG_ENDIAN + writer = CompressedFloatsSupplierSerializer.create( + ioPeon, String.format("%s_little", metricName), IndexIO.BYTE_ORDER ); - littleMetricsWriter.open(); - bigEndianMetricsWriter.open(); + writer.open(); } @Override public void serialize(Object obj) throws IOException { float val = (obj == null) ? 
0 : ((Number) obj).floatValue(); - littleMetricsWriter.add(val); - bigEndianMetricsWriter.add(val); + writer.add(val); } @Override public void close() throws IOException { - final File littleEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.LITTLE_ENDIAN); - littleEndianFile.delete(); + final File outFile = IndexIO.makeMetricFile(outDir, metricName, IndexIO.BYTE_ORDER); + outFile.delete(); MetricHolder.writeFloatMetric( - Files.newOutputStreamSupplier(littleEndianFile, true), metricName, littleMetricsWriter + Files.newOutputStreamSupplier(outFile, true), metricName, writer ); - IndexIO.checkFileSize(littleEndianFile); + IndexIO.checkFileSize(outFile); - final File bigEndianFile = IndexIO.makeMetricFile(outDir, metricName, ByteOrder.BIG_ENDIAN); - bigEndianFile.delete(); - MetricHolder.writeFloatMetric( - Files.newOutputStreamSupplier(bigEndianFile, true), metricName, bigEndianMetricsWriter - ); - IndexIO.checkFileSize(bigEndianFile); - - littleMetricsWriter = null; - bigEndianMetricsWriter = null; + writer = null; } } diff --git a/server/src/main/java/com/metamx/druid/index/v1/IncrementalIndexStorageAdapter.java b/server/src/main/java/com/metamx/druid/index/v1/IncrementalIndexStorageAdapter.java index ef5ecef5d56..bf8e0ac9653 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/IncrementalIndexStorageAdapter.java +++ b/server/src/main/java/com/metamx/druid/index/v1/IncrementalIndexStorageAdapter.java @@ -215,7 +215,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter while (baseIter.hasNext()) { currEntry.set(baseIter.next()); if (filterMatcher.matches()) { - break; + return; } numAdvanced++; diff --git a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java index 23c6dbb6842..3cd9170f9e2 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java +++ b/server/src/main/java/com/metamx/druid/index/v1/IndexMerger.java @@ -44,6 +44,7 @@ import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.aggregation.ToLowerCaseAggregatorFactory; import com.metamx.druid.guava.FileOutputSupplier; import com.metamx.druid.guava.GuavaUtils; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.serde.ComplexMetricSerde; import com.metamx.druid.index.v1.serde.ComplexMetrics; import com.metamx.druid.kv.ConciseCompressedIndexedInts; @@ -75,8 +76,10 @@ import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeSet; /** @@ -139,26 +142,26 @@ public class IndexMerger ); } - public static File mergeMMapped( - List indexes, final AggregatorFactory[] metricAggs, File outDir + public static File mergeQueryableIndex( + List indexes, final AggregatorFactory[] metricAggs, File outDir ) throws IOException { - return mergeMMapped(indexes, metricAggs, outDir, new NoopProgressIndicator()); + return mergeQueryableIndex(indexes, metricAggs, outDir, new NoopProgressIndicator()); } - public static File mergeMMapped( - List indexes, final AggregatorFactory[] metricAggs, File outDir, ProgressIndicator progress + public static File mergeQueryableIndex( + List indexes, final AggregatorFactory[] metricAggs, File outDir, ProgressIndicator progress ) throws IOException { return merge( Lists.transform( indexes, - new Function() + new Function() { @Override - public 
IndexableAdapter apply(@Nullable final MMappedIndex input) + public IndexableAdapter apply(final QueryableIndex input) { - return new MMappedIndexAdapter(input); + return new QueryableIndexIndexableAdapter(input); } } ), @@ -391,18 +394,20 @@ public class IndexMerger } } final Interval dataInterval; + File v8OutDir = new File(outDir, "v8-tmp"); + v8OutDir.mkdirs(); /************* Main index.drd file **************/ progress.progress(); long startTime = System.currentTimeMillis(); - File indexFile = new File(outDir, "index.drd"); + File indexFile = new File(v8OutDir, "index.drd"); FileOutputStream fileOutputStream = null; FileChannel channel = null; try { fileOutputStream = new FileOutputStream(indexFile); channel = fileOutputStream.getChannel(); - channel.write(ByteBuffer.wrap(new byte[]{IndexIO.CURRENT_VERSION_ID})); + channel.write(ByteBuffer.wrap(new byte[]{IndexIO.V8_VERSION})); GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.stringStrategy).writeToChannel(channel); GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.stringStrategy).writeToChannel(channel); @@ -425,7 +430,7 @@ public class IndexMerger fileOutputStream = null; } IndexIO.checkFileSize(indexFile); - log.info("outDir[%s] completed index.drd in %,d millis.", outDir, System.currentTimeMillis() - startTime); + log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); /************* Setup Dim Conversions **************/ progress.progress(); @@ -498,7 +503,7 @@ public class IndexMerger } dimensionCardinalities.put(dimension, count); - FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(outDir, dimension), true); + FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, dimension), true); dimOuts.add(dimOut); writer.close(); @@ -513,7 +518,7 @@ public class IndexMerger ioPeon.cleanup(); } - log.info("outDir[%s] completed dim conversions in %,d millis.", outDir, System.currentTimeMillis() - startTime); + log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); /************* Walk through data sets and merge them *************/ progress.progress(); @@ -572,15 +577,11 @@ public class IndexMerger Iterable theRows = rowMergerFn.apply(boats); - CompressedLongsSupplierSerializer littleEndianTimeWriter = CompressedLongsSupplierSerializer.create( - ioPeon, "little_end_time", ByteOrder.LITTLE_ENDIAN - ); - CompressedLongsSupplierSerializer bigEndianTimeWriter = CompressedLongsSupplierSerializer.create( - ioPeon, "big_end_time", ByteOrder.BIG_ENDIAN + CompressedLongsSupplierSerializer timeWriter = CompressedLongsSupplierSerializer.create( + ioPeon, "little_end_time", IndexIO.BYTE_ORDER ); - littleEndianTimeWriter.open(); - bigEndianTimeWriter.open(); + timeWriter.open(); ArrayList forwardDimWriters = Lists.newArrayListWithCapacity(mergedDimensions.size()); for (String dimension : mergedDimensions) { @@ -594,7 +595,7 @@ public class IndexMerger String metric = entry.getKey(); String typeName = entry.getValue(); if ("float".equals(typeName)) { - metWriters.add(new FloatMetricColumnSerializer(metric, outDir, ioPeon)); + metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon)); } else { ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName); @@ -602,7 +603,7 @@ public class IndexMerger throw new ISE("Unknown type[%s]", typeName); } - metWriters.add(new ComplexMetricColumnSerializer(metric, outDir, ioPeon, serde)); + metWriters.add(new 
ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde)); } } for (MetricColumnSerializer metWriter : metWriters) { @@ -620,8 +621,7 @@ public class IndexMerger for (Rowboat theRow : theRows) { progress.progress(); - littleEndianTimeWriter.add(theRow.getTimestamp()); - bigEndianTimeWriter.add(theRow.getTimestamp()); + timeWriter.add(theRow.getTimestamp()); final Object[] metrics = theRow.getMetrics(); for (int i = 0; i < metrics.length; ++i) { @@ -649,7 +649,7 @@ public class IndexMerger if ((++rowCount % 500000) == 0) { log.info( - "outDir[%s] walked 500,000/%,d rows in %,d millis.", outDir, rowCount, System.currentTimeMillis() - time + "outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time ); time = System.currentTimeMillis(); } @@ -659,17 +659,11 @@ public class IndexMerger rowNumConversion.rewind(); } - final File littleEndianFile = IndexIO.makeTimeFile(outDir, ByteOrder.LITTLE_ENDIAN); - littleEndianFile.delete(); - OutputSupplier out = Files.newOutputStreamSupplier(littleEndianFile, true); - littleEndianTimeWriter.closeAndConsolidate(out); - IndexIO.checkFileSize(littleEndianFile); - - final File bigEndianFile = IndexIO.makeTimeFile(outDir, ByteOrder.BIG_ENDIAN); - bigEndianFile.delete(); - out = Files.newOutputStreamSupplier(bigEndianFile, true); - bigEndianTimeWriter.closeAndConsolidate(out); - IndexIO.checkFileSize(bigEndianFile); + final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER); + timeFile.delete(); + OutputSupplier out = Files.newOutputStreamSupplier(timeFile, true); + timeWriter.closeAndConsolidate(out); + IndexIO.checkFileSize(timeFile); for (int i = 0; i < mergedDimensions.size(); ++i) { forwardDimWriters.get(i).close(); @@ -683,7 +677,7 @@ public class IndexMerger ioPeon.cleanup(); log.info( "outDir[%s] completed walk through of %,d rows in %,d millis.", - outDir, + v8OutDir, rowCount, System.currentTimeMillis() - startTime ); @@ -691,7 +685,7 @@ public class IndexMerger /************ Create Inverted Indexes *************/ startTime = System.currentTimeMillis(); - final File invertedFile = new File(outDir, "inverted.drd"); + final File invertedFile = new File(v8OutDir, "inverted.drd"); Files.touch(invertedFile); out = Files.newOutputStreamSupplier(invertedFile, true); for (int i = 0; i < mergedDimensions.size(); ++i) { @@ -704,7 +698,7 @@ public class IndexMerger if (!dimension.equals(serializerUtils.readString(dimValsMapped))) { throw new ISE("dimensions[%s] didn't equate!? 
This is a major WTF moment.", dimension); } - Indexed dimVals = GenericIndexed.readFromByteBuffer(dimValsMapped, GenericIndexed.stringStrategy); + Indexed dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.stringStrategy); log.info("Starting dimension[%s] with cardinality[%,d]", dimension, dimVals.size()); FlattenedArrayWriter writer = new FlattenedArrayWriter( @@ -724,10 +718,7 @@ public class IndexMerger } ConciseSet bitset = new ConciseSet(); - for (Integer row : CombiningIterable.createSplatted( - convertedInverteds, - Ordering.natural().nullsFirst() - )) { + for (Integer row : CombiningIterable.createSplatted(convertedInverteds, Ordering.natural().nullsFirst())) { if (row != INVALID_ROW) { bitset.add(row); } @@ -743,33 +734,34 @@ public class IndexMerger log.info("Completed dimension[%s] in %,d millis.", dimension, System.currentTimeMillis() - dimStartTime); } - log.info("outDir[%s] completed inverted.drd in %,d millis.", outDir, System.currentTimeMillis() - startTime); + log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime); final ArrayList expectedFiles = Lists.newArrayList( Iterables.concat( Arrays.asList( - "index.drd", "inverted.drd", "time_BIG_ENDIAN.drd", "time_LITTLE_ENDIAN.drd" + "index.drd", "inverted.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER) ), Iterables.transform(mergedDimensions, GuavaUtils.formatFunction("dim_%s.drd")), - Iterables.transform(mergedMetrics, GuavaUtils.formatFunction("met_%s_LITTLE_ENDIAN.drd")), - Iterables.transform(mergedMetrics, GuavaUtils.formatFunction("met_%s_BIG_ENDIAN.drd")) + Iterables.transform( + mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)) + ) ) ); Map files = Maps.newLinkedHashMap(); for (String fileName : expectedFiles) { - files.put(fileName, new File(outDir, fileName)); + files.put(fileName, new File(v8OutDir, fileName)); } - File smooshDir = new File(outDir, "smoosher"); + File smooshDir = new File(v8OutDir, "smoosher"); smooshDir.mkdir(); - for (Map.Entry entry : Smoosh.smoosh(outDir, smooshDir, files).entrySet()) { + for (Map.Entry entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) { entry.getValue().delete(); } for (File file : smooshDir.listFiles()) { - Files.move(file, new File(outDir, file.getName())); + Files.move(file, new File(v8OutDir, file.getName())); } if (!smooshDir.delete()) { @@ -778,19 +770,22 @@ public class IndexMerger } createIndexDrdFile( - IndexIO.CURRENT_VERSION_ID, - outDir, + IndexIO.V8_VERSION, + v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.stringStrategy), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.stringStrategy), dataInterval ); + IndexIO.DefaultIndexIOHandler.convertV8toV9(v8OutDir, outDir); + FileUtils.deleteDirectory(v8OutDir); + return outDir; } private static ArrayList mergeIndexed(final List> indexedLists) { - TreeSet retVal = Sets.newTreeSet(Ordering.natural().nullsFirst()); + Set retVal = Sets.newTreeSet(Ordering.natural().nullsFirst()); for (Iterable indexedList : indexedLists) { for (T val : indexedList) { diff --git a/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexStorageAdapter.java b/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexStorageAdapter.java deleted file mode 100644 index 1e64950d308..00000000000 --- a/server/src/main/java/com/metamx/druid/index/v1/MMappedIndexStorageAdapter.java +++ /dev/null @@ -1,666 +0,0 @@ -/* - * Druid - a distributed column store. 
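The IndexMerger hunks above switch the merge to a two-phase flow: the legacy v8 layout is written into a scratch "v8-tmp" directory under outDir, then converted to the new v9 layout with IndexIO.DefaultIndexIOHandler.convertV8toV9, and the scratch directory is deleted before outDir is returned. A minimal sketch of that flow, where writeV8Index and convertV8toV9 are hypothetical stand-ins for the real IndexMerger/IndexIO code paths:

import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;

public class TwoPhaseMergeSketch
{
  public File mergeToV9(File outDir) throws IOException
  {
    final File v8OutDir = new File(outDir, "v8-tmp");
    v8OutDir.mkdirs();

    writeV8Index(v8OutDir);          // index.drd, dim_*.drd, met_*.drd, inverted.drd, smooshed files
    convertV8toV9(v8OutDir, outDir); // rewrite the v8 files as a v9 index in outDir

    FileUtils.deleteDirectory(v8OutDir); // the scratch dir is not part of the published segment
    return outDir;
  }

  private void writeV8Index(File v8Dir) throws IOException { /* elided in this sketch */ }

  private void convertV8toV9(File v8Dir, File v9Dir) throws IOException { /* elided in this sketch */ }
}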
- * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.index.v1; - -import com.google.common.base.Function; -import com.google.common.base.Functions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Maps; -import com.google.common.io.Closeables; -import com.metamx.common.collect.MoreIterators; -import com.metamx.common.guava.FunctionalIterable; -import com.metamx.common.guava.FunctionalIterator; -import com.metamx.druid.BaseStorageAdapter; -import com.metamx.druid.Capabilities; -import com.metamx.druid.QueryGranularity; -import com.metamx.druid.index.brita.BitmapIndexSelector; -import com.metamx.druid.index.brita.Filter; -import com.metamx.druid.index.v1.processing.Cursor; -import com.metamx.druid.index.v1.processing.DimensionSelector; -import com.metamx.druid.index.v1.processing.Offset; -import com.metamx.druid.kv.Indexed; -import com.metamx.druid.kv.IndexedFloats; -import com.metamx.druid.kv.IndexedInts; -import com.metamx.druid.kv.IndexedLongs; -import com.metamx.druid.processing.ComplexMetricSelector; -import com.metamx.druid.processing.FloatMetricSelector; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; -import org.joda.time.DateTime; -import org.joda.time.Interval; - -import java.io.Closeable; -import java.util.Iterator; -import java.util.Map; - -/** - */ -public class MMappedIndexStorageAdapter extends BaseStorageAdapter -{ - private final MMappedIndex index; - - public MMappedIndexStorageAdapter( - MMappedIndex index - ) - { - this.index = index; - } - - public MMappedIndex getIndex() - { - return index; - } - - @Override - public String getSegmentIdentifier() - { - throw new UnsupportedOperationException(); - } - - @Override - public Interval getInterval() - { - return index.getDataInterval(); - } - - @Override - public int getDimensionCardinality(String dimension) - { - final Indexed dimValueLookup = index.getDimValueLookup(dimension.toLowerCase()); - if (dimValueLookup == null) { - return 0; - } - return dimValueLookup.size(); - } - - @Override - public DateTime getMinTime() - { - final IndexedLongs timestamps = index.getReadOnlyTimestamps(); - final DateTime retVal = new DateTime(timestamps.get(0)); - Closeables.closeQuietly(timestamps); - return retVal; - } - - @Override - public DateTime getMaxTime() - { - final IndexedLongs timestamps = index.getReadOnlyTimestamps(); - final DateTime retVal = new DateTime(timestamps.get(timestamps.size() - 1)); - Closeables.closeQuietly(timestamps); - return retVal; - } - - @Override - public Capabilities getCapabilities() - { - return Capabilities.builder().dimensionValuesSorted(true).build(); - } - - @Override - public Iterable makeCursors(Filter filter, Interval interval, QueryGranularity gran) - { - Interval actualInterval = interval; - if 
(!actualInterval.overlaps(index.dataInterval)) { - return ImmutableList.of(); - } - - if (actualInterval.getStart().isBefore(index.dataInterval.getStart())) { - actualInterval = actualInterval.withStart(index.dataInterval.getStart()); - } - if (actualInterval.getEnd().isAfter(index.dataInterval.getEnd())) { - actualInterval = actualInterval.withEnd(index.dataInterval.getEnd()); - } - - final Iterable iterable; - if (filter == null) { - iterable = new NoFilterCursorIterable(index, actualInterval, gran); - } else { - Offset offset = new ConciseOffset(filter.goConcise(new MMappedBitmapIndexSelector(index))); - - iterable = new CursorIterable(index, actualInterval, gran, offset); - } - - return FunctionalIterable.create(iterable).keep(Functions.identity()); - } - - @Override - public Indexed getAvailableDimensions() - { - return index.getAvailableDimensions(); - } - - @Override - public Indexed getDimValueLookup(String dimension) - { - return index.getDimValueLookup(dimension.toLowerCase()); - } - - @Override - public ImmutableConciseSet getInvertedIndex(String dimension, String dimVal) - { - return index.getInvertedIndex(dimension.toLowerCase(), dimVal); - } - - @Override - public Offset getFilterOffset(Filter filter) - { - return new ConciseOffset( - filter.goConcise( - new MMappedBitmapIndexSelector(index) - ) - ); - } - - private static class CursorIterable implements Iterable - { - private final MMappedIndex index; - private final Interval interval; - private final QueryGranularity gran; - private final Offset offset; - - public CursorIterable( - MMappedIndex index, - Interval interval, - QueryGranularity gran, - Offset offset - ) - { - this.index = index; - this.interval = interval; - this.gran = gran; - this.offset = offset; - } - - @Override - public Iterator iterator() - { - final Offset baseOffset = offset.clone(); - - final Map metricHolderCache = Maps.newHashMap(); - final IndexedLongs timestamps = index.getReadOnlyTimestamps(); - - final FunctionalIterator retVal = FunctionalIterator - .create(gran.iterable(interval.getStartMillis(), interval.getEndMillis()).iterator()) - .transform( - new Function() - { - - @Override - public Cursor apply(final Long input) - { - final long timeStart = Math.max(interval.getStartMillis(), input); - while (baseOffset.withinBounds() - && timestamps.get(baseOffset.getOffset()) < timeStart) { - baseOffset.increment(); - } - - final Offset offset = new TimestampCheckingOffset( - baseOffset, timestamps, Math.min(interval.getEndMillis(), gran.next(timeStart)) - ); - - return new Cursor() - { - private final Offset initOffset = offset.clone(); - private final DateTime myBucket = gran.toDateTime(input); - private Offset cursorOffset = offset; - - @Override - public DateTime getTime() - { - return myBucket; - } - - @Override - public void advance() - { - cursorOffset.increment(); - } - - @Override - public boolean isDone() - { - return !cursorOffset.withinBounds(); - } - - @Override - public void reset() - { - cursorOffset = initOffset.clone(); - } - - @Override - public DimensionSelector makeDimensionSelector(String dimension) - { - final String dimensionName = dimension; - final Indexed rowVals = index.getDimColumn(dimensionName); - final Indexed dimValueLookup = index.getDimValueLookup(dimensionName); - - if (rowVals == null) { - return null; - } - - return new DimensionSelector() - { - @Override - public IndexedInts getRow() - { - return rowVals.get(cursorOffset.getOffset()); - } - - @Override - public int getValueCardinality() - { - return 
dimValueLookup.size(); - } - - @Override - public String lookupName(int id) - { - final String retVal = dimValueLookup.get(id); - return retVal == null ? "" : retVal; - } - - @Override - public int lookupId(String name) - { - return ("".equals(name)) ? dimValueLookup.indexOf(null) : dimValueLookup.indexOf(name); - } - }; - } - - @Override - public FloatMetricSelector makeFloatMetricSelector(String metric) - { - final String metricName = metric.toLowerCase(); - IndexedFloats cachedMetricVals = (IndexedFloats) metricHolderCache.get(metricName); - - if (cachedMetricVals == null) { - MetricHolder holder = index.getMetricHolder(metricName); - if (holder != null) { - cachedMetricVals = holder.getFloatType(); - metricHolderCache.put(metricName, cachedMetricVals); - } - } - - if (cachedMetricVals == null) { - return new FloatMetricSelector() - { - @Override - public float get() - { - return 0.0f; - } - }; - } - - final IndexedFloats metricVals = cachedMetricVals; - return new FloatMetricSelector() - { - @Override - public float get() - { - return metricVals.get(cursorOffset.getOffset()); - } - }; - } - - @Override - public ComplexMetricSelector makeComplexMetricSelector(String metric) - { - final String metricName = metric.toLowerCase(); - Indexed cachedMetricVals = (Indexed) metricHolderCache.get(metricName); - - if (cachedMetricVals == null) { - MetricHolder holder = index.getMetricHolder(metricName); - if (holder != null) { - cachedMetricVals = holder.getComplexType(); - metricHolderCache.put(metricName, cachedMetricVals); - } - } - - if (cachedMetricVals == null) { - return null; - } - - final Indexed metricVals = cachedMetricVals; - return new ComplexMetricSelector() - { - @Override - public Class classOfObject() - { - return metricVals.getClazz(); - } - - @Override - public Object get() - { - return metricVals.get(cursorOffset.getOffset()); - } - }; - } - }; - } - } - ); - - // This after call is not perfect, if there is an exception during processing, it will never get called, - // but it's better than nothing and doing this properly all the time requires a lot more fixerating - return MoreIterators.after( - retVal, - new Runnable() - { - @Override - public void run() - { - Closeables.closeQuietly(timestamps); - for (Object object : metricHolderCache.values()) { - if (object instanceof Closeable) { - Closeables.closeQuietly((Closeable) object); - } - } - } - } - ); - } - } - - private static class TimestampCheckingOffset implements Offset - { - private final Offset baseOffset; - private final IndexedLongs timestamps; - private final long threshold; - - public TimestampCheckingOffset( - Offset baseOffset, - IndexedLongs timestamps, - long threshold - ) - { - this.baseOffset = baseOffset; - this.timestamps = timestamps; - this.threshold = threshold; - } - - @Override - public int getOffset() - { - return baseOffset.getOffset(); - } - - @Override - public Offset clone() - { - return new TimestampCheckingOffset(baseOffset.clone(), timestamps, threshold); - } - - @Override - public boolean withinBounds() - { - return baseOffset.withinBounds() && timestamps.get(baseOffset.getOffset()) < threshold; - } - - @Override - public void increment() - { - baseOffset.increment(); - } - } - - private static class NoFilterCursorIterable implements Iterable - { - private final MMappedIndex index; - private final Interval interval; - private final QueryGranularity gran; - - public NoFilterCursorIterable( - MMappedIndex index, - Interval interval, - QueryGranularity gran - ) - { - this.index = index; - 
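The cursor iterable above hands its iterator to MoreIterators.after so the time column and any cached metric columns get closed once iteration is exhausted; as the inline comment says, the cleanup is skipped if an exception escapes mid-iteration or the caller abandons the iterator early. A self-contained sketch of that pattern (the real helper lives in com.metamx.common.collect.MoreIterators):

import java.util.Iterator;

class AfterIteratorSketch
{
  // Runs `cleanup` the first time hasNext() reports exhaustion. If iteration is abandoned
  // early or throws, cleanup never runs -- the same caveat called out in the comment above.
  static <T> Iterator<T> after(final Iterator<T> delegate, final Runnable cleanup)
  {
    return new Iterator<T>()
    {
      private boolean cleanedUp = false;

      @Override
      public boolean hasNext()
      {
        final boolean hasNext = delegate.hasNext();
        if (!hasNext && !cleanedUp) {
          cleanup.run();
          cleanedUp = true;
        }
        return hasNext;
      }

      @Override
      public T next()
      {
        return delegate.next();
      }

      @Override
      public void remove()
      {
        delegate.remove();
      }
    };
  }
}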
this.interval = interval; - this.gran = gran; - } - - /** - * This produces iterators of Cursor objects that must be fully processed (until isDone() returns true) before the - * next Cursor is processed. It is *not* safe to pass these cursors off to another thread for parallel processing - * - * @return - */ - @Override - public Iterator iterator() - { - final Map metricCacheMap = Maps.newHashMap(); - final IndexedLongs timestamps = index.getReadOnlyTimestamps(); - - final FunctionalIterator retVal = FunctionalIterator - .create(gran.iterable(interval.getStartMillis(), interval.getEndMillis()).iterator()) - .transform( - new Function() - { - private int currRow = 0; - - @Override - public Cursor apply(final Long input) - { - final long timeStart = Math.max(interval.getStartMillis(), input); - while (currRow < timestamps.size() && timestamps.get(currRow) < timeStart) { - ++currRow; - } - - return new Cursor() - { - private final DateTime myBucket = gran.toDateTime(input); - private final long nextBucket = Math.min(gran.next(myBucket.getMillis()), interval.getEndMillis()); - private final int initRow = currRow; - - @Override - public DateTime getTime() - { - return myBucket; - } - - @Override - public void advance() - { - ++currRow; - } - - @Override - public boolean isDone() - { - return currRow >= timestamps.size() || timestamps.get(currRow) >= nextBucket; - } - - @Override - public void reset() - { - currRow = initRow; - } - - @Override - public DimensionSelector makeDimensionSelector(final String dimension) - { - final String dimensionName = dimension.toLowerCase(); - final Indexed rowVals = index.getDimColumn(dimensionName); - final Indexed dimValueLookup = index.getDimValueLookup(dimensionName); - - if (rowVals == null) { - return null; - } - - return new DimensionSelector() - { - @Override - public IndexedInts getRow() - { - return rowVals.get(currRow); - } - - @Override - public int getValueCardinality() - { - return dimValueLookup.size(); - } - - @Override - public String lookupName(int id) - { - final String retVal = dimValueLookup.get(id); - return retVal == null ? "" : retVal; - } - - @Override - public int lookupId(String name) - { - return ("".equals(name)) ? 
dimValueLookup.indexOf(null) : dimValueLookup.indexOf(name); - } - }; - } - - @Override - public FloatMetricSelector makeFloatMetricSelector(String metric) - { - final String metricName = metric.toLowerCase(); - IndexedFloats cachedMetricVals = (IndexedFloats) metricCacheMap.get(metricName); - - if (cachedMetricVals == null) { - final MetricHolder metricHolder = index.getMetricHolder(metricName); - if (metricHolder != null) { - cachedMetricVals = metricHolder.getFloatType(); - if (cachedMetricVals != null) { - metricCacheMap.put(metricName, cachedMetricVals); - } - } - } - - if (cachedMetricVals == null) { - return new FloatMetricSelector() - { - @Override - public float get() - { - return 0.0f; - } - }; - } - - final IndexedFloats metricVals = cachedMetricVals; - return new FloatMetricSelector() - { - @Override - public float get() - { - return metricVals.get(currRow); - } - }; - } - - @Override - public ComplexMetricSelector makeComplexMetricSelector(String metric) - { - final String metricName = metric.toLowerCase(); - Indexed cachedMetricVals = (Indexed) metricCacheMap.get(metricName); - - if (cachedMetricVals == null) { - final MetricHolder metricHolder = index.getMetricHolder(metricName); - - if (metricHolder != null) { - cachedMetricVals = metricHolder.getComplexType(); - if (cachedMetricVals != null) { - metricCacheMap.put(metricName, cachedMetricVals); - } - } - } - - if (cachedMetricVals == null) { - return null; - } - - final Indexed metricVals = cachedMetricVals; - return new ComplexMetricSelector() - { - @Override - public Class classOfObject() - { - return metricVals.getClazz(); - } - - @Override - public Object get() - { - return metricVals.get(currRow); - } - }; - } - }; - } - } - ); - - return MoreIterators.after( - retVal, - new Runnable() - { - @Override - public void run() - { - Closeables.closeQuietly(timestamps); - for (Object object : metricCacheMap.values()) { - if (object instanceof Closeable) { - Closeables.closeQuietly((Closeable) object); - } - } - } - } - ); - } - } - - private static class MMappedBitmapIndexSelector implements BitmapIndexSelector - { - private final MMappedIndex index; - - public MMappedBitmapIndexSelector(final MMappedIndex index) - { - this.index = index; - } - - @Override - public Indexed getDimensionValues(String dimension) - { - return index.getDimValueLookup(dimension.toLowerCase()); - } - - @Override - public int getNumRows() - { - return index.getReadOnlyTimestamps().size(); - } - - @Override - public ImmutableConciseSet getConciseInvertedIndex(String dimension, String value) - { - return index.getInvertedIndex(dimension.toLowerCase(), value); - } - } -} diff --git a/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java new file mode 100644 index 00000000000..d05864716af --- /dev/null +++ b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexIndexableAdapter.java @@ -0,0 +1,290 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
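The lookupName/lookupId pair above encodes a convention that carries over to the new adapters: a null dictionary entry (a row with no value for the dimension) is surfaced as the empty string, and "" is mapped back to the id of the null entry. A tiny stand-alone sketch of that contract over a plain sorted array:

class DictionaryLookupSketch
{
  // Sorted dictionary; a null entry (missing value) sorts first.
  private static final String[] DICTIONARY = {null, "A", "B"};

  // Mirrors lookupName above: ids map back to strings, with null rendered as "".
  static String lookupName(int id)
  {
    final String value = DICTIONARY[id];
    return value == null ? "" : value;
  }

  // Mirrors lookupId above: "" is translated back to the id of the null entry.
  static int lookupId(String name)
  {
    final String target = "".equals(name) ? null : name;
    for (int i = 0; i < DICTIONARY.length; i++) {
      if (target == null ? DICTIONARY[i] == null : target.equals(DICTIONARY[i])) {
        return i;
      }
    }
    return -1;
  }
}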
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.v1; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.common.io.Closeables; +import com.metamx.common.ISE; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.column.BitmapIndex; +import com.metamx.druid.index.column.Column; +import com.metamx.druid.index.column.ComplexColumn; +import com.metamx.druid.index.column.DictionaryEncodedColumn; +import com.metamx.druid.index.column.GenericColumn; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.kv.ArrayBasedIndexedInts; +import com.metamx.druid.kv.ConciseCompressedIndexedInts; +import com.metamx.druid.kv.EmptyIndexedInts; +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedInts; +import com.metamx.druid.kv.IndexedIterable; +import com.metamx.druid.kv.ListIndexed; +import org.joda.time.Interval; + +import java.io.Closeable; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +/** +*/ +public class QueryableIndexIndexableAdapter implements IndexableAdapter +{ + private final int numRows; + private final QueryableIndex input; + + public QueryableIndexIndexableAdapter(QueryableIndex input) + { + this.input = input; + numRows = input.getNumRows(); + } + + @Override + public Interval getDataInterval() + { + return input.getDataInterval(); + } + + @Override + public int getNumRows() + { + return numRows; + } + + @Override + public Indexed getAvailableDimensions() + { + return input.getAvailableDimensions(); + } + + @Override + public Indexed getAvailableMetrics() + { + final Set columns = Sets.newLinkedHashSet(input.getColumnNames()); + final HashSet dimensions = Sets.newHashSet(getAvailableDimensions()); + + return new ListIndexed( + Lists.newArrayList(Sets.difference(columns, dimensions)), + String.class + ); + } + + @Override + public Indexed getDimValueLookup(String dimension) + { + final Column column = input.getColumn(dimension); + + if (column == null) { + return null; + } + + final DictionaryEncodedColumn dict = column.getDictionaryEncoding(); + + if (dict == null) { + return null; + } + + return new Indexed() + { + @Override + public Class getClazz() + { + return String.class; + } + + @Override + public int size() + { + return dict.getCardinality(); + } + + @Override + public String get(int index) + { + return dict.lookupName(index); + } + + @Override + public int indexOf(String value) + { + return dict.lookupId(value); + } + + @Override + public Iterator iterator() + { + return IndexedIterable.create(this).iterator(); + } + }; + } + + @Override + public Iterable getRows() + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() + { + final GenericColumn timestamps = input.getTimeColumn().getGenericColumn(); + final Object[] metrics; + final Map dimensions; + + final int numMetrics = 
getAvailableMetrics().size(); + + int currRow = 0; + boolean done = false; + + { + dimensions = Maps.newLinkedHashMap(); + for (String dim : input.getAvailableDimensions()) { + dimensions.put(dim, input.getColumn(dim).getDictionaryEncoding()); + } + + final Indexed availableMetrics = getAvailableMetrics(); + metrics = new Object[availableMetrics.size()]; + for (int i = 0; i < metrics.length; ++i) { + final Column column = input.getColumn(availableMetrics.get(i)); + final ValueType type = column.getCapabilities().getType(); + switch (type) { + case FLOAT: + metrics[i] = column.getGenericColumn(); + break; + case COMPLEX: + metrics[i] = column.getComplexColumn(); + break; + default: + throw new ISE("Cannot handle type[%s]", type); + } + } + } + + @Override + public boolean hasNext() + { + final boolean hasNext = currRow < numRows; + if (!hasNext && !done) { + Closeables.closeQuietly(timestamps); + for (Object metric : metrics) { + if (metric instanceof Closeable) { + Closeables.closeQuietly((Closeable) metric); + } + } + done = true; + } + return hasNext; + } + + @Override + public Rowboat next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + int[][] dims = new int[dimensions.size()][]; + int dimIndex = 0; + for (String dim : dimensions.keySet()) { + final DictionaryEncodedColumn dict = dimensions.get(dim); + final IndexedInts dimVals; + if (dict.hasMultipleValues()) { + dimVals = dict.getMultiValueRow(currRow); + } + else { + dimVals = new ArrayBasedIndexedInts(new int[]{dict.getSingleValueRow(currRow)}); + } + + int[] theVals = new int[dimVals.size()]; + for (int j = 0; j < theVals.length; ++j) { + theVals[j] = dimVals.get(j); + } + + dims[dimIndex++] = theVals; + } + + Object[] metricArray = new Object[numMetrics]; + for (int i = 0; i < metricArray.length; ++i) { + if (metrics[i] instanceof GenericColumn) { + metricArray[i] = ((GenericColumn) metrics[i]).getFloatSingleValueRow(currRow); + } + else if (metrics[i] instanceof ComplexColumn) { + metricArray[i] = ((ComplexColumn) metrics[i]).getRowValue(currRow); + } + } + + final Rowboat retVal = new Rowboat( + timestamps.getLongSingleValueRow(currRow), dims, metricArray, currRow + ); + + ++currRow; + + return retVal; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + @Override + public IndexedInts getInverteds(String dimension, String value) + { + final Column column = input.getColumn(dimension); + + if (column == null) { + return new EmptyIndexedInts(); + } + + final BitmapIndex bitmaps = column.getBitmapIndex(); + if (bitmaps == null) { + return new EmptyIndexedInts(); + } + + return new ConciseCompressedIndexedInts(bitmaps.getConciseSet(value)); + } + + @Override + public String getMetricType(String metric) + { + final Column column = input.getColumn(metric); + + final ValueType type = column.getCapabilities().getType(); + switch (type) { + case FLOAT: + return "float"; + case COMPLEX: + return column.getComplexColumn().getTypeName(); + default: + throw new ISE("Unknown type[%s]", type); + } + } +} diff --git a/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexStorageAdapter.java b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexStorageAdapter.java new file mode 100644 index 00000000000..9f159eb7aeb --- /dev/null +++ b/server/src/main/java/com/metamx/druid/index/v1/QueryableIndexStorageAdapter.java @@ -0,0 +1,864 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. 
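QueryableIndexIndexableAdapter.getRows() above walks the columnar segment back into row form (Rowboat) so segments can be merged row by row; per dimension it widens a single-value dictionary id into a one-element array so single- and multi-value columns share the same int[][] shape. A minimal sketch of just that widening step, where DictColumn is a hypothetical stand-in for DictionaryEncodedColumn:

interface DictColumn
{
  boolean hasMultipleValues();

  int getSingleValueRow(int rowNum);

  int[] getMultiValueRow(int rowNum);
}

class RowWidening
{
  // Mirrors the dims[dimIndex++] logic above: every dimension contributes an int[] of
  // dictionary ids, whether the column stores one id per row or many.
  static int[] dictionaryIdsForRow(DictColumn column, int rowNum)
  {
    if (column.hasMultipleValues()) {
      return column.getMultiValueRow(rowNum);
    }
    return new int[]{column.getSingleValueRow(rowNum)};
  }
}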
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.index.v1; + +import com.google.common.base.Function; +import com.google.common.base.Functions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterators; +import com.google.common.collect.Maps; +import com.google.common.io.Closeables; +import com.metamx.common.collect.MoreIterators; +import com.metamx.common.guava.FunctionalIterable; +import com.metamx.common.guava.FunctionalIterator; +import com.metamx.druid.BaseStorageAdapter; +import com.metamx.druid.Capabilities; +import com.metamx.druid.QueryGranularity; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.brita.BitmapIndexSelector; +import com.metamx.druid.index.brita.Filter; +import com.metamx.druid.index.column.Column; +import com.metamx.druid.index.column.ColumnSelector; +import com.metamx.druid.index.column.ComplexColumn; +import com.metamx.druid.index.column.DictionaryEncodedColumn; +import com.metamx.druid.index.column.GenericColumn; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.index.v1.processing.Cursor; +import com.metamx.druid.index.v1.processing.DimensionSelector; +import com.metamx.druid.index.v1.processing.Offset; +import com.metamx.druid.kv.Indexed; +import com.metamx.druid.kv.IndexedInts; +import com.metamx.druid.kv.IndexedIterable; +import com.metamx.druid.processing.ComplexMetricSelector; +import com.metamx.druid.processing.FloatMetricSelector; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import java.util.Iterator; +import java.util.Map; + +/** + */ +public class QueryableIndexStorageAdapter extends BaseStorageAdapter +{ + private final QueryableIndex index; + + public QueryableIndexStorageAdapter( + QueryableIndex index + ) + { + this.index = index; + } + + @Override + public String getSegmentIdentifier() + { + throw new UnsupportedOperationException(); + } + + @Override + public Interval getInterval() + { + return index.getDataInterval(); + } + + @Override + public int getDimensionCardinality(String dimension) + { + Column column = null; + column = index.getColumn(dimension); + if (column == null) { + return 0; + } + if (!column.getCapabilities().isDictionaryEncoded()) { + throw new UnsupportedOperationException("Only know cardinality of dictionary encoded columns."); + } + return column.getDictionaryEncoding().getCardinality(); + } + + @Override + public DateTime getMinTime() + { + GenericColumn column = null; + try { + column = index.getTimeColumn().getGenericColumn(); + return new DateTime(column.getLongSingleValueRow(0)); + } + finally { + Closeables.closeQuietly(column); + } + } + + @Override + public DateTime getMaxTime() + { + GenericColumn column = null; + try { + column = 
index.getTimeColumn().getGenericColumn(); + return new DateTime(column.getLongSingleValueRow(column.length() - 1)); + } + finally { + Closeables.closeQuietly(column); + } + } + + @Override + public Capabilities getCapabilities() + { + return Capabilities.builder().dimensionValuesSorted(true).build(); + } + + @Override + public Iterable makeCursors(Filter filter, Interval interval, QueryGranularity gran) + { + Interval actualInterval = interval; + final Interval dataInterval = getInterval(); + if (!actualInterval.overlaps(dataInterval)) { + return ImmutableList.of(); + } + + if (actualInterval.getStart().isBefore(dataInterval.getStart())) { + actualInterval = actualInterval.withStart(dataInterval.getStart()); + } + if (actualInterval.getEnd().isAfter(dataInterval.getEnd())) { + actualInterval = actualInterval.withEnd(dataInterval.getEnd()); + } + + final Iterable iterable; + if (filter == null) { + iterable = new NoFilterCursorIterable(index, actualInterval, gran); + } else { + Offset offset = new ConciseOffset(filter.goConcise(new MMappedBitmapIndexSelector(index))); + + iterable = new CursorIterable(index, actualInterval, gran, offset); + } + + return FunctionalIterable.create(iterable).keep(Functions.identity()); + } + + @Override + public Indexed getAvailableDimensions() + { + return index.getAvailableDimensions(); + } + + @Override + public Indexed getDimValueLookup(String dimension) + { + final Column column = index.getColumn(dimension.toLowerCase()); + + if (column == null || !column.getCapabilities().isDictionaryEncoded()) { + return null; + } + + final DictionaryEncodedColumn dictionary = column.getDictionaryEncoding(); + return new Indexed() + { + @Override + public Class getClazz() + { + return String.class; + } + + @Override + public int size() + { + return dictionary.getCardinality(); + } + + @Override + public String get(int index) + { + return dictionary.lookupName(index); + } + + @Override + public int indexOf(String value) + { + return dictionary.lookupId(value); + } + + @Override + public Iterator iterator() + { + return IndexedIterable.create(this).iterator(); + } + }; + } + + @Override + public ImmutableConciseSet getInvertedIndex(String dimension, String dimVal) + { + final Column column = index.getColumn(dimension.toLowerCase()); + if (column == null) { + return new ImmutableConciseSet(); + } + if (!column.getCapabilities().hasBitmapIndexes()) { + return new ImmutableConciseSet(); + } + + return column.getBitmapIndex().getConciseSet(dimVal); + } + + @Override + public Offset getFilterOffset(Filter filter) + { + return new ConciseOffset(filter.goConcise(new MMappedBitmapIndexSelector(index))); + } + + private static class CursorIterable implements Iterable + { + private final ColumnSelector index; + private final Interval interval; + private final QueryGranularity gran; + private final Offset offset; + + public CursorIterable( + ColumnSelector index, + Interval interval, + QueryGranularity gran, + Offset offset + ) + { + this.index = index; + this.interval = interval; + this.gran = gran; + this.offset = offset; + } + + @Override + public Iterator iterator() + { + final Offset baseOffset = offset.clone(); + + final Map genericColumnCache = Maps.newHashMap(); + final Map complexColumnCache = Maps.newHashMap(); + final GenericColumn timestamps = index.getTimeColumn().getGenericColumn(); + + final FunctionalIterator retVal = FunctionalIterator + .create(gran.iterable(interval.getStartMillis(), interval.getEndMillis()).iterator()) + .transform( + new Function() + { + + 
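makeCursors above first clamps the requested interval to the segment's data interval and returns an empty cursor list when the two do not overlap. The clamping itself is a few lines of Joda-Time; a stand-alone sketch (returning null here where the real method returns an empty iterable):

import org.joda.time.Interval;

class IntervalClampSketch
{
  // Same steps as makeCursors above: reject non-overlapping requests, then trim the
  // requested interval to the bounds of the data interval.
  static Interval clamp(Interval requested, Interval data)
  {
    if (!requested.overlaps(data)) {
      return null;
    }
    Interval actual = requested;
    if (actual.getStart().isBefore(data.getStart())) {
      actual = actual.withStart(data.getStart());
    }
    if (actual.getEnd().isAfter(data.getEnd())) {
      actual = actual.withEnd(data.getEnd());
    }
    return actual;
  }
}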
@Override + public Cursor apply(final Long input) + { + final long timeStart = Math.max(interval.getStartMillis(), input); + while (baseOffset.withinBounds() + && timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeStart) { + baseOffset.increment(); + } + + final Offset offset = new TimestampCheckingOffset( + baseOffset, timestamps, Math.min(interval.getEndMillis(), gran.next(timeStart)) + ); + + return new Cursor() + { + private final Offset initOffset = offset.clone(); + private final DateTime myBucket = gran.toDateTime(input); + private Offset cursorOffset = offset; + + @Override + public DateTime getTime() + { + return myBucket; + } + + @Override + public void advance() + { + cursorOffset.increment(); + } + + @Override + public boolean isDone() + { + return !cursorOffset.withinBounds(); + } + + @Override + public void reset() + { + cursorOffset = initOffset.clone(); + } + + @Override + public DimensionSelector makeDimensionSelector(String dimension) + { + final String dimensionName = dimension.toLowerCase(); + final Column columnDesc = index.getColumn(dimensionName); + if (columnDesc == null) { + return null; + } + + final DictionaryEncodedColumn column = columnDesc.getDictionaryEncoding(); + + if (columnDesc.getCapabilities().hasMultipleValues()) { + return new DimensionSelector() + { + @Override + public IndexedInts getRow() + { + return column.getMultiValueRow(cursorOffset.getOffset()); + } + + @Override + public int getValueCardinality() + { + return column.getCardinality(); + } + + @Override + public String lookupName(int id) + { + final String retVal = column.lookupName(id); + return retVal == null ? "" : retVal; + } + + @Override + public int lookupId(String name) + { + return column.lookupId(name); + } + }; + } + else { + return new DimensionSelector() + { + @Override + public IndexedInts getRow() + { + final int value = column.getSingleValueRow(cursorOffset.getOffset()); + return new IndexedInts() + { + @Override + public int size() + { + return 1; + } + + @Override + public int get(int index) + { + return value; + } + + @Override + public Iterator iterator() + { + return Iterators.singletonIterator(value); + } + }; + } + + @Override + public int getValueCardinality() + { + return column.getCardinality(); + } + + @Override + public String lookupName(int id) + { + return column.lookupName(id); + } + + @Override + public int lookupId(String name) + { + return column.lookupId(name); + } + }; + } + } + + @Override + public FloatMetricSelector makeFloatMetricSelector(String metric) + { + final String metricName = metric.toLowerCase(); + GenericColumn cachedMetricVals = genericColumnCache.get(metricName); + + if (cachedMetricVals == null) { + Column holder = index.getColumn(metricName); + if (holder != null && holder.getCapabilities().getType() == ValueType.FLOAT) { + cachedMetricVals = holder.getGenericColumn(); + genericColumnCache.put(metricName, cachedMetricVals); + } + } + + if (cachedMetricVals == null) { + return new FloatMetricSelector() + { + @Override + public float get() + { + return 0.0f; + } + }; + } + + final GenericColumn metricVals = cachedMetricVals; + return new FloatMetricSelector() + { + @Override + public float get() + { + return metricVals.getFloatSingleValueRow(cursorOffset.getOffset()); + } + }; + } + + @Override + public ComplexMetricSelector makeComplexMetricSelector(String metric) + { + final String metricName = metric.toLowerCase(); + ComplexColumn cachedMetricVals = complexColumnCache.get(metricName); + + if (cachedMetricVals == null) { + 
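The TimestampCheckingOffset constructed above is what keeps each Cursor inside its granularity bucket: it wraps the base offset and reports withinBounds() only while the current row's timestamp is still below the bucket's end. A stripped-down sketch of the same idea over a plain sorted long[] (the real class reads timestamps from a GenericColumn):

class BoundedOffsetSketch
{
  private final long[] timestamps; // sorted row timestamps, as in the segment's time column
  private final long threshold;    // exclusive end of the current granularity bucket
  private int offset;

  BoundedOffsetSketch(long[] timestamps, int startOffset, long threshold)
  {
    this.timestamps = timestamps;
    this.offset = startOffset;
    this.threshold = threshold;
  }

  boolean withinBounds()
  {
    return offset < timestamps.length && timestamps[offset] < threshold;
  }

  void increment()
  {
    ++offset;
  }

  int getOffset()
  {
    return offset;
  }
}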
Column holder = index.getColumn(metricName); + if (holder != null && holder.getCapabilities().getType() == ValueType.COMPLEX) { + cachedMetricVals = holder.getComplexColumn(); + complexColumnCache.put(metricName, cachedMetricVals); + } + } + + if (cachedMetricVals == null) { + return null; + } + + final ComplexColumn metricVals = cachedMetricVals; + return new ComplexMetricSelector() + { + @Override + public Class classOfObject() + { + return metricVals.getClazz(); + } + + @Override + public Object get() + { + return metricVals.getRowValue(cursorOffset.getOffset()); + } + }; + } + }; + } + } + ); + + // This after call is not perfect, if there is an exception during processing, it will never get called, + // but it's better than nothing and doing this properly all the time requires a lot more fixerating + return MoreIterators.after( + retVal, + new Runnable() + { + @Override + public void run() + { + Closeables.closeQuietly(timestamps); + for (GenericColumn column : genericColumnCache.values()) { + Closeables.closeQuietly(column); + } + for (ComplexColumn complexColumn : complexColumnCache.values()) { + Closeables.closeQuietly(complexColumn); + } + } + } + ); + } + } + + private static class TimestampCheckingOffset implements Offset + { + private final Offset baseOffset; + private final GenericColumn timestamps; + private final long threshold; + + public TimestampCheckingOffset( + Offset baseOffset, + GenericColumn timestamps, + long threshold + ) + { + this.baseOffset = baseOffset; + this.timestamps = timestamps; + this.threshold = threshold; + } + + @Override + public int getOffset() + { + return baseOffset.getOffset(); + } + + @Override + public Offset clone() + { + return new TimestampCheckingOffset(baseOffset.clone(), timestamps, threshold); + } + + @Override + public boolean withinBounds() + { + return baseOffset.withinBounds() && timestamps.getLongSingleValueRow(baseOffset.getOffset()) < threshold; + } + + @Override + public void increment() + { + baseOffset.increment(); + } + } + + private static class NoFilterCursorIterable implements Iterable + { + private final ColumnSelector index; + private final Interval interval; + private final QueryGranularity gran; + + public NoFilterCursorIterable( + ColumnSelector index, + Interval interval, + QueryGranularity gran + ) + { + this.index = index; + this.interval = interval; + this.gran = gran; + } + + /** + * This produces iterators of Cursor objects that must be fully processed (until isDone() returns true) before the + * next Cursor is processed. 
It is *not* safe to pass these cursors off to another thread for parallel processing + * + * @return + */ + @Override + public Iterator iterator() + { + final Map genericColumnCache = Maps.newHashMap(); + final Map complexColumnCache = Maps.newHashMap(); + final GenericColumn timestamps = index.getTimeColumn().getGenericColumn(); + + final FunctionalIterator retVal = FunctionalIterator + .create(gran.iterable(interval.getStartMillis(), interval.getEndMillis()).iterator()) + .transform( + new Function() + { + private int currRow = 0; + + @Override + public Cursor apply(final Long input) + { + final long timeStart = Math.max(interval.getStartMillis(), input); + while (currRow < timestamps.length() && timestamps.getLongSingleValueRow(currRow) < timeStart) { + ++currRow; + } + + return new Cursor() + { + private final DateTime myBucket = gran.toDateTime(input); + private final long nextBucket = Math.min(gran.next(myBucket.getMillis()), interval.getEndMillis()); + private final int initRow = currRow; + + @Override + public DateTime getTime() + { + return myBucket; + } + + @Override + public void advance() + { + ++currRow; + } + + @Override + public boolean isDone() + { + return currRow >= timestamps.length() || timestamps.getLongSingleValueRow(currRow) >= nextBucket; + } + + @Override + public void reset() + { + currRow = initRow; + } + + @Override + public DimensionSelector makeDimensionSelector(String dimension) + { + final String dimensionName = dimension.toLowerCase(); + final Column columnDesc = index.getColumn(dimensionName); + if (columnDesc == null) { + return null; + } + + final DictionaryEncodedColumn column = columnDesc.getDictionaryEncoding(); + + if (columnDesc.getCapabilities().hasMultipleValues()) { + return new DimensionSelector() + { + @Override + public IndexedInts getRow() + { + return column.getMultiValueRow(currRow); + } + + @Override + public int getValueCardinality() + { + return column.getCardinality(); + } + + @Override + public String lookupName(int id) + { + final String retVal = column.lookupName(id); + return retVal == null ? 
"" : retVal; + } + + @Override + public int lookupId(String name) + { + return column.lookupId(name); + } + }; + } + else { + return new DimensionSelector() + { + @Override + public IndexedInts getRow() + { + final int value = column.getSingleValueRow(currRow); + return new IndexedInts() + { + @Override + public int size() + { + return 1; + } + + @Override + public int get(int index) + { + return value; + } + + @Override + public Iterator iterator() + { + return Iterators.singletonIterator(value); + } + }; + } + + @Override + public int getValueCardinality() + { + return column.getCardinality(); + } + + @Override + public String lookupName(int id) + { + return column.lookupName(id); + } + + @Override + public int lookupId(String name) + { + return column.lookupId(name); + } + }; + } + } + + @Override + public FloatMetricSelector makeFloatMetricSelector(String metric) + { + final String metricName = metric.toLowerCase(); + GenericColumn cachedMetricVals = genericColumnCache.get(metricName); + + if (cachedMetricVals == null) { + Column holder = index.getColumn(metricName); + if (holder != null && holder.getCapabilities().getType() == ValueType.FLOAT) { + cachedMetricVals = holder.getGenericColumn(); + genericColumnCache.put(metricName, cachedMetricVals); + } + } + + if (cachedMetricVals == null) { + return new FloatMetricSelector() + { + @Override + public float get() + { + return 0.0f; + } + }; + } + + final GenericColumn metricVals = cachedMetricVals; + return new FloatMetricSelector() + { + @Override + public float get() + { + return metricVals.getFloatSingleValueRow(currRow); + } + }; + } + + @Override + public ComplexMetricSelector makeComplexMetricSelector(String metric) + { + final String metricName = metric.toLowerCase(); + ComplexColumn cachedMetricVals = complexColumnCache.get(metricName); + + if (cachedMetricVals == null) { + Column holder = index.getColumn(metricName); + if (holder != null && holder.getCapabilities().getType() == ValueType.COMPLEX) { + cachedMetricVals = holder.getComplexColumn(); + complexColumnCache.put(metricName, cachedMetricVals); + } + } + + if (cachedMetricVals == null) { + return null; + } + + final ComplexColumn metricVals = cachedMetricVals; + return new ComplexMetricSelector() + { + @Override + public Class classOfObject() + { + return metricVals.getClazz(); + } + + @Override + public Object get() + { + return metricVals.getRowValue(currRow); + } + }; + } + }; + } + } + ); + + return MoreIterators.after( + retVal, + new Runnable() + { + @Override + public void run() + { + Closeables.closeQuietly(timestamps); + for (GenericColumn column : genericColumnCache.values()) { + Closeables.closeQuietly(column); + } + for (ComplexColumn complexColumn : complexColumnCache.values()) { + Closeables.closeQuietly(complexColumn); + } + } + } + ); + } + } + + private class MMappedBitmapIndexSelector implements BitmapIndexSelector + { + private final ColumnSelector index; + + public MMappedBitmapIndexSelector(final ColumnSelector index) + { + this.index = index; + } + + @Override + public Indexed getDimensionValues(String dimension) + { + final Column columnDesc = index.getColumn(dimension.toLowerCase()); + if (columnDesc == null || !columnDesc.getCapabilities().isDictionaryEncoded()) { + return null; + } + final DictionaryEncodedColumn column = columnDesc.getDictionaryEncoding(); + return new Indexed() + { + @Override + public Class getClazz() + { + return String.class; + } + + @Override + public int size() + { + return column.getCardinality(); + } + + @Override + 
public String get(int index) + { + return column.lookupName(index); + } + + @Override + public int indexOf(String value) + { + return column.lookupId(value); + } + + @Override + public Iterator iterator() + { + return IndexedIterable.create(this).iterator(); + } + }; + } + + @Override + public int getNumRows() + { + GenericColumn column = null; + try { + column = index.getTimeColumn().getGenericColumn(); + return column.length(); + } + finally { + Closeables.closeQuietly(column); + } + } + + @Override + public ImmutableConciseSet getConciseInvertedIndex(String dimension, String value) + { + return getInvertedIndex(dimension, value); + } + } +} diff --git a/server/src/main/java/com/metamx/druid/index/v1/processing/DimensionSelector.java b/server/src/main/java/com/metamx/druid/index/v1/processing/DimensionSelector.java index c2efd331c64..8ced8f388dd 100644 --- a/server/src/main/java/com/metamx/druid/index/v1/processing/DimensionSelector.java +++ b/server/src/main/java/com/metamx/druid/index/v1/processing/DimensionSelector.java @@ -57,18 +57,12 @@ public interface DimensionSelector * A,B * B * - * would be turned into (per lookupExpansion) - * - * 0 - * 1 - * 0 - * 2 + * getRow() would return * - * at which point lookupExpansion would really return: - * - * lookupExpansion(1) => [0 1] - * lookupExpansion(2) => [0] - * lookupExpansion(3) => [1] + * getRow(0) => [0 1] + * getRow(1) => [0] + * getRow(2) => [0 1] + * getRow(3) => [1] * * and then lookupName would return: * @@ -82,7 +76,7 @@ public interface DimensionSelector /** * The ID is the int id value of the field. - * + * * @param name * @return */ diff --git a/server/src/main/java/com/metamx/druid/initialization/ServerInit.java b/server/src/main/java/com/metamx/druid/initialization/ServerInit.java index a0dcbf94735..1d727f9abe3 100644 --- a/server/src/main/java/com/metamx/druid/initialization/ServerInit.java +++ b/server/src/main/java/com/metamx/druid/initialization/ServerInit.java @@ -25,19 +25,18 @@ import com.google.common.collect.Maps; import com.metamx.common.ISE; import com.metamx.common.logger.Logger; import com.metamx.druid.DruidProcessingConfig; -import com.metamx.druid.GroupByQueryEngine; -import com.metamx.druid.GroupByQueryEngineConfig; +import com.metamx.druid.loading.DelegatingSegmentLoader; +import com.metamx.druid.loading.MMappedQueryableIndexFactory; +import com.metamx.druid.loading.QueryableIndexFactory; +import com.metamx.druid.loading.S3SegmentPuller; +import com.metamx.druid.loading.SingleSegmentLoader; +import com.metamx.druid.query.group.GroupByQueryEngine; +import com.metamx.druid.query.group.GroupByQueryEngineConfig; import com.metamx.druid.Query; import com.metamx.druid.collect.StupidPool; -import com.metamx.druid.loading.DelegatingStorageAdapterLoader; -import com.metamx.druid.loading.MMappedStorageAdapterFactory; import com.metamx.druid.loading.QueryableLoaderConfig; -import com.metamx.druid.loading.RealtimeSegmentGetter; -import com.metamx.druid.loading.S3SegmentGetter; -import com.metamx.druid.loading.S3ZippedSegmentGetter; -import com.metamx.druid.loading.SingleStorageAdapterLoader; -import com.metamx.druid.loading.StorageAdapterFactory; -import com.metamx.druid.loading.StorageAdapterLoader; +import com.metamx.druid.loading.S3ZippedSegmentPuller; +import com.metamx.druid.loading.SegmentLoader; import com.metamx.druid.query.QueryRunnerFactory; import com.metamx.druid.query.group.GroupByQuery; import com.metamx.druid.query.group.GroupByQueryRunnerFactory; @@ -63,28 +62,26 @@ public class ServerInit { private 
static Logger log = new Logger(ServerInit.class); - public static StorageAdapterLoader makeDefaultQueryableLoader( + public static SegmentLoader makeDefaultQueryableLoader( RestS3Service s3Client, QueryableLoaderConfig config ) { - DelegatingStorageAdapterLoader delegateLoader = new DelegatingStorageAdapterLoader(); + DelegatingSegmentLoader delegateLoader = new DelegatingSegmentLoader(); - final S3SegmentGetter segmentGetter = new S3SegmentGetter(s3Client, config); - final S3ZippedSegmentGetter zippedGetter = new S3ZippedSegmentGetter(s3Client, config); - final RealtimeSegmentGetter realtimeGetter = new RealtimeSegmentGetter(config); - final StorageAdapterFactory factory; + final S3SegmentPuller segmentGetter = new S3SegmentPuller(s3Client, config); + final S3ZippedSegmentPuller zippedGetter = new S3ZippedSegmentPuller(s3Client, config); + final QueryableIndexFactory factory; if ("mmap".equals(config.getQueryableFactoryType())) { - factory = new MMappedStorageAdapterFactory(); + factory = new MMappedQueryableIndexFactory(); } else { throw new ISE("Unknown queryableFactoryType[%s]", config.getQueryableFactoryType()); } delegateLoader.setLoaderTypes( - ImmutableMap.builder() - .put("s3", new SingleStorageAdapterLoader(segmentGetter, factory)) - .put("s3_zip", new SingleStorageAdapterLoader(zippedGetter, factory)) - .put("realtime", new SingleStorageAdapterLoader(realtimeGetter, factory)) + ImmutableMap.builder() + .put("s3", new SingleSegmentLoader(segmentGetter, factory)) + .put("s3_zip", new SingleSegmentLoader(zippedGetter, factory)) .build() ); diff --git a/server/src/main/java/com/metamx/druid/loading/DelegatingStorageAdapterLoader.java b/server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java similarity index 63% rename from server/src/main/java/com/metamx/druid/loading/DelegatingStorageAdapterLoader.java rename to server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java index 5d421df7a0a..d576e59ae82 100644 --- a/server/src/main/java/com/metamx/druid/loading/DelegatingStorageAdapterLoader.java +++ b/server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java @@ -21,43 +21,44 @@ package com.metamx.druid.loading; import com.metamx.common.MapUtils; import com.metamx.common.logger.Logger; -import com.metamx.druid.StorageAdapter; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.Segment; import javax.inject.Inject; import java.util.Map; /** */ -public class DelegatingStorageAdapterLoader implements StorageAdapterLoader +public class DelegatingSegmentLoader implements SegmentLoader { - private static final Logger log = new Logger(DelegatingStorageAdapterLoader.class); + private static final Logger log = new Logger(DelegatingSegmentLoader.class); - private volatile Map loaderTypes; + private volatile Map loaderTypes; @Inject public void setLoaderTypes( - Map loaderTypes + Map loaderTypes ) { this.loaderTypes = loaderTypes; } @Override - public StorageAdapter getAdapter(Map loadSpec) throws StorageAdapterLoadingException + public Segment getSegment(DataSegment segment) throws StorageAdapterLoadingException { - return getLoader(loadSpec).getAdapter(loadSpec); + return getLoader(segment.getLoadSpec()).getSegment(segment); } @Override - public void cleanupAdapter(Map loadSpec) throws StorageAdapterLoadingException + public void cleanup(DataSegment segment) throws StorageAdapterLoadingException { - getLoader(loadSpec).cleanupAdapter(loadSpec); + getLoader(segment.getLoadSpec()).cleanup(segment); } - private 
StorageAdapterLoader getLoader(Map loadSpec) throws StorageAdapterLoadingException + private SegmentLoader getLoader(Map loadSpec) throws StorageAdapterLoadingException { String type = MapUtils.getString(loadSpec, "type"); - StorageAdapterLoader loader = loaderTypes.get(type); + SegmentLoader loader = loaderTypes.get(type); if (loader == null) { throw new StorageAdapterLoadingException("Unknown loader type[%s]. Known types are %s", type, loaderTypes.keySet()); diff --git a/server/src/main/java/com/metamx/druid/loading/ConvertingBaseQueryableFactory.java b/server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java similarity index 75% rename from server/src/main/java/com/metamx/druid/loading/ConvertingBaseQueryableFactory.java rename to server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java index 8cb720affd2..648813d62ac 100644 --- a/server/src/main/java/com/metamx/druid/loading/ConvertingBaseQueryableFactory.java +++ b/server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java @@ -20,7 +20,7 @@ package com.metamx.druid.loading; import com.metamx.common.logger.Logger; -import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IndexIO; import org.apache.commons.io.FileUtils; @@ -29,18 +29,13 @@ import java.io.IOException; /** */ -public abstract class ConvertingBaseQueryableFactory implements StorageAdapterFactory +public class MMappedQueryableIndexFactory implements QueryableIndexFactory { - private static final Logger log = new Logger(ConvertingBaseQueryableFactory.class); + private static final Logger log = new Logger(MMappedQueryableIndexFactory.class); @Override - public StorageAdapter factorize(File parentDir) throws StorageAdapterLoadingException + public QueryableIndex factorize(File parentDir) throws StorageAdapterLoadingException { - File indexFile = new File(parentDir, "index.drd"); - if (!indexFile.exists()) { - throw new StorageAdapterLoadingException("indexFile[%s] does not exist.", indexFile); - } - try { if (! IndexIO.canBeMapped(parentDir)) { File canBeMappedDir = new File(parentDir, "forTheMapping"); @@ -55,16 +50,16 @@ public abstract class ConvertingBaseQueryableFactory implements StorageAdapterFa } for (File file : canBeMappedDir.listFiles()) { if (! 
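DelegatingSegmentLoader.getLoader above keeps the existing dispatch model: the segment's loadSpec map carries a "type" entry ("s3", "s3_zip", ...) that selects the concrete loader, and unknown types fail loudly. A hedged sketch of that lookup, with LoaderLike as a hypothetical stand-in for SegmentLoader:

import java.util.Map;

class LoaderDispatchSketch
{
  // Hypothetical stand-in for SegmentLoader; the real interface returns a Segment.
  interface LoaderLike
  {
    void load(Map<String, Object> loadSpec);
  }

  private final Map<String, LoaderLike> loaderTypes;

  LoaderDispatchSketch(Map<String, LoaderLike> loaderTypes)
  {
    this.loaderTypes = loaderTypes;
  }

  // Mirrors getLoader above: the loadSpec's "type" key picks the registered loader.
  LoaderLike getLoader(Map<String, Object> loadSpec)
  {
    final String type = String.valueOf(loadSpec.get("type"));
    final LoaderLike loader = loaderTypes.get(type);
    if (loader == null) {
      throw new IllegalStateException(
          String.format("Unknown loader type[%s]. Known types are %s", type, loaderTypes.keySet())
      );
    }
    return loader;
  }
}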
file.renameTo(new File(parentDir, file.getName()))) { - throw new StorageAdapterLoadingException("Couldn't rename[%s] to [%s]", canBeMappedDir, indexFile); + throw new StorageAdapterLoadingException("Couldn't rename[%s] to [%s]", canBeMappedDir, parentDir); } } FileUtils.deleteDirectory(canBeMappedDir); } - return factorizeConverted(parentDir); + return IndexIO.loadIndex(parentDir); } catch (IOException e) { - log.warn(e, "Got exception, deleting index[%s]", indexFile); + log.warn(e, "Got exception, deleting parentDir[%s]", parentDir); try { FileUtils.deleteDirectory(parentDir); } @@ -74,6 +69,4 @@ public abstract class ConvertingBaseQueryableFactory implements StorageAdapterFa throw new StorageAdapterLoadingException(e, e.getMessage()); } } - - protected abstract StorageAdapter factorizeConverted(File parentDir) throws IOException; } diff --git a/server/src/main/java/com/metamx/druid/loading/StorageAdapterFactory.java b/server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java similarity index 85% rename from server/src/main/java/com/metamx/druid/loading/StorageAdapterFactory.java rename to server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java index 92fec4e7632..d7f60309aa6 100644 --- a/server/src/main/java/com/metamx/druid/loading/StorageAdapterFactory.java +++ b/server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java @@ -19,13 +19,13 @@ package com.metamx.druid.loading; -import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.QueryableIndex; import java.io.File; /** */ -public interface StorageAdapterFactory +public interface QueryableIndexFactory { - public StorageAdapter factorize(File parentDir) throws StorageAdapterLoadingException; + public QueryableIndex factorize(File parentDir) throws StorageAdapterLoadingException; } diff --git a/server/src/main/java/com/metamx/druid/loading/S3SegmentGetter.java b/server/src/main/java/com/metamx/druid/loading/S3SegmentPuller.java similarity index 92% rename from server/src/main/java/com/metamx/druid/loading/S3SegmentGetter.java rename to server/src/main/java/com/metamx/druid/loading/S3SegmentPuller.java index d0d9fb7a0bd..380489548d5 100644 --- a/server/src/main/java/com/metamx/druid/loading/S3SegmentGetter.java +++ b/server/src/main/java/com/metamx/druid/loading/S3SegmentPuller.java @@ -23,6 +23,7 @@ import com.google.inject.Inject; import com.metamx.common.MapUtils; import com.metamx.common.StreamUtils; import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; import com.metamx.druid.common.s3.S3Utils; import org.apache.commons.io.FileUtils; import org.jets3t.service.impl.rest.httpclient.RestS3Service; @@ -38,9 +39,9 @@ import java.util.zip.GZIPInputStream; /** */ -public class S3SegmentGetter implements SegmentGetter +public class S3SegmentPuller implements SegmentPuller { - private static final Logger log = new Logger(S3SegmentGetter.class); + private static final Logger log = new Logger(S3SegmentPuller.class); private static final long DEFAULT_TIMEOUT = 5 * 60 * 1000; private static final String BUCKET = "bucket"; @@ -50,7 +51,7 @@ public class S3SegmentGetter implements SegmentGetter private final S3SegmentGetterConfig config; @Inject - public S3SegmentGetter( + public S3SegmentPuller( RestS3Service s3Client, S3SegmentGetterConfig config ) @@ -60,8 +61,9 @@ public class S3SegmentGetter implements SegmentGetter } @Override - public File getSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException + public File getSegmentFiles(DataSegment 
segment) throws StorageAdapterLoadingException { + Map loadSpec = segment.getLoadSpec(); String s3Bucket = MapUtils.getString(loadSpec, "bucket"); String s3Path = MapUtils.getString(loadSpec, "key"); @@ -156,8 +158,9 @@ public class S3SegmentGetter implements SegmentGetter } @Override - public boolean cleanSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException + public boolean cleanSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException { + Map loadSpec = segment.getLoadSpec(); File cacheFile = new File( config.getCacheDirectory(), computeCacheFilePath(MapUtils.getString(loadSpec, BUCKET), MapUtils.getString(loadSpec, KEY)) diff --git a/server/src/main/java/com/metamx/druid/loading/S3SegmentPusher.java b/server/src/main/java/com/metamx/druid/loading/S3SegmentPusher.java new file mode 100644 index 00000000000..4d0c1148593 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/loading/S3SegmentPusher.java @@ -0,0 +1,149 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.loading; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Closeables; +import com.metamx.common.ISE; +import com.metamx.common.StreamUtils; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.v1.IndexIO; +import com.metamx.emitter.EmittingLogger; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.codehaus.jackson.map.ObjectMapper; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Object; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +public class S3SegmentPusher implements SegmentPusher +{ + private static final EmittingLogger log = new EmittingLogger(S3SegmentPusher.class); + private static final Joiner JOINER = Joiner.on("/").skipNulls(); + + private final RestS3Service s3Client; + private final S3SegmentPusherConfig config; + private final ObjectMapper jsonMapper; + + public S3SegmentPusher( + RestS3Service s3Client, + S3SegmentPusherConfig config, + ObjectMapper jsonMapper + ) + { + this.s3Client = s3Client; + this.config = config; + this.jsonMapper = jsonMapper; + } + + @Override + public DataSegment push(final File indexFilesDir, DataSegment segment) throws IOException + { + log.info("Uploading [%s] to S3", indexFilesDir); + String outputKey = JOINER.join( + config.getBaseKey().isEmpty() ? 
null : config.getBaseKey(), + segment.getDataSource(), + String.format( + "%s_%s", + segment.getInterval().getStart(), + segment.getInterval().getEnd() + ), + segment.getVersion(), + segment.getShardSpec().getPartitionNum() + ); + + long indexSize = 0; + final File zipOutFile = File.createTempFile("druid", "index.zip"); + ZipOutputStream zipOut = null; + try { + zipOut = new ZipOutputStream(new FileOutputStream(zipOutFile)); + File[] indexFiles = indexFilesDir.listFiles(); + for (File indexFile : indexFiles) { + log.info("Adding indexFile[%s] with size[%,d]. Total size[%,d]", indexFile, indexFile.length(), indexSize); + if (indexFile.length() >= Integer.MAX_VALUE) { + throw new ISE("indexFile[%s] too large [%,d]", indexFile, indexFile.length()); + } + zipOut.putNextEntry(new ZipEntry(indexFile.getName())); + IOUtils.copy(new FileInputStream(indexFile), zipOut); + indexSize += indexFile.length(); + } + } + finally { + Closeables.closeQuietly(zipOut); + } + + try { + S3Object toPush = new S3Object(zipOutFile); + + final String outputBucket = config.getBucket(); + toPush.setBucketName(outputBucket); + toPush.setKey(outputKey + "/index.zip"); + + log.info("Pushing %s.", toPush); + s3Client.putObject(outputBucket, toPush); + + segment = segment.withSize(indexSize) + .withLoadSpec( + ImmutableMap.of( + "type", "s3_zip", + "bucket", outputBucket, + "key", toPush.getKey() + ) + ) + .withBinaryVersion(IndexIO.getVersionFromDir(indexFilesDir)); + + File descriptorFile = File.createTempFile("druid", "descriptor.json"); + StreamUtils.copyToFileAndClose(new ByteArrayInputStream(jsonMapper.writeValueAsBytes(segment)), descriptorFile); + S3Object descriptorObject = new S3Object(descriptorFile); + descriptorObject.setBucketName(outputBucket); + descriptorObject.setKey(outputKey + "/descriptor.json"); + + log.info("Pushing %s", descriptorObject); + s3Client.putObject(outputBucket, descriptorObject); + + log.info("Deleting Index File[%s]", indexFilesDir); + FileUtils.deleteDirectory(indexFilesDir); + + log.info("Deleting zipped index File[%s]", zipOutFile); + zipOutFile.delete(); + + log.info("Deleting descriptor file[%s]", descriptorFile); + descriptorFile.delete(); + + return segment; + } + catch (NoSuchAlgorithmException e) { + throw new IOException(e); + } + catch (S3ServiceException e) { + throw new IOException(e); + } + } +} \ No newline at end of file diff --git a/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusherConfig.java b/server/src/main/java/com/metamx/druid/loading/S3SegmentPusherConfig.java similarity index 96% rename from realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusherConfig.java rename to server/src/main/java/com/metamx/druid/loading/S3SegmentPusherConfig.java index 96a96eeea10..0bd66a1a913 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusherConfig.java +++ b/server/src/main/java/com/metamx/druid/loading/S3SegmentPusherConfig.java @@ -17,7 +17,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
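For context on how the two halves of this patch meet: the pusher above stamps an "s3_zip" load spec onto the DataSegment it returns, and the serving side (getLoader earlier in this patch) dispatches on that spec's "type" field to pick a loader. A rough sketch of the round trip, with the bucket, key, and registry contents invented for illustration:

    // Sketch only -- the bucket/key values and the loaderTypes registry contents are made up.
    Map<String, Object> loadSpec = ImmutableMap.<String, Object>of(
        "type", "s3_zip",
        "bucket", "example-bucket",
        "key", "wikipedia/2012-01-01T00:00:00.000Z_2012-01-02T00:00:00.000Z/v1/0/index.zip"
    );
    DataSegment pushed = segment.withLoadSpec(loadSpec);   // shape of what S3SegmentPusher.push() returns

    // Serving side: pick a SegmentLoader by the "type" key, as getLoader(...) does above.
    String type = MapUtils.getString(pushed.getLoadSpec(), "type");
    SegmentLoader loader = loaderTypes.get(type);
    if (loader == null) {
      throw new StorageAdapterLoadingException("Unknown loader type[%s]. Known types are %s", type, loaderTypes.keySet());
    }
    Segment queryable = loader.getSegment(pushed);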
*/ -package com.metamx.druid.realtime; +package com.metamx.druid.loading; import org.skife.config.Config; import org.skife.config.Default; diff --git a/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentGetter.java b/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentPuller.java similarity index 91% rename from server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentGetter.java rename to server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentPuller.java index d90d33c586d..8fd8ebd4542 100644 --- a/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentGetter.java +++ b/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentPuller.java @@ -23,6 +23,7 @@ import com.google.common.io.Closeables; import com.metamx.common.MapUtils; import com.metamx.common.StreamUtils; import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; import com.metamx.druid.common.s3.S3Utils; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -43,9 +44,9 @@ import java.util.zip.ZipInputStream; /** */ -public class S3ZippedSegmentGetter implements SegmentGetter +public class S3ZippedSegmentPuller implements SegmentPuller { - private static final Logger log = new Logger(S3ZippedSegmentGetter.class); + private static final Logger log = new Logger(S3ZippedSegmentPuller.class); private static final String BUCKET = "bucket"; private static final String KEY = "key"; @@ -53,7 +54,7 @@ public class S3ZippedSegmentGetter implements SegmentGetter private final RestS3Service s3Client; private final S3SegmentGetterConfig config; - public S3ZippedSegmentGetter( + public S3ZippedSegmentPuller( RestS3Service s3Client, S3SegmentGetterConfig config ) @@ -63,8 +64,9 @@ public class S3ZippedSegmentGetter implements SegmentGetter } @Override - public File getSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException + public File getSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException { + Map loadSpec = segment.getLoadSpec(); String s3Bucket = MapUtils.getString(loadSpec, "bucket"); String s3Path = MapUtils.getString(loadSpec, "key"); @@ -161,8 +163,9 @@ public class S3ZippedSegmentGetter implements SegmentGetter } @Override - public boolean cleanSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException + public boolean cleanSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException { + Map loadSpec = segment.getLoadSpec(); File cacheFile = new File( config.getCacheDirectory(), computeCacheFilePath( diff --git a/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java b/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java new file mode 100644 index 00000000000..1ca54b89106 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java @@ -0,0 +1,31 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.loading; + +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.Segment; + +/** + */ +public interface SegmentLoader +{ + public Segment getSegment(DataSegment loadSpec) throws StorageAdapterLoadingException; + public void cleanup(DataSegment loadSpec) throws StorageAdapterLoadingException; +} diff --git a/server/src/main/java/com/metamx/druid/loading/SegmentGetter.java b/server/src/main/java/com/metamx/druid/loading/SegmentPuller.java similarity index 77% rename from server/src/main/java/com/metamx/druid/loading/SegmentGetter.java rename to server/src/main/java/com/metamx/druid/loading/SegmentPuller.java index 53e7481ef1a..9cba65f425c 100644 --- a/server/src/main/java/com/metamx/druid/loading/SegmentGetter.java +++ b/server/src/main/java/com/metamx/druid/loading/SegmentPuller.java @@ -19,13 +19,15 @@ package com.metamx.druid.loading; +import com.metamx.druid.client.DataSegment; + import java.io.File; import java.util.Map; /** */ -public interface SegmentGetter +public interface SegmentPuller { - public File getSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException; - public boolean cleanSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException; + public File getSegmentFiles(DataSegment loadSpec) throws StorageAdapterLoadingException; + public boolean cleanSegmentFiles(DataSegment loadSpec) throws StorageAdapterLoadingException; } diff --git a/realtime/src/main/java/com/metamx/druid/realtime/SegmentPusher.java b/server/src/main/java/com/metamx/druid/loading/SegmentPusher.java similarity index 95% rename from realtime/src/main/java/com/metamx/druid/realtime/SegmentPusher.java rename to server/src/main/java/com/metamx/druid/loading/SegmentPusher.java index 16a9253137c..3700215efc1 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/SegmentPusher.java +++ b/server/src/main/java/com/metamx/druid/loading/SegmentPusher.java @@ -17,15 +17,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
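SegmentPuller (above) now receives the whole DataSegment instead of a bare load-spec map, so implementations pull whatever keys they need out of segment.getLoadSpec(). Purely as an illustration of the new shape, not part of the patch, a trivial local-disk puller might look like this (the class name, the "path" key, and the Map<String, Object> parameterization are all assumed):

    public class LocalSegmentPuller implements SegmentPuller
    {
      @Override
      public File getSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException
      {
        final Map<String, Object> loadSpec = segment.getLoadSpec();
        final File segmentDir = new File(MapUtils.getString(loadSpec, "path"));
        if (!segmentDir.exists()) {
          throw new StorageAdapterLoadingException("Segment path[%s] does not exist", segmentDir);
        }
        return segmentDir;
      }

      @Override
      public boolean cleanSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException
      {
        // Nothing was copied anywhere, so there is nothing to clean up.
        return true;
      }
    }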
*/ -package com.metamx.druid.realtime; +package com.metamx.druid.loading; import com.metamx.druid.client.DataSegment; import java.io.File; import java.io.IOException; -/** - */ public interface SegmentPusher { public DataSegment push(File file, DataSegment segment) throws IOException; diff --git a/server/src/main/java/com/metamx/druid/loading/RealtimeSegmentGetter.java b/server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java similarity index 50% rename from server/src/main/java/com/metamx/druid/loading/RealtimeSegmentGetter.java rename to server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java index 15a37ebf611..19c3981e988 100644 --- a/server/src/main/java/com/metamx/druid/loading/RealtimeSegmentGetter.java +++ b/server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java @@ -20,46 +20,39 @@ package com.metamx.druid.loading; import com.google.inject.Inject; -import com.metamx.common.logger.Logger; - -import java.io.File; -import java.util.Map; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.QueryableIndexSegment; +import com.metamx.druid.index.Segment; /** */ -public class RealtimeSegmentGetter implements SegmentGetter +public class SingleSegmentLoader implements SegmentLoader { - private static final Logger log = new Logger(RealtimeSegmentGetter.class); - - private final S3SegmentGetterConfig config; + private final SegmentPuller segmentPuller; + private final QueryableIndexFactory factory; @Inject - public RealtimeSegmentGetter( - S3SegmentGetterConfig config + public SingleSegmentLoader( + SegmentPuller segmentPuller, + QueryableIndexFactory factory ) { - this.config = config; + this.segmentPuller = segmentPuller; + this.factory = factory; } @Override - public File getSegmentFiles(final Map loadSpec) throws StorageAdapterLoadingException + public Segment getSegment(DataSegment segment) throws StorageAdapterLoadingException { - try { - File cacheFile = (File) loadSpec.get("file"); + final QueryableIndex index = factory.factorize(segmentPuller.getSegmentFiles(segment)); - if (!cacheFile.exists()) { - throw new StorageAdapterLoadingException("Unable to find persisted file!"); - } - return cacheFile; - } - catch (Exception e) { - throw new StorageAdapterLoadingException(e, e.getMessage()); - } + return new QueryableIndexSegment(segment.getIdentifier(), index); } @Override - public boolean cleanSegmentFiles(Map loadSpec) throws StorageAdapterLoadingException + public void cleanup(DataSegment segment) throws StorageAdapterLoadingException { - throw new UnsupportedOperationException(); + segmentPuller.cleanSegmentFiles(segment); } } diff --git a/server/src/main/java/com/metamx/druid/master/DruidMaster.java b/server/src/main/java/com/metamx/druid/master/DruidMaster.java index fcb17123066..24b186d7622 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMaster.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMaster.java @@ -21,7 +21,6 @@ package com.metamx.druid.master; import com.google.common.base.Function; import com.google.common.base.Predicate; -import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -36,7 +35,6 @@ import com.metamx.common.guava.Comparators; import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.lifecycle.LifecycleStart; import com.metamx.common.lifecycle.LifecycleStop; -import 
com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidDataSource; import com.metamx.druid.client.DruidServer; @@ -57,7 +55,6 @@ import org.joda.time.Duration; import javax.annotation.Nullable; import java.util.Arrays; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Set; @@ -565,7 +562,6 @@ public class DruidMaster DruidMasterRuntimeParams.newBuilder() .withStartTime(startTime) .withDatasources(databaseSegmentManager.getInventory()) - .withLoadManagementPeons(loadManagementPeons) .withMillisToWaitBeforeDeleting(config.getMillisToWaitBeforeDeleting()) .withEmitter(emitter) .withMergeBytesLimit(config.getMergeBytesLimit()) @@ -596,21 +592,20 @@ public class DruidMaster public DruidMasterRuntimeParams run(DruidMasterRuntimeParams params) { // Display info about all historical servers - Iterable servers = - FunctionalIterable.create(serverInventoryManager.getInventory()) - .filter( - new Predicate() - { - @Override - public boolean apply( - @Nullable DruidServer input - ) - { - return input.getType() - .equalsIgnoreCase("historical"); - } - } - ); + Iterable servers =FunctionalIterable + .create(serverInventoryManager.getInventory()) + .filter( + new Predicate() + { + @Override + public boolean apply( + @Nullable DruidServer input + ) + { + return input.getType().equalsIgnoreCase("historical"); + } + } + ); if (log.isDebugEnabled()) { log.debug("Servers"); for (DruidServer druidServer : servers) { @@ -670,12 +665,17 @@ public class DruidMaster return params.buildFromExisting() .withDruidCluster(cluster) .withDatabaseRuleManager(databaseRuleManager) + .withLoadManagementPeons(loadManagementPeons) .withSegmentReplicantLookup(segmentReplicantLookup) .withBalancerReferenceTimestamp(DateTime.now()) .build(); } }, - new DruidMasterRuleRunner(DruidMaster.this), + new DruidMasterRuleRunner( + DruidMaster.this, + config.getReplicantLifetime(), + config.getReplicantThrottleLimit() + ), new DruidMasterCleanup(DruidMaster.this), new DruidMasterBalancer(DruidMaster.this), new DruidMasterLogger() diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java b/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java index a48073e121f..fa04d93a34a 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java @@ -19,7 +19,6 @@ package com.metamx.druid.master; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import com.google.common.collect.MinMaxPriorityQueue; import com.metamx.common.guava.Comparators; @@ -30,10 +29,8 @@ import org.joda.time.DateTime; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.ConcurrentHashMap; /** @@ -68,13 +65,10 @@ public class DruidMasterBalancer implements DruidMasterHelper for (BalancerSegmentHolder holder : currentlyMovingSegments.get(tier).values()) { holder.reduceLifetime(); if (holder.getLifetime() <= 0) { - log.makeAlert( - "[%s]: Balancer move segments queue has a segment stuck", - tier, - ImmutableMap.builder() - .put("segment", holder.getSegment().getIdentifier()) - .build() - ).emit(); + log.makeAlert("[%s]: Balancer move segments queue has a segment stuck", tier) + .addData("segment", holder.getSegment().getIdentifier()) + .addData("server", holder.getFromServer()) + .emit(); 
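The rule runner is now constructed with two new knobs that DruidMasterConfig exposes just below (druid.master.replicant.lifetime, default 15, and druid.master.replicant.throttleLimit, default 10); inside, they become a ReplicationThrottler that gates replicant creation and termination per tier. A condensed sketch of that wiring, where master and tier are placeholders:

    // Sketch of the throttling path introduced by this patch.
    DruidMasterRuleRunner ruleRunner = new DruidMasterRuleRunner(
        master,
        config.getReplicantLifetime(),      // how many master runs a stuck queue may survive
        config.getReplicantThrottleLimit()  // max in-flight replicant creations per tier
    );

    // Each run, per tier, the runner refreshes the throttler before applying rules:
    //   replicatorThrottler.updateReplicationState(tier);
    //   replicatorThrottler.updateTerminationState(tier);
    // and LoadRule.assign() only queues an extra replicant while
    //   replicationManager.canAddReplicant(tier) &&
    //   replicationManager.registerReplicantCreation(tier, segment.getIdentifier())
    // both succeed, unregistering again in the LoadPeonCallback once the load completes.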
} } } @@ -97,11 +91,7 @@ public class DruidMasterBalancer implements DruidMasterHelper if (!currentlyMovingSegments.get(tier).isEmpty()) { reduceLifetimes(tier); - log.info( - "[%s]: Still waiting on %,d segments to be moved", - tier, - currentlyMovingSegments.size() - ); + log.info("[%s]: Still waiting on %,d segments to be moved", tier, currentlyMovingSegments.size()); continue; } @@ -173,12 +163,7 @@ public class DruidMasterBalancer implements DruidMasterHelper if (!toPeon.getSegmentsToLoad().contains(segmentToMove) && (toServer.getSegment(segmentName) == null) && new ServerHolder(toServer, toPeon).getAvailableSize() > segmentToMove.getSize()) { - log.info( - "Moving [%s] from [%s] to [%s]", - segmentName, - fromServerName, - toServerName - ); + log.info("Moving [%s] from [%s] to [%s]", segmentName, fromServerName, toServerName); try { master.moveSegment( fromServerName, diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java b/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java index 274b3765164..755c668dc0e 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java @@ -84,4 +84,12 @@ public abstract class DruidMasterConfig @Config("druid.master.balancer.maxSegmentsToMove") @Default("5") public abstract int getMaxSegmentsToMove(); + + @Config("druid.master.replicant.lifetime") + @Default("15") + public abstract int getReplicantLifetime(); + + @Config("druid.master.replicant.throttleLimit") + @Default("10") + public abstract int getReplicantThrottleLimit(); } diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterRuleRunner.java b/server/src/main/java/com/metamx/druid/master/DruidMasterRuleRunner.java index 56d27f47544..2bd4870ff2c 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterRuleRunner.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterRuleRunner.java @@ -23,6 +23,7 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.db.DatabaseRuleManager; import com.metamx.druid.master.rules.Rule; import com.metamx.emitter.EmittingLogger; +import org.joda.time.DateTime; import java.util.List; @@ -32,11 +33,14 @@ public class DruidMasterRuleRunner implements DruidMasterHelper { private static final EmittingLogger log = new EmittingLogger(DruidMasterRuleRunner.class); + private final ReplicationThrottler replicatorThrottler; + private final DruidMaster master; - public DruidMasterRuleRunner(DruidMaster master) + public DruidMasterRuleRunner(DruidMaster master, int replicantLifeTime, int replicantThrottleLimit) { this.master = master; + this.replicatorThrottler = new ReplicationThrottler(replicantThrottleLimit, replicantLifeTime); } @Override @@ -50,15 +54,25 @@ public class DruidMasterRuleRunner implements DruidMasterHelper return params; } + for (String tier : cluster.getTierNames()) { + replicatorThrottler.updateReplicationState(tier); + replicatorThrottler.updateTerminationState(tier); + } + + DruidMasterRuntimeParams paramsWithReplicationManager = params.buildFromExisting() + .withReplicationManager(replicatorThrottler) + .build(); + // Run through all matched rules for available segments - DatabaseRuleManager databaseRuleManager = params.getDatabaseRuleManager(); - for (DataSegment segment : params.getAvailableSegments()) { + DateTime now = new DateTime(); + DatabaseRuleManager databaseRuleManager = paramsWithReplicationManager.getDatabaseRuleManager(); + for (DataSegment segment : 
paramsWithReplicationManager.getAvailableSegments()) { List rules = databaseRuleManager.getRulesWithDefault(segment.getDataSource()); boolean foundMatchingRule = false; for (Rule rule : rules) { - if (rule.appliesTo(segment)) { - stats.accumulate(rule.run(master, params, segment)); + if (rule.appliesTo(segment, now)) { + stats.accumulate(rule.run(master, paramsWithReplicationManager, segment)); foundMatchingRule = true; break; } @@ -74,8 +88,8 @@ public class DruidMasterRuleRunner implements DruidMasterHelper } } - return params.buildFromExisting() - .withMasterStats(stats) - .build(); + return paramsWithReplicationManager.buildFromExisting() + .withMasterStats(stats) + .build(); } } diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java b/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java index fb56033c70f..85fe6a35d89 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java @@ -25,7 +25,6 @@ import com.metamx.common.guava.Comparators; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidDataSource; import com.metamx.druid.db.DatabaseRuleManager; -import com.metamx.druid.master.rules.RuleMap; import com.metamx.emitter.service.ServiceEmitter; import org.joda.time.DateTime; @@ -45,6 +44,7 @@ public class DruidMasterRuntimeParams private final Set dataSources; private final Set availableSegments; private final Map loadManagementPeons; + private final ReplicationThrottler replicationManager; private final ServiceEmitter emitter; private final long millisToWaitBeforeDeleting; private final MasterStats stats; @@ -61,6 +61,7 @@ public class DruidMasterRuntimeParams Set dataSources, Set availableSegments, Map loadManagementPeons, + ReplicationThrottler replicationManager, ServiceEmitter emitter, long millisToWaitBeforeDeleting, MasterStats stats, @@ -77,6 +78,7 @@ public class DruidMasterRuntimeParams this.dataSources = dataSources; this.availableSegments = availableSegments; this.loadManagementPeons = loadManagementPeons; + this.replicationManager = replicationManager; this.emitter = emitter; this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; this.stats = stats; @@ -121,6 +123,11 @@ public class DruidMasterRuntimeParams return loadManagementPeons; } + public ReplicationThrottler getReplicationManager() + { + return replicationManager; + } + public ServiceEmitter getEmitter() { return emitter; @@ -181,6 +188,7 @@ public class DruidMasterRuntimeParams dataSources, availableSegments, loadManagementPeons, + replicationManager, emitter, millisToWaitBeforeDeleting, stats, @@ -200,6 +208,7 @@ public class DruidMasterRuntimeParams private final Set dataSources; private final Set availableSegments; private final Map loadManagementPeons; + private ReplicationThrottler replicationManager; private ServiceEmitter emitter; private long millisToWaitBeforeDeleting; private MasterStats stats; @@ -217,6 +226,7 @@ public class DruidMasterRuntimeParams this.dataSources = Sets.newHashSet(); this.availableSegments = Sets.newTreeSet(Comparators.inverse(DataSegment.bucketMonthComparator())); this.loadManagementPeons = Maps.newHashMap(); + this.replicationManager = null; this.emitter = null; this.millisToWaitBeforeDeleting = 0; this.stats = new MasterStats(); @@ -234,6 +244,7 @@ public class DruidMasterRuntimeParams Set dataSources, Set availableSegments, Map loadManagementPeons, + ReplicationThrottler 
replicationManager, ServiceEmitter emitter, long millisToWaitBeforeDeleting, MasterStats stats, @@ -250,6 +261,7 @@ public class DruidMasterRuntimeParams this.dataSources = dataSources; this.availableSegments = availableSegments; this.loadManagementPeons = loadManagementPeons; + this.replicationManager = replicationManager; this.emitter = emitter; this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting; this.stats = stats; @@ -269,6 +281,7 @@ public class DruidMasterRuntimeParams dataSources, availableSegments, loadManagementPeons, + replicationManager, emitter, millisToWaitBeforeDeleting, stats, @@ -321,6 +334,12 @@ public class DruidMasterRuntimeParams return this; } + public Builder withReplicationManager(ReplicationThrottler replicationManager) + { + this.replicationManager = replicationManager; + return this; + } + public Builder withEmitter(ServiceEmitter emitter) { this.emitter = emitter; diff --git a/server/src/main/java/com/metamx/druid/master/ReplicationThrottler.java b/server/src/main/java/com/metamx/druid/master/ReplicationThrottler.java new file mode 100644 index 00000000000..3b3e90f78c2 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/master/ReplicationThrottler.java @@ -0,0 +1,172 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.master; + +import com.google.common.collect.Maps; +import com.metamx.emitter.EmittingLogger; + +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListSet; + +/** + * The ReplicationThrottler is used to throttle the number of replicants that are created and destroyed. + */ +public class ReplicationThrottler +{ + private static final EmittingLogger log = new EmittingLogger(ReplicationThrottler.class); + private final int maxReplicants; + private final int maxLifetime; + + private final Map replicatingLookup = Maps.newHashMap(); + private final Map terminatingLookup = Maps.newHashMap(); + private final ReplicatorSegmentHolder currentlyReplicating = new ReplicatorSegmentHolder(); + private final ReplicatorSegmentHolder currentlyTerminating = new ReplicatorSegmentHolder(); + + public ReplicationThrottler(int maxReplicants, int maxLifetime) + { + this.maxReplicants = maxReplicants; + this.maxLifetime = maxLifetime; + } + + public void updateReplicationState(String tier) + { + update(tier, currentlyReplicating, replicatingLookup, "create"); + } + + public void updateTerminationState(String tier) + { + update(tier, currentlyTerminating, terminatingLookup, "terminate"); + } + + private void update(String tier, ReplicatorSegmentHolder holder, Map lookup, String type) + { + int size = holder.getNumProcessing(tier); + if (size != 0) { + log.info( + "[%s]: Replicant %s queue still has %d segments. 
Lifetime[%d]", + tier, + type, + size, + holder.getLifetime(tier) + ); + holder.reduceLifetime(tier); + lookup.put(tier, false); + + if (holder.getLifetime(tier) < 0) { + log.makeAlert("[%s]: Replicant %s queue stuck after %d+ runs!", tier, type, maxLifetime).emit(); + } + } else { + log.info("[%s]: Replicant %s queue is empty.", tier, type); + lookup.put(tier, true); + holder.resetLifetime(tier); + } + } + + public boolean canAddReplicant(String tier) + { + return replicatingLookup.get(tier); + } + + public boolean canDestroyReplicant(String tier) + { + return terminatingLookup.get(tier); + } + + public boolean registerReplicantCreation(String tier, String segmentId) + { + return currentlyReplicating.addSegment(tier, segmentId); + } + + public void unregisterReplicantCreation(String tier, String segmentId) + { + currentlyReplicating.removeSegment(tier, segmentId); + } + + public boolean registerReplicantTermination(String tier, String segmentId) + { + return currentlyTerminating.addSegment(tier, segmentId); + } + + public void unregisterReplicantTermination(String tier, String segmentId) + { + currentlyTerminating.removeSegment(tier, segmentId); + } + + private class ReplicatorSegmentHolder + { + private final Map> currentlyProcessingSegments = Maps.newHashMap(); + private final Map lifetimes = Maps.newHashMap(); + + public boolean addSegment(String tier, String segmentId) + { + ConcurrentSkipListSet segments = currentlyProcessingSegments.get(tier); + if (segments == null) { + segments = new ConcurrentSkipListSet(); + currentlyProcessingSegments.put(tier, segments); + } + if (segments.size() < maxReplicants) { + segments.add(segmentId); + return true; + } + + return false; + } + + public void removeSegment(String tier, String segmentId) + { + Set segments = currentlyProcessingSegments.get(tier); + if (segments != null) { + segments.remove(segmentId); + } + } + + public int getNumProcessing(String tier) + { + Set segments = currentlyProcessingSegments.get(tier); + return (segments == null) ? 
0 : segments.size(); + } + + public int getLifetime(String tier) + { + Integer lifetime = lifetimes.get(tier); + if (lifetime == null) { + lifetime = maxLifetime; + lifetimes.put(tier, lifetime); + } + return lifetime; + } + + public void reduceLifetime(String tier) + { + Integer lifetime = lifetimes.get(tier); + if (lifetime == null) { + lifetime = maxLifetime; + lifetimes.put(tier, lifetime); + } + lifetimes.put(tier, --lifetime); + } + + public void resetLifetime(String tier) + { + lifetimes.put(tier, maxLifetime); + } + } +} diff --git a/server/src/main/java/com/metamx/druid/master/rules/IntervalDropRule.java b/server/src/main/java/com/metamx/druid/master/rules/IntervalDropRule.java index 6546fce40b3..0acdd8bc2f8 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/IntervalDropRule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/IntervalDropRule.java @@ -22,6 +22,7 @@ package com.metamx.druid.master.rules; import com.metamx.druid.client.DataSegment; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; +import org.joda.time.DateTime; import org.joda.time.Interval; /** @@ -52,7 +53,7 @@ public class IntervalDropRule extends DropRule } @Override - public boolean appliesTo(DataSegment segment) + public boolean appliesTo(DataSegment segment, DateTime referenceTimestamp) { return interval.contains(segment.getInterval()); } diff --git a/server/src/main/java/com/metamx/druid/master/rules/IntervalLoadRule.java b/server/src/main/java/com/metamx/druid/master/rules/IntervalLoadRule.java index 8c77594a177..5aa984ccba8 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/IntervalLoadRule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/IntervalLoadRule.java @@ -23,6 +23,7 @@ import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import org.codehaus.jackson.annotate.JsonCreator; import org.codehaus.jackson.annotate.JsonProperty; +import org.joda.time.DateTime; import org.joda.time.Interval; /** @@ -81,7 +82,7 @@ public class IntervalLoadRule extends LoadRule } @Override - public boolean appliesTo(DataSegment segment) + public boolean appliesTo(DataSegment segment, DateTime referenceTimestamp) { return interval.contains(segment.getInterval()); } diff --git a/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java b/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java index 05d9adf35ca..d33da8286e2 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java @@ -19,16 +19,15 @@ package com.metamx.druid.master.rules; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.MinMaxPriorityQueue; -import com.metamx.common.Pair; import com.metamx.druid.client.DataSegment; import com.metamx.druid.master.BalancerCostAnalyzer; import com.metamx.druid.master.DruidMaster; import com.metamx.druid.master.DruidMasterRuntimeParams; import com.metamx.druid.master.LoadPeonCallback; import com.metamx.druid.master.MasterStats; +import com.metamx.druid.master.ReplicationThrottler; import com.metamx.druid.master.ServerHolder; import com.metamx.emitter.EmittingLogger; import org.joda.time.DateTime; @@ -61,6 +60,7 @@ public abstract class LoadRule implements Rule stats.accumulate( assign( + params.getReplicationManager(), expectedReplicants, totalReplicants, serverQueue, @@ -68,17 +68,19 @@ public abstract class LoadRule 
implements Rule params ) ); + stats.accumulate(drop(expectedReplicants, clusterReplicants, segment, params)); return stats; } private MasterStats assign( + final ReplicationThrottler replicationManager, int expectedReplicants, int totalReplicants, MinMaxPriorityQueue serverQueue, - DataSegment segment, - DruidMasterRuntimeParams params + final DataSegment segment, + final DruidMasterRuntimeParams params ) { MasterStats stats = new MasterStats(); @@ -106,6 +108,14 @@ public abstract class LoadRule implements Rule continue; } + if (totalReplicants > 0) { // don't throttle if there's only 1 copy of this segment in the cluster + if (!replicationManager.canAddReplicant(getTier()) || + !replicationManager.registerReplicantCreation(getTier(), segment.getIdentifier())) { + serverQueue.add(holder); + break; + } + } + holder.getPeon().loadSegment( segment, new LoadPeonCallback() @@ -113,6 +123,7 @@ public abstract class LoadRule implements Rule @Override protected void execute() { + replicationManager.unregisterReplicantCreation(getTier(), segment.getIdentifier()); } } ); @@ -129,11 +140,12 @@ public abstract class LoadRule implements Rule private MasterStats drop( int expectedReplicants, int clusterReplicants, - DataSegment segment, - DruidMasterRuntimeParams params + final DataSegment segment, + final DruidMasterRuntimeParams params ) { MasterStats stats = new MasterStats(); + final ReplicationThrottler replicationManager = params.getReplicationManager(); if (!params.hasDeletionWaitTimeElapsed()) { return stats; @@ -165,6 +177,14 @@ public abstract class LoadRule implements Rule break; } + if (expectedNumReplicantsForType > 0) { // don't throttle unless we are removing extra replicants + if (!replicationManager.canDestroyReplicant(getTier()) || + !replicationManager.registerReplicantTermination(getTier(), segment.getIdentifier())) { + serverQueue.add(holder); + break; + } + } + if (holder.isServingSegment(segment)) { holder.getPeon().dropSegment( segment, @@ -173,6 +193,7 @@ public abstract class LoadRule implements Rule @Override protected void execute() { + replicationManager.unregisterReplicantTermination(getTier(), segment.getIdentifier()); } } ); diff --git a/server/src/main/java/com/metamx/druid/master/rules/PeriodDropRule.java b/server/src/main/java/com/metamx/druid/master/rules/PeriodDropRule.java index ce3c472a28f..152f074dc3a 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/PeriodDropRule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/PeriodDropRule.java @@ -54,9 +54,9 @@ public class PeriodDropRule extends DropRule } @Override - public boolean appliesTo(DataSegment segment) + public boolean appliesTo(DataSegment segment, DateTime referenceTimestamp) { - final Interval currInterval = new Interval(new DateTime().minus(period), period); + final Interval currInterval = new Interval(period, referenceTimestamp); return currInterval.contains(segment.getInterval()); } } diff --git a/server/src/main/java/com/metamx/druid/master/rules/PeriodLoadRule.java b/server/src/main/java/com/metamx/druid/master/rules/PeriodLoadRule.java index 60d27eb01cd..051967e65ab 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/PeriodLoadRule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/PeriodLoadRule.java @@ -81,9 +81,9 @@ public class PeriodLoadRule extends LoadRule } @Override - public boolean appliesTo(DataSegment segment) + public boolean appliesTo(DataSegment segment, DateTime referenceTimestamp) { - final Interval currInterval = new Interval(period, 
new DateTime()); + final Interval currInterval = new Interval(period, referenceTimestamp); return currInterval.overlaps(segment.getInterval()); } } diff --git a/server/src/main/java/com/metamx/druid/master/rules/Rule.java b/server/src/main/java/com/metamx/druid/master/rules/Rule.java index a6fbfa358cc..a6eced93c68 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/Rule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/Rule.java @@ -25,6 +25,7 @@ import com.metamx.druid.master.DruidMasterRuntimeParams; import com.metamx.druid.master.MasterStats; import org.codehaus.jackson.annotate.JsonSubTypes; import org.codehaus.jackson.annotate.JsonTypeInfo; +import org.joda.time.DateTime; /** */ @@ -40,7 +41,7 @@ public interface Rule { public String getType(); - public boolean appliesTo(DataSegment segment); + public boolean appliesTo(DataSegment segment, DateTime referenceTimestamp); public MasterStats run(DruidMaster master, DruidMasterRuntimeParams params, DataSegment segment); } diff --git a/server/src/main/java/com/metamx/druid/query/QueryRunnerFactory.java b/server/src/main/java/com/metamx/druid/query/QueryRunnerFactory.java index 5eda9f40514..083ebae543e 100644 --- a/server/src/main/java/com/metamx/druid/query/QueryRunnerFactory.java +++ b/server/src/main/java/com/metamx/druid/query/QueryRunnerFactory.java @@ -21,6 +21,7 @@ package com.metamx.druid.query; import com.metamx.druid.Query; import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.Segment; import java.util.concurrent.ExecutorService; @@ -28,7 +29,7 @@ import java.util.concurrent.ExecutorService; */ public interface QueryRunnerFactory> { - public QueryRunner createRunner(StorageAdapter adapter); + public QueryRunner createRunner(Segment segment); public QueryRunner mergeRunners(ExecutorService queryExecutor, Iterable> queryRunners); public QueryToolChest getToolchest(); } diff --git a/server/src/main/java/com/metamx/druid/GroupByQueryEngine.java b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java similarity index 93% rename from server/src/main/java/com/metamx/druid/GroupByQueryEngine.java rename to server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java index 48afd63ac49..cb9a1468b80 100644 --- a/server/src/main/java/com/metamx/druid/GroupByQueryEngine.java +++ b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngine.java @@ -17,7 +17,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
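Rule.appliesTo now takes an explicit referenceTimestamp, so every rule evaluated in a single master run is compared against the same instant instead of a fresh new DateTime() per call. A small example of the period-rule math under the new signature (the timestamps, period, and segment are invented):

    // Invented values, purely to show the interval the period rules now build.
    DateTime referenceTimestamp = new DateTime("2013-01-15T00:00:00.000Z"); // the run's "now"
    Period period = new Period("P1M");

    // Both period rules anchor the interval at the reference time:
    Interval currInterval = new Interval(period, referenceTimestamp);       // 2012-12-15 .. 2013-01-15

    boolean loaded = currInterval.overlaps(segment.getInterval());  // PeriodLoadRule.appliesTo
    boolean dropped = currInterval.contains(segment.getInterval()); // PeriodDropRule.appliesTo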
*/ -package com.metamx.druid; +package com.metamx.druid.query.group; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; @@ -31,6 +31,7 @@ import com.metamx.common.ISE; import com.metamx.common.guava.BaseSequence; import com.metamx.common.guava.FunctionalIterator; import com.metamx.common.guava.Sequence; +import com.metamx.druid.StorageAdapter; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.aggregation.BufferAggregator; import com.metamx.druid.aggregation.post.PostAggregator; @@ -42,7 +43,7 @@ import com.metamx.druid.index.v1.processing.DimensionSelector; import com.metamx.druid.input.MapBasedRow; import com.metamx.druid.input.Row; import com.metamx.druid.query.dimension.DimensionSpec; -import com.metamx.druid.query.group.GroupByQuery; +import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -236,6 +237,11 @@ public class GroupByQueryEngine } } + public int getIncrement() + { + return increment; + } + public int[] getIncrements() { return increments; @@ -272,7 +278,10 @@ public class GroupByQueryEngine dimNames = new String[dimensionSpecs.size()]; for (int i = 0; i < dimensionSpecs.size(); ++i) { final DimensionSpec dimSpec = dimensionSpecs.get(i); - dimensions.add(cursor.makeDimensionSelector(dimSpec.getDimension())); + final DimensionSelector selector = cursor.makeDimensionSelector(dimSpec.getDimension()); + if (selector != null) { + dimensions.add(selector); + } dimNames[i] = dimSpec.getOutputName(); } @@ -301,11 +310,11 @@ public class GroupByQueryEngine return delegate.next(); } - if (cursor.isDone()) { + if (unprocessedKeys == null && cursor.isDone()) { throw new NoSuchElementException(); } - final PositionMaintainer positionMaintainer = new PositionMaintainer(0, sizesRequired, metricsBuffer.limit()); + final PositionMaintainer positionMaintainer = new PositionMaintainer(0, sizesRequired, metricsBuffer.remaining()); final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer); if (unprocessedKeys != null) { for (ByteBuffer key : unprocessedKeys) { @@ -327,12 +336,19 @@ public class GroupByQueryEngine cursor.advance(); } + if (rowUpdater.getPositions().isEmpty() && unprocessedKeys != null) { + throw new ISE( + "Not enough memory to process even a single item. Required [%,d] memory, but only have[%,d]", + positionMaintainer.getIncrement(), metricsBuffer.remaining() + ); + } + delegate = FunctionalIterator .create(rowUpdater.getPositions().entrySet().iterator()) .transform( new Function, Row>() { - private final long timestamp = cursor.getTime().getMillis(); + private final DateTime timestamp = cursor.getTime(); private final int[] increments = positionMaintainer.getIncrements(); @Override diff --git a/server/src/main/java/com/metamx/druid/GroupByQueryEngineConfig.java b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngineConfig.java similarity index 96% rename from server/src/main/java/com/metamx/druid/GroupByQueryEngineConfig.java rename to server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngineConfig.java index 3236a64f684..2422849d96a 100644 --- a/server/src/main/java/com/metamx/druid/GroupByQueryEngineConfig.java +++ b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryEngineConfig.java @@ -17,7 +17,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
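Among the GroupByQueryEngine fixes above, the PositionMaintainer is now sized with metricsBuffer.remaining() rather than metricsBuffer.limit(); the two only agree while the buffer's position is still zero. A toy illustration of the difference, with arbitrary buffer sizes:

    // limit() is the end of the usable region; remaining() is what can still be written
    // from the current position -- the quantity the PositionMaintainer actually needs.
    ByteBuffer buf = ByteBuffer.allocate(1024);
    buf.position(256);

    int limit = buf.limit();         // 1024
    int remaining = buf.remaining(); //  768

    // Sizing by remaining() also pairs with the guard added above: if not even one row's
    // worth of aggregator space fits, the engine now fails fast with an ISE instead of
    // silently producing no positions.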
*/ -package com.metamx.druid; +package com.metamx.druid.query.group; import org.skife.config.Config; import org.skife.config.Default; diff --git a/server/src/main/java/com/metamx/druid/query/group/GroupByQueryRunnerFactory.java b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryRunnerFactory.java index 01327a44c80..c1fcc22786a 100644 --- a/server/src/main/java/com/metamx/druid/query/group/GroupByQueryRunnerFactory.java +++ b/server/src/main/java/com/metamx/druid/query/group/GroupByQueryRunnerFactory.java @@ -26,9 +26,9 @@ import com.metamx.common.ISE; import com.metamx.common.guava.ExecutorExecutingSequence; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; -import com.metamx.druid.GroupByQueryEngine; import com.metamx.druid.Query; import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.Segment; import com.metamx.druid.input.Row; import com.metamx.druid.query.ConcatQueryRunner; import com.metamx.druid.query.QueryRunner; @@ -62,22 +62,9 @@ public class GroupByQueryRunnerFactory implements QueryRunnerFactory createRunner(final StorageAdapter adapter) + public QueryRunner createRunner(final Segment segment) { - return new QueryRunner() - { - @Override - public Sequence run(Query input) - { - if (! (input instanceof GroupByQuery)) { - throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); - } - - GroupByQuery query = (GroupByQuery) input; - - return engine.process(query, adapter); - } - }; + return new GroupByQueryRunner(segment, engine); } @Override @@ -132,4 +119,26 @@ public class GroupByQueryRunnerFactory implements QueryRunnerFactory + { + private final StorageAdapter adapter; + private final GroupByQueryEngine engine; + + public GroupByQueryRunner(Segment segment, final GroupByQueryEngine engine) + { + this.adapter = segment.asStorageAdapter(); + this.engine = engine; + } + + @Override + public Sequence run(Query input) + { + if (! (input instanceof GroupByQuery)) { + throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); + } + + return engine.process((GroupByQuery) input, adapter); + } + } } diff --git a/server/src/main/java/com/metamx/druid/query/metadata/SegmentAnalyzer.java b/server/src/main/java/com/metamx/druid/query/metadata/SegmentAnalyzer.java new file mode 100644 index 00000000000..a219dbae9b4 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/query/metadata/SegmentAnalyzer.java @@ -0,0 +1,160 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
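QueryRunnerFactory.createRunner now takes a Segment rather than a StorageAdapter, and each runner asks the segment for the view it needs: the GroupByQueryRunner above calls segment.asStorageAdapter(), while the segment-metadata runner later in this patch calls segment.asQueryableIndex() and returns an empty sequence when no index is available. A hedged sketch of the calling side, where segmentLoader, dataSegment, groupByFactory, query, and the Row/Sequence parameterization are all assumed:

    // Sketch only -- variables other than the patch's own types are placeholders.
    Segment segment = segmentLoader.getSegment(dataSegment);        // SingleSegmentLoader from this patch
    QueryRunner<Row> runner = groupByFactory.createRunner(segment); // grabs segment.asStorageAdapter()
    Sequence<Row> rows = runner.run(query);                         // query: a GroupByQuery

    // When finished with the segment:
    segmentLoader.cleanup(dataSegment);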
+ */ + +package com.metamx.druid.query.metadata; + +import com.google.common.base.Charsets; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import com.google.common.primitives.Floats; +import com.google.common.primitives.Longs; +import com.metamx.common.logger.Logger; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.column.BitmapIndex; +import com.metamx.druid.index.column.Column; +import com.metamx.druid.index.column.ColumnCapabilities; +import com.metamx.druid.index.column.ComplexColumn; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.index.v1.serde.ComplexMetricSerde; +import com.metamx.druid.index.v1.serde.ComplexMetrics; + +import java.util.Map; + + +public class SegmentAnalyzer +{ + private static final Logger log = new Logger(SegmentAnalyzer.class); + + /** + * This is based on the minimum size of a timestamp (POSIX seconds). An ISO timestamp will actually be more like 24+ + */ + private static final int NUM_BYTES_IN_TIMESTAMP = 10; + + /** + * This is based on assuming 6 units of precision, one decimal point and a single value left of the decimal + */ + private static final int NUM_BYTES_IN_TEXT_FLOAT = 8; + + public Map analyze(QueryableIndex index) + { + Preconditions.checkNotNull(index, "Index cannot be null"); + + Map columns = Maps.newTreeMap(); + + for (String columnName : index.getColumnNames()) { + final Column column = index.getColumn(columnName); + final ColumnCapabilities capabilities = column.getCapabilities(); + + final ColumnAnalysis analysis; + final ValueType type = capabilities.getType(); + switch(type) { + case LONG: + analysis = analyzeLongColumn(column); + break; + case FLOAT: + analysis = analyzeFloatColumn(column); + break; + case STRING: + analysis = analyzeStringColumn(column); + break; + case COMPLEX: + analysis = analyzeComplexColumn(column); + break; + default: + log.warn("Unknown column type[%s].", type); + analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type)); + } + + columns.put(columnName, analysis); + } + + columns.put("__time", lengthBasedAnalysis(index.getTimeColumn(), NUM_BYTES_IN_TIMESTAMP)); + + return columns; + } + + public ColumnAnalysis analyzeLongColumn(Column column) + { + return lengthBasedAnalysis(column, Longs.BYTES); + } + + public ColumnAnalysis analyzeFloatColumn(Column column) + { + return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT); + } + + private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes) + { + final ColumnCapabilities capabilities = column.getCapabilities(); + if (capabilities.hasMultipleValues()) { + return ColumnAnalysis.error("multi_value"); + } + + return new ColumnAnalysis(capabilities.getType(), column.getLength() * numBytes, null); + } + + public ColumnAnalysis analyzeStringColumn(Column column) + { + final ColumnCapabilities capabilities = column.getCapabilities(); + + if (capabilities.hasBitmapIndexes()) { + final BitmapIndex bitmapIndex = column.getBitmapIndex(); + + int cardinality = bitmapIndex.getCardinality(); + long size = 0; + for (int i = 0; i < cardinality; ++i) { + String value = bitmapIndex.getValue(i); + + if (value != null) { + size += value.getBytes(Charsets.UTF_8).length * bitmapIndex.getConciseSet(value).size(); + } + } + + return new ColumnAnalysis(capabilities.getType(), size, cardinality); + } + + return ColumnAnalysis.error("string_no_bitmap"); + } + + public ColumnAnalysis analyzeComplexColumn(Column column) + { + 
final ColumnCapabilities capabilities = column.getCapabilities(); + final ComplexColumn complexColumn = column.getComplexColumn(); + + final String typeName = complexColumn.getTypeName(); + final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName); + if (serde == null) { + return ColumnAnalysis.error(String.format("unknown_complex_%s", typeName)); + } + + final Function inputSizeFn = serde.inputSizeFn(); + if (inputSizeFn == null) { + return ColumnAnalysis.error("noSizeFn"); + } + + final int length = column.getLength(); + long size = 0; + for (int i = 0; i < length; ++i) { + size += inputSizeFn.apply(complexColumn.getRowValue(i)); + } + + return new ColumnAnalysis(capabilities.getType(), size, null); + } +} \ No newline at end of file diff --git a/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryEngine.java b/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryEngine.java deleted file mode 100644 index 7522b4b4750..00000000000 --- a/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryEngine.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
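To make the size estimates above concrete: long and __time columns are charged a fixed byte width per row, string columns sum bytes(value) times the number of rows containing each value (via the bitmap index), and the metadata runner shown later adds one byte of "whitespace" per analyzed column per row before totalling the non-error columns. A worked sketch with invented counts:

    // All counts invented; mirrors the arithmetic in SegmentAnalyzer and the metadata runner.
    int numRows = 1000000;
    int numColumns = 3; // __time, a long metric, a string dimension

    long timeBytes = (long) numRows * 10;            // NUM_BYTES_IN_TIMESTAMP
    long metricBytes = (long) numRows * Longs.BYTES; // 8 bytes per row for a long column

    // String dimension with two values: "spot" in 600k rows, "sample" in 400k rows.
    long dimBytes = 600000L * "spot".getBytes(Charsets.UTF_8).length
                  + 400000L * "sample".getBytes(Charsets.UTF_8).length;

    long whitespace = (long) numColumns * numRows;   // 1 byte per analyzed column per row
    long totalSize = whitespace + timeBytes + metricBytes + dimBytes;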
- */ - -package com.metamx.druid.query.metadata; - -import com.google.common.base.Charsets; -import com.google.common.base.Function; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; -import com.metamx.common.IAE; -import com.metamx.common.guava.Sequence; -import com.metamx.common.guava.Sequences; -import com.metamx.common.guava.SimpleSequence; -import com.metamx.druid.BaseStorageAdapter; -import com.metamx.druid.StorageAdapter; -import com.metamx.druid.index.v1.SegmentIdAttachedStorageAdapter; -import com.metamx.druid.kv.Indexed; -import com.metamx.druid.result.Result; -import com.metamx.druid.result.SegmentMetadataResultValue; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; -import org.joda.time.Interval; - -import javax.annotation.Nullable; -import java.util.HashMap; -import java.util.List; - - - -public class SegmentMetadataQueryEngine -{ - public Sequence> process( - final SegmentMetadataQuery query, - StorageAdapter storageAdapter - ) - { - final List intervals = query.getQuerySegmentSpec().getIntervals(); - if (intervals.size() != 1) { - throw new IAE("Should only have one interval, got[%s]", intervals); - } - - if(!(storageAdapter instanceof SegmentIdAttachedStorageAdapter) || - !(((SegmentIdAttachedStorageAdapter)storageAdapter).getDelegate() instanceof BaseStorageAdapter)) { - return Sequences.empty(); - } - - final BaseStorageAdapter adapter = (BaseStorageAdapter) - ((SegmentIdAttachedStorageAdapter) storageAdapter).getDelegate(); - - Function sizeDimension = new Function() - { - @Override - public SegmentMetadataResultValue.Dimension apply(@Nullable String input) - { - long size = 0; - final Indexed lookup = adapter.getDimValueLookup(input); - for (String dimVal : lookup) { - ImmutableConciseSet index = adapter.getInvertedIndex(input, dimVal); - size += (dimVal == null) ? 
0 : index.size() * Charsets.UTF_8.encode(dimVal).capacity(); - } - return new SegmentMetadataResultValue.Dimension( - size, - adapter.getDimensionCardinality(input) - ); - } - }; - - // TODO: add metric storage size - - long totalSize = 0; - - HashMap dimensions = Maps.newHashMap(); - for(String input : adapter.getAvailableDimensions()) { - SegmentMetadataResultValue.Dimension d = sizeDimension.apply(input); - dimensions.put(input, d); - totalSize += d.size; - } - - return new SimpleSequence>( - ImmutableList.of( - new Result( - adapter.getMinTime(), - new SegmentMetadataResultValue( - storageAdapter.getSegmentIdentifier(), - dimensions, - ImmutableMap.of(), - totalSize - ) - ) - ) - ); - } -} \ No newline at end of file diff --git a/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java b/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java index ce624a944f4..f44110fcf14 100644 --- a/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java +++ b/server/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java @@ -21,82 +21,105 @@ package com.metamx.druid.query.metadata; import com.google.common.base.Function; import com.google.common.base.Throwables; -import com.google.common.collect.ImmutableList; -import com.metamx.common.ISE; +import com.google.common.collect.Maps; import com.metamx.common.guava.ExecutorExecutingSequence; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; import com.metamx.druid.Query; -import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.Segment; import com.metamx.druid.query.ConcatQueryRunner; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryRunnerFactory; import com.metamx.druid.query.QueryToolChest; -import com.metamx.druid.query.metadata.SegmentMetadataQuery; -import com.metamx.druid.query.metadata.SegmentMetadataQueryEngine; -import com.metamx.druid.query.metadata.SegmentMetadataQueryQueryToolChest; -import com.metamx.druid.result.SegmentMetadataResultValue; -import com.metamx.druid.result.Result; +import java.util.Arrays; +import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; -public class SegmentMetadataQueryRunnerFactory implements QueryRunnerFactory, SegmentMetadataQuery> +public class SegmentMetadataQueryRunnerFactory implements QueryRunnerFactory { - private static final SegmentMetadataQueryQueryToolChest toolChest = new SegmentMetadataQueryQueryToolChest() - { - @Override - public QueryRunner> mergeResults(QueryRunner> runner) - { - return new ConcatQueryRunner>(Sequences.simple(ImmutableList.of(runner))); - } - }; + private static final SegmentAnalyzer analyzer = new SegmentAnalyzer(); + private static final SegmentMetadataQueryQueryToolChest toolChest = new SegmentMetadataQueryQueryToolChest(); @Override - public QueryRunner> createRunner(final StorageAdapter adapter) + public QueryRunner createRunner(final Segment segment) { - return new QueryRunner>() + return new QueryRunner() { @Override - public Sequence> run(Query> query) + public Sequence run(Query inQ) { - if (!(query instanceof SegmentMetadataQuery)) { - throw new ISE("Got a [%s] which isn't a %s", query.getClass(), SegmentMetadataQuery.class); + SegmentMetadataQuery query = (SegmentMetadataQuery) inQ; + 
+ final QueryableIndex index = segment.asQueryableIndex(); + if (index == null) { + return Sequences.empty(); } - return new SegmentMetadataQueryEngine().process((SegmentMetadataQuery) query, adapter); + + final Map analyzedColumns = analyzer.analyze(index); + + // Initialize with the size of the whitespace, 1 byte per + long totalSize = analyzedColumns.size() * index.getNumRows(); + + Map columns = Maps.newTreeMap(); + ColumnIncluderator includerator = query.getToInclude(); + for (Map.Entry entry : analyzedColumns.entrySet()) { + final String columnName = entry.getKey(); + final ColumnAnalysis column = entry.getValue(); + + if (!column.isError()) { + totalSize += column.getSize(); + } + if (includerator.include(columnName)) { + columns.put(columnName, column); + } + } + + return Sequences.simple( + Arrays.asList( + new SegmentAnalysis( + segment.getIdentifier(), + Arrays.asList(segment.getDataInterval()), + columns, + totalSize + ) + ) + ); } }; } @Override - public QueryRunner> mergeRunners( - final ExecutorService queryExecutor, Iterable>> queryRunners + public QueryRunner mergeRunners( + final ExecutorService queryExecutor, Iterable> queryRunners ) { - return new ConcatQueryRunner>( + return new ConcatQueryRunner( Sequences.map( Sequences.simple(queryRunners), - new Function>, QueryRunner>>() + new Function, QueryRunner>() { @Override - public QueryRunner> apply(final QueryRunner> input) + public QueryRunner apply(final QueryRunner input) { - return new QueryRunner>() + return new QueryRunner() { @Override - public Sequence> run(final Query> query) + public Sequence run(final Query query) { - Future>> future = queryExecutor.submit( - new Callable>>() + Future> future = queryExecutor.submit( + new Callable>() { @Override - public Sequence> call() throws Exception + public Sequence call() throws Exception { - return new ExecutorExecutingSequence>( + return new ExecutorExecutingSequence( input.run(query), queryExecutor ); @@ -121,7 +144,7 @@ public class SegmentMetadataQueryRunnerFactory implements QueryRunnerFactory getToolchest() { return toolChest; } diff --git a/server/src/main/java/com/metamx/druid/query/search/SearchQueryRunnerFactory.java b/server/src/main/java/com/metamx/druid/query/search/SearchQueryRunnerFactory.java index ce4e8e89e6e..21cc70aa1e6 100644 --- a/server/src/main/java/com/metamx/druid/query/search/SearchQueryRunnerFactory.java +++ b/server/src/main/java/com/metamx/druid/query/search/SearchQueryRunnerFactory.java @@ -19,13 +19,14 @@ package com.metamx.druid.query.search; -import com.google.common.collect.Lists; +import com.google.common.collect.Iterators; import com.metamx.common.ISE; import com.metamx.common.guava.BaseSequence; import com.metamx.common.guava.Sequence; import com.metamx.druid.Query; import com.metamx.druid.SearchResultBuilder; import com.metamx.druid.StorageAdapter; +import com.metamx.druid.index.Segment; import com.metamx.druid.index.brita.Filters; import com.metamx.druid.query.ChainedExecutionQueryRunner; import com.metamx.druid.query.QueryRunner; @@ -45,46 +46,9 @@ public class SearchQueryRunnerFactory implements QueryRunnerFactory> createRunner(final StorageAdapter adapter) + public QueryRunner> createRunner(final Segment segment) { - return new QueryRunner>() - { - @Override - public Sequence> run(final Query> input) - { - if (!(input instanceof SearchQuery)) { - throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); - } - - final SearchQuery query = (SearchQuery) input; - - return new BaseSequence, Iterator>>( - new 
BaseSequence.IteratorMaker, Iterator>>() - { - @Override - public Iterator> make() - { - return Lists.newArrayList( - new SearchResultBuilder( - adapter.getInterval().getStart(), - adapter.searchDimensions( - query, - Filters.convertDimensionFilters(query.getDimensionsFilter()) - ) - ).build() - ).iterator(); - } - - @Override - public void cleanup(Iterator> toClean) - { - - } - } - ); - } - }; - + return new SearchQueryRunner(segment); } @Override @@ -102,4 +66,49 @@ public class SearchQueryRunnerFactory implements QueryRunnerFactory> + { + private final StorageAdapter adapter; + + public SearchQueryRunner(Segment segment) + { + this.adapter = segment.asStorageAdapter(); + } + + @Override + public Sequence> run(final Query> input) + { + if (!(input instanceof SearchQuery)) { + throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); + } + + final SearchQuery query = (SearchQuery) input; + + return new BaseSequence, Iterator>>( + new BaseSequence.IteratorMaker, Iterator>>() + { + @Override + public Iterator> make() + { + return Iterators.singletonIterator( + new SearchResultBuilder( + adapter.getInterval().getStart(), + adapter.searchDimensions( + query, + Filters.convertDimensionFilters(query.getDimensionsFilter()) + ) + ).build() + ); + } + + @Override + public void cleanup(Iterator> toClean) + { + + } + } + ); + } + } } diff --git a/server/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryRunnerFactory.java b/server/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryRunnerFactory.java index 459a520c916..6c40e6774d9 100644 --- a/server/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryRunnerFactory.java +++ b/server/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryRunnerFactory.java @@ -24,16 +24,14 @@ import com.metamx.common.guava.BaseSequence; import com.metamx.common.guava.Sequence; import com.metamx.druid.Query; import com.metamx.druid.StorageAdapter; -import com.metamx.druid.collect.StupidPool; +import com.metamx.druid.index.Segment; import com.metamx.druid.query.ChainedExecutionQueryRunner; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryRunnerFactory; import com.metamx.druid.query.QueryToolChest; -import com.metamx.druid.query.group.GroupByQuery; import com.metamx.druid.result.Result; import com.metamx.druid.result.TimeBoundaryResultValue; -import java.nio.ByteBuffer; import java.util.Iterator; import java.util.concurrent.ExecutorService; @@ -45,41 +43,9 @@ public class TimeBoundaryQueryRunnerFactory private static final TimeBoundaryQueryQueryToolChest toolChest = new TimeBoundaryQueryQueryToolChest(); @Override - public QueryRunner> createRunner(final StorageAdapter adapter) + public QueryRunner> createRunner(final Segment segment) { - return new QueryRunner>() - { - @Override - public Sequence> run(Query> input) - { - if (!(input instanceof TimeBoundaryQuery)) { - throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); - } - - final TimeBoundaryQuery legacyQuery = (TimeBoundaryQuery) input; - - return new BaseSequence, Iterator>>( - new BaseSequence.IteratorMaker, Iterator>>() - { - @Override - public Iterator> make() - { - return legacyQuery.buildResult( - adapter.getInterval().getStart(), - adapter.getMinTime(), - adapter.getMaxTime() - ).iterator(); - } - - @Override - public void cleanup(Iterator> toClean) - { - - } - } - ); - } - }; + return new TimeBoundaryQueryRunner(segment); } @Override @@ -97,4 +63,45 @@ public class 
TimeBoundaryQueryRunnerFactory { return toolChest; } + + private static class TimeBoundaryQueryRunner implements QueryRunner> + { + private final StorageAdapter adapter; + + public TimeBoundaryQueryRunner(Segment segment) + { + this.adapter = segment.asStorageAdapter(); + } + + @Override + public Sequence> run(Query> input) + { + if (!(input instanceof TimeBoundaryQuery)) { + throw new ISE("Got a [%s] which isn't a %s", input.getClass(), TimeBoundaryQuery.class); + } + + final TimeBoundaryQuery legacyQuery = (TimeBoundaryQuery) input; + + return new BaseSequence, Iterator>>( + new BaseSequence.IteratorMaker, Iterator>>() + { + @Override + public Iterator> make() + { + return legacyQuery.buildResult( + adapter.getInterval().getStart(), + adapter.getMinTime(), + adapter.getMaxTime() + ).iterator(); + } + + @Override + public void cleanup(Iterator> toClean) + { + + } + } + ); + } + } } diff --git a/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryEngine.java b/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryEngine.java new file mode 100644 index 00000000000..018dc329fc2 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryEngine.java @@ -0,0 +1,100 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.query.timeseries; + +import com.google.common.base.Function; +import com.metamx.common.guava.BaseSequence; +import com.metamx.common.guava.Sequence; +import com.metamx.druid.StorageAdapter; +import com.metamx.druid.TimeseriesResultBuilder; +import com.metamx.druid.aggregation.Aggregator; +import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.aggregation.post.PostAggregator; +import com.metamx.druid.index.brita.Filters; +import com.metamx.druid.index.v1.processing.Cursor; +import com.metamx.druid.query.QueryRunnerHelper; +import com.metamx.druid.result.Result; +import com.metamx.druid.result.TimeseriesResultValue; + +import java.util.Iterator; +import java.util.List; + +/** + */ +public class TimeseriesQueryEngine +{ + public Sequence> process(final TimeseriesQuery query, final StorageAdapter adapter) + { + return new BaseSequence, Iterator>>( + new BaseSequence.IteratorMaker, Iterator>>() + { + @Override + public Iterator> make() + { + return QueryRunnerHelper.makeCursorBasedQuery( + adapter, + query.getQuerySegmentSpec().getIntervals(), + Filters.convertDimensionFilters(query.getDimensionsFilter()), + query.getGranularity(), + new Function>() + { + private final List aggregatorSpecs = query.getAggregatorSpecs(); + private final List postAggregatorSpecs = query.getPostAggregatorSpecs(); + + @Override + public Result apply(Cursor cursor) + { + Aggregator[] aggregators = QueryRunnerHelper.makeAggregators(cursor, aggregatorSpecs); + + while (!cursor.isDone()) { + for (Aggregator aggregator : aggregators) { + aggregator.aggregate(); + } + cursor.advance(); + } + + TimeseriesResultBuilder bob = new TimeseriesResultBuilder(cursor.getTime()); + + for (Aggregator aggregator : aggregators) { + bob.addMetric(aggregator); + } + + for (PostAggregator postAgg : postAggregatorSpecs) { + bob.addMetric(postAgg); + } + + return bob.build(); + } + } + ).iterator(); + } + + @Override + public void cleanup(Iterator> toClean) + { + // TODO: Let's fix this to actually use Sequences for the closing of stuff + while (toClean.hasNext()) { + toClean.next(); + } + } + } + ); + } +} diff --git a/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerFactory.java b/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerFactory.java index 32e612f12e8..8d413730e09 100644 --- a/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerFactory.java +++ b/server/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerFactory.java @@ -19,29 +19,19 @@ package com.metamx.druid.query.timeseries; -import com.google.common.base.Function; import com.metamx.common.ISE; -import com.metamx.common.guava.BaseSequence; import com.metamx.common.guava.Sequence; import com.metamx.druid.Query; import com.metamx.druid.StorageAdapter; -import com.metamx.druid.TimeseriesResultBuilder; -import com.metamx.druid.aggregation.Aggregator; -import com.metamx.druid.aggregation.AggregatorFactory; -import com.metamx.druid.aggregation.post.PostAggregator; -import com.metamx.druid.index.brita.Filters; -import com.metamx.druid.index.v1.processing.Cursor; +import com.metamx.druid.index.Segment; import com.metamx.druid.query.ChainedExecutionQueryRunner; import com.metamx.druid.query.QueryRunner; -import com.metamx.druid.query.QueryRunnerHelper; import com.metamx.druid.query.QueryRunnerFactory; import com.metamx.druid.query.QueryToolChest; import com.metamx.druid.query.group.GroupByQuery; import com.metamx.druid.result.Result; 
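// A minimal sketch, not part of this patch, of the per-cursor aggregation loop that the new
// TimeseriesQueryEngine above runs for every granularity bucket: build one Aggregator per spec,
// aggregate every row the Cursor exposes, then snapshot the values at the cursor's bucket time.
// SketchCursor, SketchAggregator and Row are simplified stand-ins so the sketch compiles on its
// own; they are not Druid's real Cursor/Aggregator classes.
import java.util.List;

interface SketchAggregator { void aggregate(); double get(); }
interface SketchCursor { boolean isDone(); void advance(); long getTime(); }

class TimeseriesLoopSketch
{
  static class Row
  {
    final long timestamp;
    final double[] metrics;

    Row(long timestamp, double[] metrics)
    {
      this.timestamp = timestamp;
      this.metrics = metrics;
    }
  }

  // One Row per cursor, mirroring the single Result the engine builds per bucket.
  static Row aggregate(SketchCursor cursor, List<SketchAggregator> aggregators)
  {
    while (!cursor.isDone()) {
      for (SketchAggregator aggregator : aggregators) {
        aggregator.aggregate();
      }
      cursor.advance();
    }

    double[] values = new double[aggregators.size()];
    for (int i = 0; i < aggregators.size(); i++) {
      values[i] = aggregators.get(i).get();
    }
    return new Row(cursor.getTime(), values);
  }
}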
import com.metamx.druid.result.TimeseriesResultValue; -import java.util.Iterator; -import java.util.List; import java.util.concurrent.ExecutorService; /** @@ -50,74 +40,12 @@ public class TimeseriesQueryRunnerFactory implements QueryRunnerFactory, TimeseriesQuery> { private static final TimeseriesQueryQueryToolChest toolChest = new TimeseriesQueryQueryToolChest(); + private static final TimeseriesQueryEngine engine = new TimeseriesQueryEngine(); @Override - public QueryRunner> createRunner(final StorageAdapter adapter) + public QueryRunner> createRunner(final Segment segment) { - return new QueryRunner>() - { - @Override - public Sequence> run(Query> input) - { - if (!(input instanceof TimeseriesQuery)) { - throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); - } - - final TimeseriesQuery query = (TimeseriesQuery) input; - - return new BaseSequence, Iterator>>( - new BaseSequence.IteratorMaker, Iterator>>() - { - @Override - public Iterator> make() - { - return QueryRunnerHelper.makeCursorBasedQuery( - adapter, - query.getQuerySegmentSpec().getIntervals(), - Filters.convertDimensionFilters(query.getDimensionsFilter()), - query.getGranularity(), - new Function>() - { - private final List aggregatorSpecs = query.getAggregatorSpecs(); - private final List postAggregatorSpecs = query.getPostAggregatorSpecs(); - - @Override - public Result apply(Cursor cursor) - { - Aggregator[] aggregators = QueryRunnerHelper.makeAggregators(cursor, aggregatorSpecs); - - while (!cursor.isDone()) { - for (Aggregator aggregator : aggregators) { - aggregator.aggregate(); - } - cursor.advance(); - } - - TimeseriesResultBuilder bob = new TimeseriesResultBuilder(cursor.getTime()); - - for (Aggregator aggregator : aggregators) { - bob.addMetric(aggregator); - } - - for (PostAggregator postAgg : postAggregatorSpecs) { - bob.addMetric(postAgg); - } - - return bob.build(); - } - } - ).iterator(); - } - - @Override - public void cleanup(Iterator> toClean) - { - - } - } - ); - } - }; + return new TimeseriesQueryRunner(segment); } @Override @@ -135,4 +63,24 @@ public class TimeseriesQueryRunnerFactory { return toolChest; } + + private static class TimeseriesQueryRunner implements QueryRunner> + { + private final StorageAdapter adapter; + + public TimeseriesQueryRunner(Segment segment) + { + this.adapter = segment.asStorageAdapter(); + } + + @Override + public Sequence> run(Query> input) + { + if (!(input instanceof TimeseriesQuery)) { + throw new ISE("Got a [%s] which isn't a %s", input.getClass(), GroupByQuery.class); + } + + return engine.process((TimeseriesQuery) input, adapter); + } + } } diff --git a/client/src/test/java/com/metamx/druid/TestHelper.java b/server/src/test/java/com/metamx/druid/TestHelper.java similarity index 64% rename from client/src/test/java/com/metamx/druid/TestHelper.java rename to server/src/test/java/com/metamx/druid/TestHelper.java index 342b3b4111c..53d45192f7c 100644 --- a/client/src/test/java/com/metamx/druid/TestHelper.java +++ b/server/src/test/java/com/metamx/druid/TestHelper.java @@ -46,6 +46,16 @@ public class TestHelper assertResults(expectedResults, results, failMsg); } + public static void assertExpectedObjects(Iterable expectedResults, Iterable results, String failMsg) + { + assertObjects(expectedResults, results, failMsg); + } + + public static void assertExpectedObjects(Iterable expectedResults, Sequence results, String failMsg) + { + assertObjects(expectedResults, Sequences.toList(results, Lists.newArrayList()), failMsg); + } + private static void 
assertResults(Iterable> expectedResults, Iterable> actualResults, String failMsg) { Iterator resultsIter = actualResults.iterator(); @@ -86,8 +96,48 @@ public class TestHelper } } + private static void assertObjects(Iterable expectedResults, Iterable actualResults, String failMsg) + { + Iterator resultsIter = actualResults.iterator(); + Iterator resultsIter2 = actualResults.iterator(); + Iterator expectedResultsIter = expectedResults.iterator(); + + while (resultsIter.hasNext() && resultsIter2.hasNext() && expectedResultsIter.hasNext()) { + Object expectedNext = expectedResultsIter.next(); + final Object next = resultsIter.next(); + final Object next2 = resultsIter2.next(); + + Assert.assertEquals(failMsg, expectedNext, next); + Assert.assertEquals( + String.format("%sSecond iterator bad, multiple calls to iterator() should be safe", failMsg), + expectedNext, + next2 + ); + } + + if (resultsIter.hasNext()) { + Assert.fail( + String.format("%sExpected resultsIter to be exhausted, next element was %s", failMsg, resultsIter.next()) + ); + } + + if (resultsIter2.hasNext()) { + Assert.fail( + String.format("%sExpected resultsIter2 to be exhausted, next element was %s", failMsg, resultsIter.next()) + ); + } + + if (expectedResultsIter.hasNext()) { + Assert.fail( + String.format( + "%sExpected expectedResultsIter to be exhausted, next element was %s", failMsg, expectedResultsIter.next() + ) + ); + } + } + private static void assertResult(String msg, Result expected, Result actual) { Assert.assertEquals(msg, expected, actual); } -} \ No newline at end of file +} diff --git a/client/src/test/java/com/metamx/druid/client/RangeIterable.java b/server/src/test/java/com/metamx/druid/client/RangeIterable.java similarity index 100% rename from client/src/test/java/com/metamx/druid/client/RangeIterable.java rename to server/src/test/java/com/metamx/druid/client/RangeIterable.java diff --git a/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestDropTest.java b/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestDropTest.java index 9673d15b3c0..62cb939e0ed 100644 --- a/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestDropTest.java +++ b/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestDropTest.java @@ -21,6 +21,7 @@ package com.metamx.druid.coordination; import com.google.common.collect.ImmutableMap; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.shard.NoneShardSpec; import org.codehaus.jackson.map.ObjectMapper; @@ -52,6 +53,7 @@ public class SegmentChangeRequestDropTest Arrays.asList("dim1", "dim2"), Arrays.asList("met1", "met2"), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 1 ); @@ -61,7 +63,7 @@ public class SegmentChangeRequestDropTest mapper.writeValueAsString(segmentDrop), new TypeReference>(){} ); - Assert.assertEquals(10, objectMap.size()); + Assert.assertEquals(11, objectMap.size()); Assert.assertEquals("drop", objectMap.get("action")); Assert.assertEquals("something", objectMap.get("dataSource")); Assert.assertEquals(interval.toString(), objectMap.get("interval")); @@ -70,6 +72,7 @@ public class SegmentChangeRequestDropTest Assert.assertEquals("dim1,dim2", objectMap.get("dimensions")); Assert.assertEquals("met1,met2", objectMap.get("metrics")); Assert.assertEquals(ImmutableMap.of("type", "none"), objectMap.get("shardSpec")); + Assert.assertEquals(IndexIO.CURRENT_VERSION_ID, 
objectMap.get("binaryVersion")); Assert.assertEquals(1, objectMap.get("size")); } } diff --git a/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestLoadTest.java b/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestLoadTest.java index eaedcde0b6a..122b779e922 100644 --- a/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestLoadTest.java +++ b/server/src/test/java/com/metamx/druid/coordination/SegmentChangeRequestLoadTest.java @@ -21,6 +21,7 @@ package com.metamx.druid.coordination; import com.google.common.collect.ImmutableMap; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.shard.NoneShardSpec; import org.codehaus.jackson.map.ObjectMapper; @@ -52,6 +53,7 @@ public class SegmentChangeRequestLoadTest Arrays.asList("dim1", "dim2"), Arrays.asList("met1", "met2"), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 1 ); @@ -61,7 +63,7 @@ public class SegmentChangeRequestLoadTest mapper.writeValueAsString(segmentDrop), new TypeReference>(){} ); - Assert.assertEquals(10, objectMap.size()); + Assert.assertEquals(11, objectMap.size()); Assert.assertEquals("load", objectMap.get("action")); Assert.assertEquals("something", objectMap.get("dataSource")); Assert.assertEquals(interval.toString(), objectMap.get("interval")); @@ -70,6 +72,7 @@ public class SegmentChangeRequestLoadTest Assert.assertEquals("dim1,dim2", objectMap.get("dimensions")); Assert.assertEquals("met1,met2", objectMap.get("metrics")); Assert.assertEquals(ImmutableMap.of("type", "none"), objectMap.get("shardSpec")); + Assert.assertEquals(IndexIO.CURRENT_VERSION_ID, objectMap.get("binaryVersion")); Assert.assertEquals(1, objectMap.get("size")); } } diff --git a/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java b/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java index 1f44bd3e65f..84982d05737 100644 --- a/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java +++ b/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java @@ -36,10 +36,13 @@ import com.metamx.druid.Query; import com.metamx.druid.QueryGranularity; import com.metamx.druid.StorageAdapter; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.Segment; import com.metamx.druid.index.brita.Filter; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.SegmentIdAttachedStorageAdapter; import com.metamx.druid.index.v1.processing.Cursor; -import com.metamx.druid.loading.StorageAdapterLoader; +import com.metamx.druid.loading.SegmentLoader; import com.metamx.druid.loading.StorageAdapterLoadingException; import com.metamx.druid.metrics.NoopServiceEmitter; import com.metamx.druid.query.CacheStrategy; @@ -85,19 +88,19 @@ public class ServerManagerTest factory = new MyQueryRunnerFactory(); serverManager = new ServerManager( - new StorageAdapterLoader() + new SegmentLoader() { @Override - public StorageAdapter getAdapter(final Map loadSpec) + public Segment getSegment(final DataSegment segment) { - return new StorageAdapterForTesting( - MapUtils.getString(loadSpec, "version"), - (Interval) loadSpec.get("interval") + return new SegmentForTesting( + MapUtils.getString(segment.getLoadSpec(), "version"), + (Interval) segment.getLoadSpec().get("interval") ); } @Override - public void cleanupAdapter(Map loadSpec) throws StorageAdapterLoadingException + 
public void cleanup(DataSegment segment) throws StorageAdapterLoadingException { } @@ -236,6 +239,7 @@ public class ServerManagerTest Arrays.asList("dim1", "dim2", "dim3"), Arrays.asList("metric1", "metric2"), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 123l ) ); @@ -257,6 +261,7 @@ public class ServerManagerTest Arrays.asList("dim1", "dim2", "dim3"), Arrays.asList("metric1", "metric2"), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 123l ) ); @@ -285,11 +290,11 @@ public class ServerManagerTest QueryRunner> runner = serverManager.getQueryRunnerForIntervals(query, intervals); final Sequence> seq = runner.run(query); Sequences.toList(seq, Lists.>newArrayList()); - Iterator adaptersIter = factory.getAdapters().iterator(); + Iterator adaptersIter = factory.getAdapters().iterator(); while (expectedIter.hasNext() && adaptersIter.hasNext()) { Pair expectedVals = expectedIter.next(); - StorageAdapterForTesting value = adaptersIter.next(); + SegmentForTesting value = adaptersIter.next(); Assert.assertEquals(expectedVals.lhs, value.getVersion()); Assert.assertEquals(expectedVals.rhs, value.getInterval()); @@ -301,12 +306,12 @@ public class ServerManagerTest factory.clearAdapters(); } - private static class StorageAdapterForTesting implements StorageAdapter + private static class SegmentForTesting implements Segment { private final String version; private final Interval interval; - StorageAdapterForTesting( + SegmentForTesting( String version, Interval interval ) @@ -326,43 +331,25 @@ public class ServerManagerTest } @Override - public String getSegmentIdentifier() + public String getIdentifier() + { + return version; + } + + @Override + public Interval getDataInterval() + { + return interval; + } + + @Override + public QueryableIndex asQueryableIndex() { throw new UnsupportedOperationException(); } @Override - public int getDimensionCardinality(String dimension) - { - throw new UnsupportedOperationException(); - } - - @Override - public DateTime getMinTime() - { - throw new UnsupportedOperationException(); - } - - @Override - public DateTime getMaxTime() - { - throw new UnsupportedOperationException(); - } - - @Override - public Capabilities getCapabilities() - { - throw new UnsupportedOperationException(); - } - - @Override - public Iterable makeCursors(Filter filter, Interval interval, QueryGranularity gran) - { - throw new UnsupportedOperationException(); - } - - @Override - public Iterable searchDimensions(SearchQuery query, Filter filter) + public StorageAdapter asStorageAdapter() { throw new UnsupportedOperationException(); } @@ -370,12 +357,12 @@ public class ServerManagerTest public static class MyQueryRunnerFactory implements QueryRunnerFactory, SearchQuery> { - private List adapters = Lists.newArrayList(); + private List adapters = Lists.newArrayList(); @Override - public QueryRunner> createRunner(StorageAdapter adapter) + public QueryRunner> createRunner(Segment adapter) { - adapters.add((StorageAdapterForTesting) ((SegmentIdAttachedStorageAdapter) adapter).getDelegate()); + adapters.add((SegmentForTesting) adapter); return new NoopQueryRunner>(); } @@ -393,7 +380,7 @@ public class ServerManagerTest return new NoopQueryToolChest, SearchQuery>(); } - public List getAdapters() + public List getAdapters() { return adapters; } @@ -437,7 +424,7 @@ public class ServerManagerTest } @Override - public CacheStrategy getCacheStrategy(QueryType query) + public CacheStrategy getCacheStrategy(QueryType query) { return null; } diff --git 
a/server/src/test/java/com/metamx/druid/coordination/ZkCoordinatorTest.java b/server/src/test/java/com/metamx/druid/coordination/ZkCoordinatorTest.java index 040673707b0..5165647b24f 100644 --- a/server/src/test/java/com/metamx/druid/coordination/ZkCoordinatorTest.java +++ b/server/src/test/java/com/metamx/druid/coordination/ZkCoordinatorTest.java @@ -27,8 +27,9 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidServer; import com.metamx.druid.client.DruidServerConfig; import com.metamx.druid.client.ZKPhoneBook; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.jackson.DefaultObjectMapper; -import com.metamx.druid.loading.NoopStorageAdapterLoader; +import com.metamx.druid.loading.NoopSegmentLoader; import com.metamx.druid.metrics.NoopServiceEmitter; import com.metamx.druid.query.NoopQueryRunnerFactoryConglomerate; import com.metamx.druid.shard.NoneShardSpec; @@ -72,7 +73,7 @@ public class ZkCoordinatorTest } serverManager = new ServerManager( - new NoopStorageAdapterLoader(), + new NoopSegmentLoader(), new NoopQueryRunnerFactoryConglomerate(), new NoopServiceEmitter(), MoreExecutors.sameThreadExecutor() @@ -196,6 +197,7 @@ public class ZkCoordinatorTest Arrays.asList("dim1", "dim2", "dim3"), Arrays.asList("metric1", "metric2"), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 123l ); } diff --git a/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java b/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java index 01dd32a3097..55c4b7be6e5 100644 --- a/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java +++ b/server/src/test/java/com/metamx/druid/index/v1/EmptyIndexTest.java @@ -23,12 +23,12 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.metamx.druid.QueryGranularity; import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.index.QueryableIndex; import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; import java.io.File; -import java.util.ArrayList; public class EmptyIndexTest { @@ -48,11 +48,11 @@ public class EmptyIndexTest IncrementalIndexAdapter emptyIndexAdapter = new IncrementalIndexAdapter(new Interval("2012-08-01/P3D"), emptyIndex); IndexMerger.merge(Lists.newArrayList(emptyIndexAdapter), new AggregatorFactory[0], tmpDir); - MMappedIndex emptyIndexMMapped = IndexIO.mapDir(tmpDir); + QueryableIndex emptyQueryableIndex = IndexIO.loadIndex(tmpDir); - Assert.assertEquals("getAvailableDimensions", 0, Iterables.size(emptyIndexMMapped.getAvailableDimensions())); - Assert.assertEquals("getAvailableMetrics", 0, Iterables.size(emptyIndexMMapped.getAvailableMetrics())); - Assert.assertEquals("getDataInterval", new Interval("2012-08-01/P3D"), emptyIndexMMapped.getDataInterval()); - Assert.assertEquals("getReadOnlyTimestamps", 0, emptyIndexMMapped.getReadOnlyTimestamps().size()); + Assert.assertEquals("getAvailableDimensions", 0, Iterables.size(emptyQueryableIndex.getAvailableDimensions())); + Assert.assertEquals("getAvailableMetrics", 0, Iterables.size(emptyQueryableIndex.getColumnNames())); + Assert.assertEquals("getDataInterval", new Interval("2012-08-01/P3D"), emptyQueryableIndex.getDataInterval()); + Assert.assertEquals("getReadOnlyTimestamps", 0, emptyQueryableIndex.getTimeColumn().getLength()); } } diff --git a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java index c3afa5eb94e..097762106a2 100644 --- 
a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java +++ b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java @@ -24,6 +24,7 @@ import com.google.common.collect.Lists; import com.google.common.io.Files; import com.metamx.druid.QueryGranularity; import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.input.MapBasedInputRow; import junit.framework.Assert; import org.apache.commons.io.FileUtils; @@ -44,11 +45,11 @@ public class IndexMergerTest final File tempDir = Files.createTempDir(); try { - MMappedIndex index = IndexIO.mapDir(IndexMerger.persist(toPersist, tempDir)); + QueryableIndex index = IndexIO.loadIndex(IndexMerger.persist(toPersist, tempDir)); - Assert.assertEquals(2, index.getTimestamps().size()); + Assert.assertEquals(2, index.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions())); - Assert.assertEquals(0, index.getAvailableMetrics().size()); + Assert.assertEquals(2, index.getColumnNames().size()); } finally { tempDir.delete(); @@ -84,25 +85,25 @@ public class IndexMergerTest final File tempDir2 = Files.createTempDir(); final File mergedDir = Files.createTempDir(); try { - MMappedIndex index1 = IndexIO.mapDir(IndexMerger.persist(toPersist1, tempDir1)); + QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tempDir1)); - Assert.assertEquals(2, index1.getTimestamps().size()); + Assert.assertEquals(2, index1.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index1.getAvailableDimensions())); - Assert.assertEquals(0, index1.getAvailableMetrics().size()); + Assert.assertEquals(2, index1.getColumnNames().size()); - MMappedIndex index2 = IndexIO.mapDir(IndexMerger.persist(toPersist2, tempDir2)); + QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tempDir2)); - Assert.assertEquals(2, index2.getTimestamps().size()); + Assert.assertEquals(2, index2.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index2.getAvailableDimensions())); - Assert.assertEquals(0, index2.getAvailableMetrics().size()); + Assert.assertEquals(2, index2.getColumnNames().size()); - MMappedIndex merged = IndexIO.mapDir( - IndexMerger.mergeMMapped(Arrays.asList(index1, index2), new AggregatorFactory[]{}, mergedDir) + QueryableIndex merged = IndexIO.loadIndex( + IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, mergedDir) ); - Assert.assertEquals(3, merged.getTimestamps().size()); + Assert.assertEquals(3, merged.getTimeColumn().getLength()); Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(merged.getAvailableDimensions())); - Assert.assertEquals(0, merged.getAvailableMetrics().size()); + Assert.assertEquals(2, merged.getColumnNames().size()); } finally { FileUtils.deleteQuietly(tempDir1); diff --git a/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java b/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java index 2a2013b68fc..164c18a13fc 100644 --- a/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java +++ b/server/src/test/java/com/metamx/druid/index/v1/TestIndex.java @@ -21,38 +21,20 @@ package com.metamx.druid.index.v1; import com.google.common.base.Charsets; import com.google.common.base.Function; -import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import 
com.google.common.collect.Maps; import com.google.common.io.CharStreams; -import com.google.common.io.Closeables; import com.google.common.io.InputSupplier; import com.google.common.io.LineProcessor; -import com.google.common.primitives.Ints; -import com.metamx.common.ISE; import com.metamx.common.logger.Logger; -import com.metamx.common.parsers.DelimitedParser; -import com.metamx.common.parsers.Parser; -import com.metamx.common.parsers.ToLowerCaseParser; import com.metamx.druid.QueryGranularity; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.aggregation.DoubleSumAggregatorFactory; import com.metamx.druid.client.RangeIterable; -import com.metamx.druid.guava.GuavaUtils; -import com.metamx.druid.index.v1.serde.ComplexMetricSerde; -import com.metamx.druid.index.v1.serde.ComplexMetrics; +import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.indexer.data.DelimitedDataSpec; import com.metamx.druid.indexer.data.StringInputRowParser; import com.metamx.druid.indexer.data.TimestampSpec; -import com.metamx.druid.input.InputRow; -import com.metamx.druid.input.MapBasedInputRow; -import com.metamx.druid.kv.ArrayIndexed; -import com.metamx.druid.kv.Indexed; -import com.metamx.druid.kv.IndexedFloats; -import com.metamx.druid.kv.IndexedInts; -import com.metamx.druid.kv.IndexedLongs; -import com.metamx.druid.kv.Indexedids; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -61,10 +43,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URL; -import java.nio.ByteOrder; -import java.nio.FloatBuffer; import java.util.Arrays; -import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; @@ -74,195 +53,18 @@ public class TestIndex { private static final Logger log = new Logger(TestIndex.class); - private static Index index = null; - private static Index unionIndexTop = null; - private static Index unionIndexBottom = null; private static IncrementalIndex realtimeIndex = null; - private static MMappedIndex mmappedIndex = null; - private static MMappedIndex mergedRealtime = null; + private static QueryableIndex mmappedIndex = null; + private static QueryableIndex mergedRealtime = null; public static final String[] COLUMNS = new String[]{"ts", "provider", "quALIty", "plAcEmEnT", "pLacementish", "iNdEx"}; public static final String[] DIMENSIONS = new String[]{"provider", "quALIty", "plAcEmEnT", "pLacementish"}; public static final String[] METRICS = new String[]{"iNdEx"}; - public static final Map dimIds = Maps.uniqueIndex( - new RangeIterable(4), - new Function() - { - @Override - public String apply(@Nullable Integer input) - { - return DIMENSIONS[input]; - } - } - ); private static final Interval DATA_INTERVAL = new Interval("2011-01-12T00:00:00.000Z/2011-04-16T00:00:00.000Z"); private static final AggregatorFactory[] METRIC_AGGS = new AggregatorFactory[]{ new DoubleSumAggregatorFactory(METRICS[0], METRICS[0]) }; - public static Index convertMMapToIndex(MMappedIndex mmappedIndex) - { - Indexed dimsIndexed = mmappedIndex.getAvailableDimensions(); - String[] dimensions = new String[dimsIndexed.size()]; - for (int i = 0; i < dimsIndexed.size(); ++i) { - dimensions[i] = dimsIndexed.get(i); - } - - Indexed metricsIndexed = mmappedIndex.getAvailableMetrics(); - String[] metrics = new String[metricsIndexed.size()]; - for (int i = 0; i < metricsIndexed.size(); ++i) { - metrics[i] = metricsIndexed.get(i); - } - - 
IndexedLongs timeBuf = mmappedIndex.getReadOnlyTimestamps(); - long[] timestamps = new long[timeBuf.size()]; - timeBuf.fill(0, timestamps); - Closeables.closeQuietly(timeBuf); - - Map metricVals = Maps.newLinkedHashMap(); - for (String metric : metrics) { - MetricHolder holder = mmappedIndex.getMetricHolder(metric); - switch (holder.getType()) { - case FLOAT: - IndexedFloats mmappedFloats = holder.getFloatType(); - float[] metricValsArray = new float[mmappedFloats.size()]; - mmappedFloats.fill(0, metricValsArray); - Closeables.closeQuietly(mmappedFloats); - - metricVals.put( - metric, - MetricHolder.floatMetric( - metric, - CompressedFloatsIndexedSupplier.fromFloatBuffer( - FloatBuffer.wrap(metricValsArray), - ByteOrder.nativeOrder() - ) - ) - ); - break; - case COMPLEX: - Indexed complexObjects = holder.getComplexType(); - Object[] vals = new Object[complexObjects.size()]; - for (int i = 0; i < complexObjects.size(); ++i) { - vals[i] = complexObjects.get(i); - } - - final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(holder.getTypeName()); - if (serde == null) { - throw new ISE("Unknown type[%s]", holder.getTypeName()); - } - - metricVals.put( - metric, - MetricHolder.complexMetric( - metric, - holder.getTypeName(), - new ArrayIndexed(vals, serde.getObjectStrategy().getClazz()) - ) - ); - break; - } - } - - Map> dimIdLookup = Maps.newHashMap(); - Map reverseDimLookup = Maps.newHashMap(); - Map invertedIndexesMap = Maps.newHashMap(); - Map dimensionColumns = Maps.newHashMap(); - - for (String dimension : dimensions) { - final Indexed dimValueLookup = mmappedIndex.getDimValueLookup(dimension); - String[] values = new String[dimValueLookup.size()]; - for (int i = 0; i < dimValueLookup.size(); ++i) { - values[i] = dimValueLookup.get(i); - } - - Map lookupMap = Maps.newHashMapWithExpectedSize(dimValueLookup.size()); - for (int i = 0; i < values.length; i++) { - lookupMap.put(values[i], i); - } - - ImmutableConciseSet[] invertedIndexes = new ImmutableConciseSet[values.length]; - final Indexed dimValuesIndexed = mmappedIndex.getDimValueLookup(dimension); - for (int i = 0; i < dimValuesIndexed.size(); ++i) { - invertedIndexes[i] = mmappedIndex.getInvertedIndex(dimension, dimValuesIndexed.get(i)); - } - - int[] dimValues = new int[timestamps.length]; - Map, Integer> rowGroupings = Maps.newHashMap(); - final Indexed dimColumn = mmappedIndex.getDimColumn(dimension); - for (int i = 0; i < dimColumn.size(); ++i) { - int[] expansionValue = Indexedids.arrayFromIndexedInts(dimColumn.get(i)); - Integer value = rowGroupings.get(Ints.asList(expansionValue)); - if (value == null) { - value = rowGroupings.size(); - rowGroupings.put(Ints.asList(expansionValue), value); - } - dimValues[i] = value; - } - - int[][] expansionValues = new int[rowGroupings.size()][]; - for (Map.Entry, Integer> entry : rowGroupings.entrySet()) { - expansionValues[entry.getValue()] = Ints.toArray(entry.getKey()); - } - - dimIdLookup.put(dimension, lookupMap); - reverseDimLookup.put(dimension, values); - invertedIndexesMap.put(dimension, invertedIndexes); - dimensionColumns.put(dimension, new DimensionColumn(expansionValues, dimValues)); - } - - return new Index( - dimensions, - metrics, - mmappedIndex.getDataInterval(), - timestamps, - metricVals, - dimIdLookup, - reverseDimLookup, - invertedIndexesMap, - dimensionColumns - ); - } - - public static Index getTestIndex() throws IOException - { - synchronized (log) { - if (index != null) { - return index; - } - } - - return index = convertMMapToIndex(getMMappedTestIndex()); - 
} - - public static Index getTestUnionIndexTop() throws IOException - { - synchronized (log) { - if (unionIndexTop != null) { - return unionIndexTop; - } - } - - IncrementalIndex incrementalIndex = makeRealtimeIndex("druid.sample.tsv.top"); - MMappedIndex mmapped = persistRealtimeAndLoadMMapped(incrementalIndex); - - return unionIndexTop = convertMMapToIndex(mmapped); - } - - public static Index getTestUnionIndexBottom() throws IOException - { - synchronized (log) { - if (unionIndexBottom != null) { - return unionIndexBottom; - } - } - - IncrementalIndex incrementalIndex = makeRealtimeIndex("druid.sample.tsv.bottom"); - MMappedIndex mmapped = persistRealtimeAndLoadMMapped(incrementalIndex); - - return unionIndexBottom = convertMMapToIndex(mmapped); - } - public static IncrementalIndex getIncrementalTestIndex() { synchronized (log) { @@ -274,7 +76,7 @@ public class TestIndex return realtimeIndex = makeRealtimeIndex("druid.sample.tsv"); } - public static MMappedIndex getMMappedTestIndex() + public static QueryableIndex getMMappedTestIndex() { synchronized (log) { if (mmappedIndex != null) { @@ -288,7 +90,7 @@ public class TestIndex return mmappedIndex; } - public static MMappedIndex mergedRealtimeIndex() + public static QueryableIndex mergedRealtimeIndex() { synchronized (log) { if (mergedRealtime != null) { @@ -316,12 +118,9 @@ public class TestIndex IndexMerger.persist(top, DATA_INTERVAL, topFile); IndexMerger.persist(bottom, DATA_INTERVAL, bottomFile); - mergedRealtime = com.metamx.druid.index.v1.IndexIO.mapDir( - IndexMerger.mergeMMapped( - Arrays.asList( - com.metamx.druid.index.v1.IndexIO.mapDir(topFile), - com.metamx.druid.index.v1.IndexIO.mapDir(bottomFile) - ), + mergedRealtime = IndexIO.loadIndex( + IndexMerger.mergeQueryableIndex( + Arrays.asList(IndexIO.loadIndex(topFile), IndexIO.loadIndex(bottomFile)), METRIC_AGGS, mergedFile ) @@ -377,8 +176,6 @@ public class TestIndex runOnce = true; } - final String[] splits = line.split("\t"); - retVal.add(parser.parse(line)); ++lineCount; @@ -403,7 +200,7 @@ public class TestIndex return retVal; } - public static MMappedIndex persistRealtimeAndLoadMMapped(IncrementalIndex index) + public static QueryableIndex persistRealtimeAndLoadMMapped(IncrementalIndex index) { try { File someTmpFile = File.createTempFile("billy", "yay"); @@ -412,7 +209,7 @@ public class TestIndex someTmpFile.deleteOnExit(); IndexMerger.persist(index, someTmpFile); - return com.metamx.druid.index.v1.IndexIO.mapDir(someTmpFile); + return IndexIO.loadIndex(someTmpFile); } catch (IOException e) { throw Throwables.propagate(e); diff --git a/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java b/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java new file mode 100644 index 00000000000..29d784d3631 --- /dev/null +++ b/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java @@ -0,0 +1,67 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.loading; + +import com.metamx.druid.StorageAdapter; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.QueryableIndex; +import com.metamx.druid.index.Segment; +import org.joda.time.Interval; + +/** +*/ +public class NoopSegmentLoader implements SegmentLoader +{ + @Override + public Segment getSegment(final DataSegment segment) throws StorageAdapterLoadingException + { + return new Segment() + { + @Override + public String getIdentifier() + { + return segment.getIdentifier(); + } + + @Override + public Interval getDataInterval() + { + return segment.getInterval(); + } + + @Override + public QueryableIndex asQueryableIndex() + { + throw new UnsupportedOperationException(); + } + + @Override + public StorageAdapter asStorageAdapter() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public void cleanup(DataSegment loadSpec) throws StorageAdapterLoadingException + { + } +} diff --git a/server/src/test/java/com/metamx/druid/loading/NoopStorageAdapterLoader.java b/server/src/test/java/com/metamx/druid/loading/NoopStorageAdapterLoader.java deleted file mode 100644 index a5bcf8b0b34..00000000000 --- a/server/src/test/java/com/metamx/druid/loading/NoopStorageAdapterLoader.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- */ - -package com.metamx.druid.loading; - -import com.metamx.druid.Capabilities; -import com.metamx.druid.QueryGranularity; -import com.metamx.druid.StorageAdapter; -import com.metamx.druid.index.brita.Filter; -import com.metamx.druid.index.v1.processing.Cursor; -import com.metamx.druid.query.search.SearchHit; -import com.metamx.druid.query.search.SearchQuery; -import org.joda.time.DateTime; -import org.joda.time.Interval; - -import java.util.Map; - -/** -*/ -public class NoopStorageAdapterLoader implements StorageAdapterLoader -{ - @Override - public StorageAdapter getAdapter(final Map loadSpec) - { - return new StorageAdapter() - { - @Override - public String getSegmentIdentifier() - { - throw new UnsupportedOperationException(); - } - - @Override - public Interval getInterval() - { - throw new UnsupportedOperationException(); - } - - @Override - public int getDimensionCardinality(String dimension) - { - throw new UnsupportedOperationException(); - } - - @Override - public DateTime getMinTime() - { - throw new UnsupportedOperationException(); - } - - @Override - public DateTime getMaxTime() - { - throw new UnsupportedOperationException(); - } - - @Override - public Capabilities getCapabilities() - { - throw new UnsupportedOperationException(); - } - - @Override - public Iterable makeCursors(Filter filter, Interval interval, QueryGranularity gran) - { - throw new UnsupportedOperationException(); - } - - @Override - public Iterable searchDimensions(SearchQuery query, Filter filter) - { - throw new UnsupportedOperationException(); - } - }; - } - - @Override - public void cleanupAdapter(Map loadSpec) throws StorageAdapterLoadingException - { - - } -} diff --git a/server/src/test/java/com/metamx/druid/master/DruidMasterRuleRunnerTest.java b/server/src/test/java/com/metamx/druid/master/DruidMasterRuleRunnerTest.java index c08a3cb404d..f40d16d93ff 100644 --- a/server/src/test/java/com/metamx/druid/master/DruidMasterRuleRunnerTest.java +++ b/server/src/test/java/com/metamx/druid/master/DruidMasterRuleRunnerTest.java @@ -28,6 +28,7 @@ import com.google.common.collect.Sets; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidServer; import com.metamx.druid.db.DatabaseRuleManager; +import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.master.rules.IntervalDropRule; import com.metamx.druid.master.rules.IntervalLoadRule; import com.metamx.druid.master.rules.Rule; @@ -78,13 +79,14 @@ public class DruidMasterRuleRunnerTest Lists.newArrayList(), Lists.newArrayList(), new NoneShardSpec(), + IndexIO.CURRENT_VERSION_ID, 1 ) ); start = start.plusHours(1); } - ruleRunner = new DruidMasterRuleRunner(master); + ruleRunner = new DruidMasterRuleRunner(master, 1, 24); } @After @@ -855,4 +857,187 @@ public class DruidMasterRuleRunnerTest EasyMock.verify(mockPeon); EasyMock.verify(anotherMockPeon); } + + /** + * Nodes: + * hot - 2 replicants + * + * @throws Exception + */ + @Test + public void testReplicantThrottle() throws Exception + { + mockPeon.loadSegment(EasyMock.anyObject(), EasyMock.anyObject()); + EasyMock.expectLastCall().atLeastOnce(); + EasyMock.expect(mockPeon.getSegmentsToLoad()).andReturn(Sets.newHashSet()).atLeastOnce(); + EasyMock.expect(mockPeon.getLoadQueueSize()).andReturn(0L).atLeastOnce(); + EasyMock.replay(mockPeon); + + EasyMock.expect(databaseRuleManager.getRulesWithDefault(EasyMock.anyObject())).andReturn( + Lists.newArrayList( + new IntervalLoadRule(new Interval("2012-01-01T00:00:00.000Z/2013-01-01T00:00:00.000Z"), 2, "hot") + ) + ).atLeastOnce(); 
+ EasyMock.replay(databaseRuleManager); + + DruidCluster druidCluster = new DruidCluster( + ImmutableMap.of( + "hot", + MinMaxPriorityQueue.orderedBy(Ordering.natural().reverse()).create( + Arrays.asList( + new ServerHolder( + new DruidServer( + "serverHot", + "hostHot", + 1000, + "historical", + "hot" + ), + mockPeon + ), + new ServerHolder( + new DruidServer( + "serverHot2", + "hostHot2", + 1000, + "historical", + "hot" + ), + mockPeon + ) + ) + ) + ) + ); + + DruidMasterRuntimeParams params = + new DruidMasterRuntimeParams.Builder() + .withDruidCluster(druidCluster) + .withAvailableSegments(availableSegments) + .withDatabaseRuleManager(databaseRuleManager) + .withSegmentReplicantLookup(SegmentReplicantLookup.make(new DruidCluster())) + .build(); + + DruidMasterRuntimeParams afterParams = ruleRunner.run(params); + MasterStats stats = afterParams.getMasterStats(); + + Assert.assertTrue(stats.getPerTierStats().get("assignedCount").get("hot").get() == 48); + Assert.assertTrue(stats.getPerTierStats().get("unassignedCount") == null); + Assert.assertTrue(stats.getPerTierStats().get("unassignedSize") == null); + + DataSegment overFlowSegment = new DataSegment( + "test", + new Interval("2012-02-01/2012-02-02"), + new DateTime().toString(), + Maps.newHashMap(), + Lists.newArrayList(), + Lists.newArrayList(), + new NoneShardSpec(), + 1, + 0 + ); + + afterParams = ruleRunner.run( + new DruidMasterRuntimeParams.Builder() + .withDruidCluster(druidCluster) + .withEmitter(emitter) + .withAvailableSegments(Arrays.asList(overFlowSegment)) + .withDatabaseRuleManager(databaseRuleManager) + .withSegmentReplicantLookup(SegmentReplicantLookup.make(new DruidCluster())) + .build() + ); + stats = afterParams.getMasterStats(); + + Assert.assertTrue(stats.getPerTierStats().get("assignedCount").get("hot").get() == 1); + Assert.assertTrue(stats.getPerTierStats().get("unassignedCount") == null); + Assert.assertTrue(stats.getPerTierStats().get("unassignedSize") == null); + + EasyMock.verify(mockPeon); + } + + @Test + public void testDropReplicantThrottle() throws Exception + { + mockPeon.dropSegment(EasyMock.anyObject(), EasyMock.anyObject()); + EasyMock.expectLastCall().atLeastOnce(); + EasyMock.expect(mockPeon.getSegmentsToLoad()).andReturn(Sets.newHashSet()).atLeastOnce(); + EasyMock.expect(mockPeon.getLoadQueueSize()).andReturn(0L).atLeastOnce(); + EasyMock.replay(mockPeon); + + EasyMock.expect(databaseRuleManager.getRulesWithDefault(EasyMock.anyObject())).andReturn( + Lists.newArrayList( + new IntervalLoadRule(new Interval("2012-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"), 1, "normal") + ) + ).atLeastOnce(); + EasyMock.replay(databaseRuleManager); + + DataSegment overFlowSegment = new DataSegment( + "test", + new Interval("2012-02-01/2012-02-02"), + new DateTime().toString(), + Maps.newHashMap(), + Lists.newArrayList(), + Lists.newArrayList(), + new NoneShardSpec(), + 1, + 0 + ); + List longerAvailableSegments = Lists.newArrayList(availableSegments); + longerAvailableSegments.add(overFlowSegment); + + DruidServer server1 = new DruidServer( + "serverNorm1", + "hostNorm1", + 1000, + "historical", + "normal" + ); + for (DataSegment availableSegment : longerAvailableSegments) { + server1.addDataSegment(availableSegment.getIdentifier(), availableSegment); + } + DruidServer server2 = new DruidServer( + "serverNorm2", + "hostNorm2", + 1000, + "historical", + "normal" + ); + for (DataSegment availableSegment : longerAvailableSegments) { + server2.addDataSegment(availableSegment.getIdentifier(), availableSegment); + } 
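// Editor's aside, not part of the patch: testReplicantThrottle and testDropReplicantThrottle
// exercise the two new int arguments passed to DruidMasterRuleRunner(master, 1, 24) earlier in
// this file. The class below is only a simplified, hypothetical illustration of a per-run cap of
// that kind; it is not the master's actual accounting, which these tests observe only through
// the assigned/dropped counts in MasterStats.
class ThrottleSketch
{
  private final int limitPerRun;
  private int usedThisRun = 0;

  ThrottleSketch(int limitPerRun)
  {
    this.limitPerRun = limitPerRun;
  }

  void startRun()
  {
    usedThisRun = 0;
  }

  // Ask before creating (or dropping) another replicant in the current run.
  boolean tryAcquire()
  {
    if (usedThisRun >= limitPerRun) {
      return false;
    }
    usedThisRun++;
    return true;
  }
}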
+
+    DruidCluster druidCluster = new DruidCluster(
+        ImmutableMap.of(
+            "normal",
+            MinMaxPriorityQueue.orderedBy(Ordering.natural().reverse()).create(
+                Arrays.asList(
+                    new ServerHolder(
+                        server1,
+                        mockPeon
+                    ),
+                    new ServerHolder(
+                        server2,
+                        mockPeon
+                    )
+                )
+            )
+        )
+    );
+
+    SegmentReplicantLookup segmentReplicantLookup = SegmentReplicantLookup.make(druidCluster);
+
+    DruidMasterRuntimeParams params = new DruidMasterRuntimeParams.Builder()
+        .withDruidCluster(druidCluster)
+        .withMillisToWaitBeforeDeleting(0L)
+        .withAvailableSegments(longerAvailableSegments)
+        .withDatabaseRuleManager(databaseRuleManager)
+        .withSegmentReplicantLookup(segmentReplicantLookup)
+        .build();
+
+    DruidMasterRuntimeParams afterParams = ruleRunner.run(params);
+    MasterStats stats = afterParams.getMasterStats();
+
+    Assert.assertTrue(stats.getPerTierStats().get("droppedCount").get("normal").get() == 24);
+    EasyMock.verify(mockPeon);
+  }
 }
diff --git a/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java b/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java
index f3f5e0a6d30..584ae31de47 100644
--- a/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java
+++ b/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java
@@ -20,7 +20,6 @@
 package com.metamx.druid.master;
 
 import com.metamx.common.concurrent.ScheduledExecutorFactory;
-import com.metamx.common.logger.Logger;
 import com.metamx.druid.client.DataSegment;
 import com.metamx.druid.client.DruidServer;
 import com.metamx.druid.client.ServerInventoryManager;
@@ -130,6 +129,19 @@ public class DruidMasterTest
           {
             return 0;
           }
+
+
+          @Override
+          public int getReplicantLifetime()
+          {
+            return 0;
+          }
+
+          @Override
+          public int getReplicantThrottleLimit()
+          {
+            return 0;
+          }
         },
         null,
         null,
diff --git a/server/src/test/java/com/metamx/druid/master/rules/PeriodDropRuleTest.java b/server/src/test/java/com/metamx/druid/master/rules/PeriodDropRuleTest.java
index ae0c7cedd2e..c6bceb08e5e 100644
--- a/server/src/test/java/com/metamx/druid/master/rules/PeriodDropRuleTest.java
+++ b/server/src/test/java/com/metamx/druid/master/rules/PeriodDropRuleTest.java
@@ -33,13 +33,13 @@
 public class PeriodDropRuleTest
 {
   private final static DataSegment.Builder builder = DataSegment.builder()
                                                                 .dataSource("test")
-                                                                .version(new DateTime().toString())
+                                                                .version(new DateTime("2012-12-31T01:00:00").toString())
                                                                 .shardSpec(new NoneShardSpec());
 
   @Test
   public void testAppliesToAll()
   {
-    DateTime now = new DateTime();
+    DateTime now = new DateTime("2012-12-31T01:00:00");
     PeriodDropRule rule = new PeriodDropRule(
         new Period("P5000Y")
     );
@@ -51,13 +51,15 @@
                     now.minusDays(2),
                     now.minusDays(1)
                 )
-            ).build()
+            ).build(),
+            now
         )
     );
     Assert.assertTrue(
         rule.appliesTo(
             builder.interval(new Interval(now.minusYears(100), now.minusDays(1)))
-                   .build()
+                   .build(),
+            now
         )
     );
   }
@@ -65,7 +67,7 @@
   @Test
   public void testAppliesToPeriod()
   {
-    DateTime now = new DateTime();
+    DateTime now = new DateTime("2012-12-31T01:00:00");
     PeriodDropRule rule = new PeriodDropRule(
         new Period("P1M")
     );
@@ -73,19 +75,29 @@
     Assert.assertTrue(
         rule.appliesTo(
             builder.interval(new Interval(now.minusWeeks(1), now.minusDays(1)))
-                   .build()
+                   .build(),
+            now
+        )
+    );
+    Assert.assertTrue(
+        rule.appliesTo(
+            builder.interval(new Interval(now.minusDays(1), now))
+                   .build(),
+            now
         )
     );
     Assert.assertFalse(
         rule.appliesTo(
             builder.interval(new Interval(now.minusYears(1), now.minusDays(1)))
-                   .build()
+                   .build(),
+            now
         )
     );
     Assert.assertFalse(
         rule.appliesTo(
             builder.interval(new Interval(now.minusMonths(2), now.minusDays(1)))
-                   .build()
+                   .build(),
+            now
         )
     );
   }
 }
diff --git a/server/src/test/java/com/metamx/druid/master/rules/PeriodLoadRuleTest.java b/server/src/test/java/com/metamx/druid/master/rules/PeriodLoadRuleTest.java
index 283d684cb07..3944d96ecb9 100644
--- a/server/src/test/java/com/metamx/druid/master/rules/PeriodLoadRuleTest.java
+++ b/server/src/test/java/com/metamx/druid/master/rules/PeriodLoadRuleTest.java
@@ -39,38 +39,41 @@
   @Test
   public void testAppliesToAll()
   {
+    DateTime now = new DateTime("2013-01-01");
     PeriodLoadRule rule = new PeriodLoadRule(
         new Period("P5000Y"),
         0,
         ""
     );
 
-    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval("2012-01-01/2012-12-31")).build()));
-    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval("1000-01-01/2012-12-31")).build()));
-    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval("0500-01-01/2100-12-31")).build()));
+    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval("2012-01-01/2012-12-31")).build(), now));
+    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval("1000-01-01/2012-12-31")).build(), now));
+    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval("0500-01-01/2100-12-31")).build(), now));
   }
 
   @Test
   public void testAppliesToPeriod()
   {
-    DateTime now = new DateTime();
+    DateTime now = new DateTime("2012-12-31T01:00:00");
     PeriodLoadRule rule = new PeriodLoadRule(
         new Period("P1M"),
         0,
         ""
     );
 
-    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval(now.minusWeeks(1), now)).build()));
+    Assert.assertTrue(rule.appliesTo(builder.interval(new Interval(now.minusWeeks(1), now)).build(), now));
     Assert.assertTrue(
         rule.appliesTo(
             builder.interval(new Interval(now.minusDays(1), now.plusDays(1)))
-                   .build()
+                   .build(),
+            now
         )
     );
     Assert.assertFalse(
         rule.appliesTo(
             builder.interval(new Interval(now.plusDays(1), now.plusDays(2)))
-                   .build()
+                   .build(),
+            now
         )
     );
   }
 }
diff --git a/server/src/test/java/com/metamx/druid/query/QueryRunnerTestHelper.java b/server/src/test/java/com/metamx/druid/query/QueryRunnerTestHelper.java
index 89d7802d044..69e74181faa 100644
--- a/server/src/test/java/com/metamx/druid/query/QueryRunnerTestHelper.java
+++ b/server/src/test/java/com/metamx/druid/query/QueryRunnerTestHelper.java
@@ -22,7 +22,6 @@ package com.metamx.druid.query;
 import com.google.common.collect.Lists;
 import com.metamx.druid.Query;
 import com.metamx.druid.QueryGranularity;
-import com.metamx.druid.StorageAdapter;
 import com.metamx.druid.aggregation.AggregatorFactory;
 import com.metamx.druid.aggregation.CountAggregatorFactory;
 import com.metamx.druid.aggregation.DoubleSumAggregatorFactory;
@@ -30,12 +29,11 @@ import com.metamx.druid.aggregation.LongSumAggregatorFactory;
 import com.metamx.druid.aggregation.post.ArithmeticPostAggregator;
 import com.metamx.druid.aggregation.post.ConstantPostAggregator;
 import com.metamx.druid.aggregation.post.FieldAccessPostAggregator;
+import com.metamx.druid.index.IncrementalIndexSegment;
+import com.metamx.druid.index.QueryableIndex;
+import com.metamx.druid.index.QueryableIndexSegment;
+import com.metamx.druid.index.Segment;
 import com.metamx.druid.index.v1.IncrementalIndex;
-import com.metamx.druid.index.v1.IncrementalIndexStorageAdapter;
-import com.metamx.druid.index.v1.Index;
-import com.metamx.druid.index.v1.IndexStorageAdapter;
-import com.metamx.druid.index.v1.MMappedIndex;
-import
com.metamx.druid.index.v1.MMappedIndexStorageAdapter; import com.metamx.druid.index.v1.TestIndex; import com.metamx.druid.query.segment.MultipleIntervalSegmentSpec; import com.metamx.druid.query.segment.QuerySegmentSpec; @@ -108,34 +106,27 @@ public class QueryRunnerTestHelper ) throws IOException { - final Index testIndex = TestIndex.getTestIndex(); final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); - final MMappedIndex persistedRTIndex = TestIndex.getMMappedTestIndex(); - final MMappedIndex mergedRT = TestIndex.mergedRealtimeIndex(); + final QueryableIndex mMappedTestIndex = TestIndex.getMMappedTestIndex(); + final QueryableIndex mergedRealtimeIndex = TestIndex.mergedRealtimeIndex(); return Arrays.asList( new Object[][]{ { - makeQueryRunner(factory, new IndexStorageAdapter(testIndex)) + makeQueryRunner(factory, new IncrementalIndexSegment(rtIndex)) }, { - makeQueryRunner(factory, new MMappedIndexStorageAdapter(MMappedIndex.fromIndex(testIndex))) + makeQueryRunner(factory, new QueryableIndexSegment(null, mMappedTestIndex)) }, { - makeQueryRunner(factory, new IncrementalIndexStorageAdapter(rtIndex)) - }, - { - makeQueryRunner(factory, new MMappedIndexStorageAdapter(persistedRTIndex)) - }, - { - makeQueryRunner(factory, new MMappedIndexStorageAdapter(mergedRT)) + makeQueryRunner(factory, new QueryableIndexSegment(null, mergedRealtimeIndex)) } } ); } - private static QueryRunner makeQueryRunner( + public static QueryRunner makeQueryRunner( QueryRunnerFactory> factory, - StorageAdapter adapter + Segment adapter ) { return new FinalizeResultsQueryRunner( diff --git a/server/src/test/java/com/metamx/druid/query/group/GroupByQueryRunnerTest.java b/server/src/test/java/com/metamx/druid/query/group/GroupByQueryRunnerTest.java new file mode 100644 index 00000000000..6e86c9e1022 --- /dev/null +++ b/server/src/test/java/com/metamx/druid/query/group/GroupByQueryRunnerTest.java @@ -0,0 +1,281 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +package com.metamx.druid.query.group; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.metamx.common.guava.Sequence; +import com.metamx.common.guava.Sequences; +import com.metamx.druid.PeriodGranularity; +import com.metamx.druid.Query; +import com.metamx.druid.TestHelper; +import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.aggregation.LongSumAggregatorFactory; +import com.metamx.druid.collect.StupidPool; +import com.metamx.druid.input.MapBasedRow; +import com.metamx.druid.input.Row; +import com.metamx.druid.query.QueryRunner; +import com.metamx.druid.query.QueryRunnerTestHelper; +import com.metamx.druid.query.dimension.DefaultDimensionSpec; +import com.metamx.druid.query.dimension.DimensionSpec; +import com.metamx.druid.query.segment.MultipleIntervalSegmentSpec; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.Interval; +import org.joda.time.Period; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class GroupByQueryRunnerTest +{ + private final QueryRunner runner; + private GroupByQueryRunnerFactory factory; + + @Parameterized.Parameters + public static Collection constructorFeeder() throws IOException + { + final GroupByQueryRunnerFactory factory = new GroupByQueryRunnerFactory( + new GroupByQueryEngine( + new GroupByQueryEngineConfig() + { + @Override + public int getMaxIntermediateRows() + { + return 10000; + } + }, + new StupidPool( + new Supplier() + { + @Override + public ByteBuffer get() + { + return ByteBuffer.allocate(1024 * 1024); + } + } + ) + ) + ); + + + return Lists.newArrayList( + Iterables.transform( + QueryRunnerTestHelper.makeQueryRunners(factory), new Function() + { + @Override + public Object apply(@Nullable Object input) + { + return new Object[]{factory, ((Object[]) input)[0]}; + } + } + ) + ); + } + + public GroupByQueryRunnerTest(GroupByQueryRunnerFactory factory, QueryRunner runner) { + this.factory = factory; + this.runner = runner; + } + + @Test + public void testGroupBy() { + GroupByQuery query = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .build(); + + List expectedResults = Arrays.asList( + createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), + createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), + createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), + createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + 
createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), + createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), + + createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), + createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), + createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), + createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), + createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), + createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), + createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), + createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L) + ); + + Iterable results = Sequences.toList(runner.run(query), Lists.newArrayList()); + + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + @Test + public void testGroupByWithTimeZone() { + DateTimeZone tz = DateTimeZone.forID("America/Los_Angeles"); + + GroupByQuery query = GroupByQuery.builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setInterval("2011-03-31T00:00:00-07:00/2011-04-02T00:00:00-07:00") + .setDimensions( + Lists.newArrayList( + (DimensionSpec) new DefaultDimensionSpec( + "quality", + "alias" + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory( + "idx", + "index" + ) + ) + ) + .setGranularity( + new PeriodGranularity( + new Period("P1D"), + null, + tz + ) + ) + .build(); + + List expectedResults = Arrays.asList( + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "automotive", "rows", 1L, "idx", 135L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "business", "rows", 1L, "idx", 118L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "entertainment", "rows", 1L, "idx", 158L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "health", "rows", 1L, "idx", 120L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "mezzanine", "rows", 3L, "idx", 2870L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "news", "rows", 1L, "idx", 121L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "premium", "rows", 3L, "idx", 2900L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "technology", "rows", 1L, "idx", 78L)), + (Row) new MapBasedRow(new DateTime("2011-03-31", tz),ImmutableMap.of("alias", "travel", "rows", 1L, "idx", 119L)), + + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "automotive", "rows", 1L, "idx", 147L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "business", "rows", 1L, "idx", 112L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "entertainment", "rows", 1L, "idx", 166L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "health", "rows", 1L, "idx", 113L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "mezzanine", "rows", 3L, "idx", 2447L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", 
tz),ImmutableMap.of("alias", "news", "rows", 1L, "idx", 114L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "premium", "rows", 3L, "idx", 2505L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "technology", "rows", 1L, "idx", 97L)), + (Row) new MapBasedRow(new DateTime("2011-04-01", tz),ImmutableMap.of("alias", "travel", "rows", 1L, "idx", 126L)) + ); + + Iterable results = Sequences.toList( + runner.run(query), + Lists.newArrayList() + ); + + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + + @Test + public void testMergeResults() { + GroupByQuery.Builder builder = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setInterval("2011-04-02/2011-04-04") + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)); + + final GroupByQuery fullQuery = builder.build(); + + QueryRunner mergedRunner = new GroupByQueryQueryToolChest().mergeResults( + new QueryRunner() + { + @Override + public Sequence run(Query query) + { + // simulate two daily segments + final Query query1 = query.withQuerySegmentSpec( + new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-02/2011-04-03"))) + ); + final Query query2 = query.withQuerySegmentSpec( + new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-03/2011-04-04"))) + ); + return Sequences.concat(runner.run(query1), runner.run(query2)); + } + } + ); + + List expectedResults = Arrays.asList( + createExpectedRow("2011-04-01", "alias", "automotive", "rows", 2L, "idx", 269L), + createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 217L), + createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 319L), + createExpectedRow("2011-04-01", "alias", "health", "rows", 2L, "idx", 216L), + createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L), + createExpectedRow("2011-04-01", "alias", "news", "rows", 2L, "idx", 221L), + createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L), + createExpectedRow("2011-04-01", "alias", "technology", "rows", 2L, "idx", 177L), + createExpectedRow("2011-04-01", "alias", "travel", "rows", 2L, "idx", 243L) + ); + + TestHelper.assertExpectedObjects(expectedResults, runner.run(fullQuery), "direct"); + TestHelper.assertExpectedObjects(expectedResults, mergedRunner.run(fullQuery), "merged"); + } + + private MapBasedRow createExpectedRow(final String timestamp, Object... 
vals) + { + Preconditions.checkArgument(vals.length % 2 == 0); + + Map theVals = Maps.newHashMap(); + for (int i = 0; i < vals.length; i+=2) { + theVals.put(vals[i].toString(), vals[i+1]); + } + + return new MapBasedRow(new DateTime(timestamp), theVals); + } +} diff --git a/server/src/test/java/com/metamx/druid/query/group/GroupByTimeseriesQueryRunnerTest.java b/server/src/test/java/com/metamx/druid/query/group/GroupByTimeseriesQueryRunnerTest.java index 61c84f9c447..8a1c7637367 100644 --- a/server/src/test/java/com/metamx/druid/query/group/GroupByTimeseriesQueryRunnerTest.java +++ b/server/src/test/java/com/metamx/druid/query/group/GroupByTimeseriesQueryRunnerTest.java @@ -24,17 +24,13 @@ import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; -import com.metamx.druid.GroupByQueryEngine; -import com.metamx.druid.GroupByQueryEngineConfig; import com.metamx.druid.Query; import com.metamx.druid.collect.StupidPool; -import com.metamx.druid.initialization.ServerInit; import com.metamx.druid.input.MapBasedRow; import com.metamx.druid.input.Row; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryRunnerTestHelper; import com.metamx.druid.query.timeseries.TimeseriesQuery; -import com.metamx.druid.query.timeseries.TimeseriesQueryRunnerFactory; import com.metamx.druid.query.timeseries.TimeseriesQueryRunnerTest; import com.metamx.druid.result.Result; import com.metamx.druid.result.TimeseriesResultValue; @@ -42,7 +38,6 @@ import org.joda.time.DateTime; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -117,7 +112,7 @@ public class GroupByTimeseriesQueryRunnerTest extends TimeseriesQueryRunnerTest MapBasedRow row = (MapBasedRow) input; return new Result( - new DateTime(input.getTimestampFromEpoch()), new TimeseriesResultValue(row.getEvent()) + row.getTimestamp(), new TimeseriesResultValue(row.getEvent()) ); } } @@ -165,4 +160,11 @@ public class GroupByTimeseriesQueryRunnerTest extends TimeseriesQueryRunnerTest // Skip this test because the timeseries test expects a day that doesn't have a filter match to be filled in, // but group by just doesn't return a value if the filter doesn't match. } + + @Override + public void testTimeseriesWithFilterOnNonExistentDimension() + { + // Skip this test because the timeseries test expects a day that doesn't have a filter match to be filled in, + // but group by just doesn't return a value if the filter doesn't match. + } } diff --git a/server/src/test/java/com/metamx/druid/query/metadata/SegmentAnalyzerTest.java b/server/src/test/java/com/metamx/druid/query/metadata/SegmentAnalyzerTest.java new file mode 100644 index 00000000000..6805bb6c23b --- /dev/null +++ b/server/src/test/java/com/metamx/druid/query/metadata/SegmentAnalyzerTest.java @@ -0,0 +1,102 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.query.metadata; + +import com.google.common.collect.Lists; +import com.metamx.common.guava.Sequences; +import com.metamx.druid.index.IncrementalIndexSegment; +import com.metamx.druid.index.QueryableIndexSegment; +import com.metamx.druid.index.Segment; +import com.metamx.druid.index.column.ValueType; +import com.metamx.druid.index.v1.TestIndex; +import com.metamx.druid.query.QueryRunner; +import com.metamx.druid.query.QueryRunnerFactory; +import com.metamx.druid.query.QueryRunnerTestHelper; +import com.metamx.druid.query.segment.QuerySegmentSpecs; +import junit.framework.Assert; +import org.junit.Test; + +import java.util.List; +import java.util.Map; + +/** + */ +public class SegmentAnalyzerTest +{ + @Test + public void testIncrementalDoesNotWork() throws Exception + { + final List results = getSegmentAnalysises( + new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex()) + ); + + Assert.assertEquals(0, results.size()); + } + + @Test + public void testMappedWorks() throws Exception + { + final List results = getSegmentAnalysises( + new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex()) + ); + + Assert.assertEquals(1, results.size()); + + final SegmentAnalysis analysis = results.get(0); + Assert.assertEquals("test_1", analysis.getId()); + + final Map columns = analysis.getColumns(); + Assert.assertEquals(TestIndex.COLUMNS.length, columns.size()); // All columns including time + + for (String dimension : TestIndex.DIMENSIONS) { + final ColumnAnalysis columnAnalysis = columns.get(dimension.toLowerCase()); + + Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType()); + Assert.assertTrue(dimension, columnAnalysis.getSize() > 0); + Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0); + } + + for (String metric : TestIndex.METRICS) { + final ColumnAnalysis columnAnalysis = columns.get(metric.toLowerCase()); + + Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType()); + Assert.assertTrue(metric, columnAnalysis.getSize() > 0); + Assert.assertNull(metric, columnAnalysis.getCardinality()); + } + } + + /** + * *Awesome* method name auto-generated by IntelliJ! I love IntelliJ! 
+ * + * @param index + * @return + */ + private List getSegmentAnalysises(Segment index) + { + final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + (QueryRunnerFactory) new SegmentMetadataQueryRunnerFactory(), index + ); + + final SegmentMetadataQuery query = new SegmentMetadataQuery( + "test", QuerySegmentSpecs.create("2011/2012"), null, null, null + ); + return Sequences.toList(query.run(runner), Lists.newArrayList()); + } +} diff --git a/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerBonusTest.java b/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerBonusTest.java new file mode 100644 index 00000000000..91865f47c7d --- /dev/null +++ b/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerBonusTest.java @@ -0,0 +1,105 @@ +package com.metamx.druid.query.timeseries; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.metamx.common.guava.Sequences; +import com.metamx.druid.Druids; +import com.metamx.druid.Query; +import com.metamx.druid.QueryGranularity; +import com.metamx.druid.aggregation.AggregatorFactory; +import com.metamx.druid.aggregation.CountAggregatorFactory; +import com.metamx.druid.index.IncrementalIndexSegment; +import com.metamx.druid.index.Segment; +import com.metamx.druid.index.v1.IncrementalIndex; +import com.metamx.druid.input.MapBasedInputRow; +import com.metamx.druid.query.FinalizeResultsQueryRunner; +import com.metamx.druid.query.QueryRunner; +import com.metamx.druid.query.QueryRunnerFactory; +import com.metamx.druid.result.Result; +import com.metamx.druid.result.TimeseriesResultValue; +import junit.framework.Assert; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.junit.Test; + +import java.util.List; + +public class TimeseriesQueryRunnerBonusTest +{ + @Test + public void testOneRowAtATime() throws Exception + { + final IncrementalIndex oneRowIndex = new IncrementalIndex( + new DateTime("2012-01-01T00:00:00Z").getMillis(), QueryGranularity.NONE, new AggregatorFactory[]{} + ); + + List> results; + + oneRowIndex.add( + new MapBasedInputRow( + new DateTime("2012-01-01T00:00:00Z").getMillis(), + ImmutableList.of("dim1"), + ImmutableMap.of("dim1", "x") + ) + ); + + results = runTimeseriesCount(oneRowIndex); + + Assert.assertEquals("index size", 1, oneRowIndex.size()); + Assert.assertEquals("result size", 1, results.size()); + Assert.assertEquals("result timestamp", new DateTime("2012-01-01T00:00:00Z"), results.get(0).getTimestamp()); + Assert.assertEquals("result count metric", 1, (long) results.get(0).getValue().getLongMetric("rows")); + + oneRowIndex.add( + new MapBasedInputRow( + new DateTime("2012-01-01T00:00:00Z").getMillis(), + ImmutableList.of("dim1"), + ImmutableMap.of("dim1", "y") + ) + ); + + results = runTimeseriesCount(oneRowIndex); + + Assert.assertEquals("index size", 2, oneRowIndex.size()); + Assert.assertEquals("result size", 1, results.size()); + Assert.assertEquals("result timestamp", new DateTime("2012-01-01T00:00:00Z"), results.get(0).getTimestamp()); + Assert.assertEquals("result count metric", 2, (long) results.get(0).getValue().getLongMetric("rows")); + } + + private static List> runTimeseriesCount(IncrementalIndex index) + { + final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(); + final QueryRunner> runner = makeQueryRunner( + factory, + new IncrementalIndexSegment(index) + ); + + TimeseriesQuery query = 
Druids.newTimeseriesQueryBuilder() + .dataSource("xxx") + .granularity(QueryGranularity.ALL) + .intervals(ImmutableList.of(new Interval("2012-01-01T00:00:00Z/P1D"))) + .aggregators( + ImmutableList.of( + new CountAggregatorFactory("rows") + ) + ) + .build(); + + return Sequences.toList( + runner.run(query), + Lists.>newArrayList() + ); + } + + private static QueryRunner makeQueryRunner( + QueryRunnerFactory> factory, + Segment adapter + ) + { + return new FinalizeResultsQueryRunner( + factory.createRunner(adapter), + factory.getToolchest() + ); + } +} diff --git a/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerTest.java index 0368745ef8b..3be91c3686f 100644 --- a/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/server/src/test/java/com/metamx/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -40,6 +40,7 @@ import com.metamx.druid.query.segment.MultipleIntervalSegmentSpec; import com.metamx.druid.result.Result; import com.metamx.druid.result.TimeseriesResultValue; import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.joda.time.Interval; import org.joda.time.Period; import org.junit.Assert; @@ -257,6 +258,46 @@ public class TimeseriesQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, results); } + @Test + public void testTimeseriesWithTimeZone() + { + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .intervals("2011-03-31T00:00:00-07:00/2011-04-02T00:00:00-07:00") + .aggregators( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory( + "idx", + "index" + ) + ) + ) + .granularity(new PeriodGranularity(new Period("P1D"), null, DateTimeZone.forID("America/Los_Angeles"))) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-03-31", DateTimeZone.forID("America/Los_Angeles")), + new TimeseriesResultValue( + ImmutableMap.of("rows", 13L, "idx", 6619L) + ) + ), + new Result( + new DateTime("2011-04-01T", DateTimeZone.forID("America/Los_Angeles")), + new TimeseriesResultValue( + ImmutableMap.of("rows", 13L, "idx", 5827L) + ) + ) + ); + + Iterable> results = Sequences.toList( + runner.run(query), + Lists.>newArrayList() + ); + + TestHelper.assertExpectedResults(expectedResults, results); + } @Test public void testTimeseriesWithVaryingGran() @@ -958,6 +999,48 @@ public class TimeseriesQueryRunnerTest TestHelper.assertExpectedResults(expectedResults, results); } + @Test + public void testTimeseriesWithFilterOnNonExistentDimension() + { + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.dayGran) + .filters("bobby", "billy") + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .build(); + + List> expectedResults = Arrays.asList( + new Result( + new DateTime("2011-04-01"), + new TimeseriesResultValue( + ImmutableMap.of( + "rows", 0L, + "index", 0.0, + "addRowsIndexConstant", 1.0 + ) + ) + ), + new Result( + new DateTime("2011-04-02"), + new TimeseriesResultValue( + ImmutableMap.of( + "rows", 0L, + "index", 0.0, + "addRowsIndexConstant", 1.0 + ) + ) + ) + ); + + Iterable> results = Sequences.toList( + runner.run(query), + Lists.>newArrayList() + ); + 
TestHelper.assertExpectedResults(expectedResults, results); + } + @Test public void testTimeseriesWithNonExistentFilter() {