diff --git a/client/pom.xml b/client/pom.xml index 588da3fe4de..8ad2bea5ee2 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. --> - + 4.0.0 com.metamx.druid druid-client @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java b/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java index 0612fb9337c..6d896d8bf0f 100644 --- a/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java +++ b/client/src/main/java/com/metamx/druid/client/CachingClusteredClient.java @@ -139,18 +139,24 @@ public class CachingClusteredClient implements QueryRunner // build set of segments to query Set> segments = Sets.newLinkedHashSet(); + List> serversLookup = Lists.newLinkedList(); + for (Interval interval : rewrittenQuery.getIntervals()) { - List> serversLookup = timeline.lookup(interval); + serversLookup.addAll(timeline.lookup(interval)); + } - for (TimelineObjectHolder holder : serversLookup) { - for (PartitionChunk chunk : holder.getObject()) { - ServerSelector selector = chunk.getObject(); - final SegmentDescriptor descriptor = new SegmentDescriptor( - holder.getInterval(), holder.getVersion(), chunk.getChunkNumber() - ); + // Let tool chest filter out unneeded segments + final List> filteredServersLookup = + toolChest.filterSegments(query, serversLookup); - segments.add(Pair.of(selector, descriptor)); - } + for (TimelineObjectHolder holder : filteredServersLookup) { + for (PartitionChunk chunk : holder.getObject()) { + ServerSelector selector = chunk.getObject(); + final SegmentDescriptor descriptor = new SegmentDescriptor( + holder.getInterval(), holder.getVersion(), chunk.getChunkNumber() + ); + + segments.add(Pair.of(selector, descriptor)); } } diff --git a/client/src/main/java/com/metamx/druid/http/ClientInfoResource.java b/client/src/main/java/com/metamx/druid/http/ClientInfoResource.java index 50a0475de4b..8bf344e88bb 100644 --- a/client/src/main/java/com/metamx/druid/http/ClientInfoResource.java +++ b/client/src/main/java/com/metamx/druid/http/ClientInfoResource.java @@ -24,11 +24,11 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.inject.Inject; -import com.metamx.common.IAE; import com.metamx.druid.client.ClientInventoryManager; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidDataSource; import com.metamx.druid.client.DruidServer; +import org.joda.time.DateTime; import org.joda.time.Interval; import javax.ws.rs.GET; @@ -36,7 +36,7 @@ import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; -import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; @@ -57,22 +57,26 @@ public class ClientInfoResource this.clientInventoryManager = clientInventoryManager; } - private Map updateDataSources() + private Map> getSegmentsForDatasources() { - final Map dataSources = Maps.newHashMap(); + final Map> dataSourceMap = Maps.newHashMap(); for (DruidServer server : clientInventoryManager.getInventory()) { for (DruidDataSource dataSource : server.getDataSources()) { - dataSources.put(dataSource.getName(), dataSource); + if (!dataSourceMap.containsKey(dataSource.getName())) { + dataSourceMap.put(dataSource.getName(), Lists.newArrayList()); + } + List segments = 
dataSourceMap.get(dataSource.getName()); + segments.addAll(dataSource.getSegments()); } } - return dataSources; + return dataSourceMap; } @GET @Produces("application/json") public Iterable getDataSources() { - return updateDataSources().keySet(); + return getSegmentsForDatasources().keySet(); } @GET @@ -97,44 +101,24 @@ public class ClientInfoResource @QueryParam("interval") String interval ) { - DruidDataSource dataSource = updateDataSources().get(dataSourceName); + List segments = getSegmentsForDatasources().get(dataSourceName); - Set retVal = Sets.newHashSet(); - - Interval dimInterval; + Interval theInterval; if (interval == null || interval.isEmpty()) { - Iterator iter = Lists.reverse(Lists.newArrayList(dataSource.getSegments())).iterator(); - DataSegment segment = iter.next(); - retVal.addAll(segment.getDimensions()); - - dimInterval = new Interval( - segment.getInterval().getEnd().minus(SEGMENT_HISTORY_MILLIS), - segment.getInterval().getEnd() - ); - - while (iter.hasNext() && dimInterval.contains(segment.getInterval())) { - retVal.addAll(segment.getDimensions()); - segment = iter.next(); - } + DateTime now = new DateTime(); + theInterval = new Interval(now.minusMillis(SEGMENT_HISTORY_MILLIS), now); } else { - try { - dimInterval = new Interval(interval); - } - catch (Exception e) { - throw new IAE("Interval is not in a parseable format!"); - } + theInterval = new Interval(interval); + } - Iterator iter = dataSource.getSegments().iterator(); - - while (iter.hasNext()) { - DataSegment segment = iter.next(); - if (dimInterval.contains(segment.getInterval())) { - retVal.addAll(segment.getDimensions()); - } + Set dims = Sets.newHashSet(); + for (DataSegment segment : segments) { + if (theInterval.overlaps(segment.getInterval())) { + dims.addAll(segment.getDimensions()); } } - return retVal; + return dims; } @GET @@ -145,43 +129,23 @@ public class ClientInfoResource @QueryParam("interval") String interval ) { - DruidDataSource dataSource = updateDataSources().get(dataSourceName); + List segments = getSegmentsForDatasources().get(dataSourceName); - Set retVal = Sets.newHashSet(); - - Interval dimInterval; + Interval theInterval; if (interval == null || interval.isEmpty()) { - Iterator iter = Lists.reverse(Lists.newArrayList(dataSource.getSegments())).iterator(); - DataSegment segment = iter.next(); - retVal.addAll(segment.getMetrics()); - - dimInterval = new Interval( - segment.getInterval().getEnd().minus(SEGMENT_HISTORY_MILLIS), - segment.getInterval().getEnd() - ); - - while (iter.hasNext() && dimInterval.contains(segment.getInterval())) { - retVal.addAll(segment.getMetrics()); - segment = iter.next(); - } + DateTime now = new DateTime(); + theInterval = new Interval(now.minusMillis(SEGMENT_HISTORY_MILLIS), now); } else { - try { - dimInterval = new Interval(interval); - } - catch (Exception e) { - throw new IAE("Interval is not in a parseable format!"); - } + theInterval = new Interval(interval); + } - Iterator iter = dataSource.getSegments().iterator(); - - while (iter.hasNext()) { - DataSegment segment = iter.next(); - if (dimInterval.contains(segment.getInterval())) { - retVal.addAll(segment.getMetrics()); - } + Set metrics = Sets.newHashSet(); + for (DataSegment segment : segments) { + if (theInterval.overlaps(segment.getInterval())) { + metrics.addAll(segment.getMetrics()); } } - return retVal; + return metrics; } } diff --git a/client/src/main/java/com/metamx/druid/http/QueryServlet.java b/client/src/main/java/com/metamx/druid/http/QueryServlet.java index 
9a13c50f45c..bed59a8b525 100644 --- a/client/src/main/java/com/metamx/druid/http/QueryServlet.java +++ b/client/src/main/java/com/metamx/druid/http/QueryServlet.java @@ -150,7 +150,7 @@ public class QueryServlet extends HttpServlet ImmutableMap.builder() .put("exception", e.toString()) .put("query", queryString) - .put("host", req.getRemoteAddr()) + .put("peer", req.getRemoteAddr()) .build() ) ); diff --git a/client/src/main/java/com/metamx/druid/query/QueryToolChest.java b/client/src/main/java/com/metamx/druid/query/QueryToolChest.java index 854e2b97c9b..e2d227729d3 100644 --- a/client/src/main/java/com/metamx/druid/query/QueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/QueryToolChest.java @@ -22,18 +22,19 @@ package com.metamx.druid.query; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Function; import com.metamx.common.guava.Sequence; +import com.metamx.druid.LogicalSegment; import com.metamx.druid.Query; import com.metamx.emitter.service.ServiceMetricEvent; - +import java.util.List; /** * The broker-side (also used by server in some cases) API for a specific Query type. This API is still undergoing * evolution and is only semi-stable, so proprietary Query implementations should be ready for the potential * maintenance burden when upgrading versions. */ -public interface QueryToolChest> +public abstract class QueryToolChest> { - public QueryRunner mergeResults(QueryRunner runner); + public abstract QueryRunner mergeResults(QueryRunner runner); /** * This method doesn't belong here, but it's here for now just to make it work. @@ -41,11 +42,24 @@ public interface QueryToolChest> * @param seqOfSequences * @return */ - public Sequence mergeSequences(Sequence> seqOfSequences); - public ServiceMetricEvent.Builder makeMetricBuilder(QueryType query); - public Function makeMetricManipulatorFn(QueryType query, MetricManipulationFn fn); - public TypeReference getResultTypeReference(); - public CacheStrategy getCacheStrategy(QueryType query); - public QueryRunner preMergeQueryDecoration(QueryRunner runner); - public QueryRunner postMergeQueryDecoration(QueryRunner runner); + public abstract Sequence mergeSequences(Sequence> seqOfSequences); + public abstract ServiceMetricEvent.Builder makeMetricBuilder(QueryType query); + public abstract Function makeMetricManipulatorFn(QueryType query, MetricManipulationFn fn); + public abstract TypeReference getResultTypeReference(); + + public CacheStrategy getCacheStrategy(QueryType query) { + return null; + } + + public QueryRunner preMergeQueryDecoration(QueryRunner runner) { + return runner; + } + + public QueryRunner postMergeQueryDecoration(QueryRunner runner) { + return runner; + } + + public List filterSegments(QueryType query, List segments) { + return segments; + } } diff --git a/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java index e11f2885fd8..51570e30955 100644 --- a/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/group/GroupByQueryQueryToolChest.java @@ -54,7 +54,7 @@ import java.util.Properties; /** */ -public class GroupByQueryQueryToolChest implements QueryToolChest +public class GroupByQueryQueryToolChest extends QueryToolChest { private static final TypeReference TYPE_REFERENCE = new TypeReference(){}; @@ -177,22 +177,4 @@ public class GroupByQueryQueryToolChest implements QueryToolChest 
getCacheStrategy(GroupByQuery query) - { - return null; - } - - @Override - public QueryRunner preMergeQueryDecoration(QueryRunner runner) - { - return runner; - } - - @Override - public QueryRunner postMergeQueryDecoration(QueryRunner runner) - { - return runner; - } } diff --git a/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index e7df4959b35..6ede9c7f247 100644 --- a/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -51,7 +51,7 @@ import java.util.Map; import java.util.Set; -public class SegmentMetadataQueryQueryToolChest implements QueryToolChest +public class SegmentMetadataQueryQueryToolChest extends QueryToolChest { private static final TypeReference TYPE_REFERENCE = new TypeReference(){}; private static final byte[] SEGMENT_METADATA_CACHE_PREFIX = new byte[]{0x4}; @@ -220,18 +220,6 @@ public class SegmentMetadataQueryQueryToolChest implements QueryToolChest preMergeQueryDecoration(QueryRunner runner) - { - return runner; - } - - @Override - public QueryRunner postMergeQueryDecoration(QueryRunner runner) - { - return runner; - } - private Ordering getOrdering() { return new Ordering() diff --git a/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java index 3313573adf8..f58f31ebad7 100644 --- a/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/search/SearchQueryQueryToolChest.java @@ -65,7 +65,7 @@ import java.util.Set; /** */ -public class SearchQueryQueryToolChest implements QueryToolChest, SearchQuery> +public class SearchQueryQueryToolChest extends QueryToolChest, SearchQuery> { private static final byte SEARCH_QUERY = 0x2; @@ -263,12 +263,6 @@ public class SearchQueryQueryToolChest implements QueryToolChest> postMergeQueryDecoration(final QueryRunner> runner) - { - return runner; - } - private static class SearchThresholdAdjustingQueryRunner implements QueryRunner> { private final QueryRunner> runner; diff --git a/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java index 7e902115ef8..dcf09526a31 100644 --- a/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java @@ -27,6 +27,7 @@ import com.google.common.collect.Ordering; import com.metamx.common.guava.MergeSequence; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; +import com.metamx.druid.LogicalSegment; import com.metamx.druid.Query; import com.metamx.druid.collect.OrderedMergeSequence; import com.metamx.druid.query.BySegmentSkippingQueryRunner; @@ -37,7 +38,6 @@ import com.metamx.druid.query.QueryToolChest; import com.metamx.druid.result.Result; import com.metamx.druid.result.TimeBoundaryResultValue; import com.metamx.emitter.service.ServiceMetricEvent; - import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -47,7 +47,7 @@ import java.util.List; /** */ public class TimeBoundaryQueryQueryToolChest - implements QueryToolChest, 
TimeBoundaryQuery> + extends QueryToolChest, TimeBoundaryQuery> { private static final byte TIMEBOUNDARY_QUERY = 0x3; @@ -58,6 +58,16 @@ public class TimeBoundaryQueryQueryToolChest { }; + @Override + public List filterSegments(TimeBoundaryQuery query, List input) + { + if(input.size() <= 1) { + return input; + } + + return Lists.newArrayList(input.get(0), input.get(input.size() - 1)); + } + @Override public QueryRunner> mergeResults( final QueryRunner> runner @@ -169,18 +179,6 @@ public class TimeBoundaryQueryQueryToolChest }; } - @Override - public QueryRunner> preMergeQueryDecoration(QueryRunner> runner) - { - return runner; - } - - @Override - public QueryRunner> postMergeQueryDecoration(QueryRunner> runner) - { - return runner; - } - public Ordering> getOrdering() { return Ordering.natural(); diff --git a/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 622626dfd57..bef34d9a2da 100644 --- a/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/client/src/main/java/com/metamx/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -61,7 +61,7 @@ import java.util.Map; /** */ -public class TimeseriesQueryQueryToolChest implements QueryToolChest, TimeseriesQuery> +public class TimeseriesQueryQueryToolChest extends QueryToolChest, TimeseriesQuery> { private static final byte TIMESERIES_QUERY = 0x0; @@ -259,12 +259,6 @@ public class TimeseriesQueryQueryToolChest implements QueryToolChest>(runner, Period.months(1)); } - @Override - public QueryRunner> postMergeQueryDecoration(QueryRunner> runner) - { - return runner; - } - public Ordering> getOrdering() { return Ordering.natural(); diff --git a/common/pom.xml b/common/pom.xml index 369aa402c4d..c4d83b8f817 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
--> - + 4.0.0 com.metamx.druid druid-common @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT @@ -81,8 +80,12 @@ jackson-databind - com.fasterxml.jackson.datatype - jackson-datatype-joda + com.fasterxml.jackson.datatype + jackson-datatype-guava + + + com.fasterxml.jackson.datatype + jackson-datatype-joda org.jdbi diff --git a/common/src/main/java/com/metamx/druid/LogicalSegment.java b/common/src/main/java/com/metamx/druid/LogicalSegment.java new file mode 100644 index 00000000000..5550fb44966 --- /dev/null +++ b/common/src/main/java/com/metamx/druid/LogicalSegment.java @@ -0,0 +1,8 @@ +package com.metamx.druid; + +import org.joda.time.Interval; + +public interface LogicalSegment +{ + public Interval getInterval(); +} diff --git a/common/src/main/java/com/metamx/druid/TimelineObjectHolder.java b/common/src/main/java/com/metamx/druid/TimelineObjectHolder.java index 403fe8bb2cb..d93e5ed8f6b 100644 --- a/common/src/main/java/com/metamx/druid/TimelineObjectHolder.java +++ b/common/src/main/java/com/metamx/druid/TimelineObjectHolder.java @@ -24,7 +24,7 @@ import org.joda.time.Interval; /** */ -public class TimelineObjectHolder +public class TimelineObjectHolder implements LogicalSegment { private final Interval interval; private final VersionType version; @@ -41,6 +41,7 @@ public class TimelineObjectHolder this.object = object; } + @Override public Interval getInterval() { return interval; diff --git a/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java b/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java index 84514bd6c4c..12079a77959 100644 --- a/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java +++ b/common/src/main/java/com/metamx/druid/jackson/DefaultObjectMapper.java @@ -36,6 +36,7 @@ import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.databind.SerializerProvider; import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; +import com.fasterxml.jackson.datatype.guava.GuavaModule; import com.fasterxml.jackson.datatype.joda.JodaModule; import com.google.common.base.Throwables; import com.metamx.common.Granularity; @@ -171,10 +172,11 @@ public class DefaultObjectMapper extends ObjectMapper } ); registerModule(serializerModule); + registerModule(new GuavaModule()); configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); configure(MapperFeature.AUTO_DETECT_GETTERS, false); - configure(MapperFeature.AUTO_DETECT_CREATORS, false); +// configure(MapperFeature.AUTO_DETECT_CREATORS, false); https://github.com/FasterXML/jackson-databind/issues/170 configure(MapperFeature.AUTO_DETECT_FIELDS, false); configure(MapperFeature.AUTO_DETECT_IS_GETTERS, false); configure(MapperFeature.AUTO_DETECT_SETTERS, false); diff --git a/common/src/main/java/com/metamx/druid/utils/CompressionUtils.java b/common/src/main/java/com/metamx/druid/utils/CompressionUtils.java new file mode 100644 index 00000000000..c34b8e7e960 --- /dev/null +++ b/common/src/main/java/com/metamx/druid/utils/CompressionUtils.java @@ -0,0 +1,126 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.utils; + +import com.google.common.io.ByteStreams; +import com.google.common.io.Closeables; +import com.google.common.io.Files; +import com.metamx.common.ISE; +import com.metamx.common.StreamUtils; +import com.metamx.common.logger.Logger; +import sun.misc.IOUtils; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.GZIPInputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; + +/** + */ +public class CompressionUtils +{ + private static final Logger log = new Logger(CompressionUtils.class); + + public static long zip(File directory, File outputZipFile) throws IOException + { + if (!directory.isDirectory()) { + throw new IOException(String.format("directory[%s] is not a directory", directory)); + } + + if (!outputZipFile.getName().endsWith(".zip")) { + log.warn("No .zip suffix[%s], putting files from [%s] into it anyway.", outputZipFile, directory); + } + + long totalSize = 0; + ZipOutputStream zipOut = null; + try { + zipOut = new ZipOutputStream(new FileOutputStream(outputZipFile)); + File[] files = directory.listFiles(); + for (File file : files) { + log.info("Adding file[%s] with size[%,d]. 
Total size[%,d]", file, file.length(), totalSize); + if (file.length() >= Integer.MAX_VALUE) { + zipOut.close(); + outputZipFile.delete(); + throw new IOException(String.format("file[%s] too large [%,d]", file, file.length())); + } + zipOut.putNextEntry(new ZipEntry(file.getName())); + totalSize += ByteStreams.copy(Files.newInputStreamSupplier(file), zipOut); + } + } + finally { + Closeables.closeQuietly(zipOut); + } + + return totalSize; + } + + public static void unzip(File pulledFile, File outDir) throws IOException + { + if (!(outDir.exists() && outDir.isDirectory())) { + throw new ISE("outDir[%s] must exist and be a directory", outDir); + } + + log.info("Unzipping file[%s] to [%s]", pulledFile, outDir); + InputStream in = null; + try { + in = new BufferedInputStream(new FileInputStream(pulledFile)); + unzip(in, outDir); + } + finally { + Closeables.closeQuietly(in); + } + } + + public static void unzip(InputStream in, File outDir) throws IOException + { + ZipInputStream zipIn = new ZipInputStream(in); + + ZipEntry entry; + while ((entry = zipIn.getNextEntry()) != null) { + OutputStream out = null; + try { + out = new FileOutputStream(new File(outDir, entry.getName())); + ByteStreams.copy(zipIn, out); + zipIn.closeEntry(); + } + finally { + Closeables.closeQuietly(out); + } + } + } + + public static void gunzip(File pulledFile, File outDir) throws IOException + { + log.info("Gunzipping file[%s] to [%s]", pulledFile, outDir); + StreamUtils.copyToFileAndClose(new GZIPInputStream(new FileInputStream(pulledFile)), outDir); + if (!pulledFile.delete()) { + log.error("Could not delete tmpFile[%s].", pulledFile); + } + } + +} diff --git a/common/src/main/java/com/metamx/druid/utils/JodaUtils.java b/common/src/main/java/com/metamx/druid/utils/JodaUtils.java index 53b14618001..9368a8af111 100644 --- a/common/src/main/java/com/metamx/druid/utils/JodaUtils.java +++ b/common/src/main/java/com/metamx/druid/utils/JodaUtils.java @@ -19,12 +19,15 @@ package com.metamx.druid.utils; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.metamx.common.guava.Comparators; import org.joda.time.DateTime; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Iterator; import java.util.TreeSet; @@ -41,7 +44,7 @@ public class JodaUtils for (Interval interval : intervals) { sortedIntervals.add(interval); } - + if (sortedIntervals.isEmpty()) { return Lists.newArrayList(); } @@ -63,6 +66,40 @@ public class JodaUtils return retVal; } + public static Interval umbrellaInterval(Iterable intervals) + { + ArrayList startDates = Lists.newArrayList(); + ArrayList endDates = Lists.newArrayList(); + + for (Interval interval : intervals) { + startDates.add(interval.getStart()); + endDates.add(interval.getEnd()); + } + + DateTime minStart = minDateTime(startDates.toArray(new DateTime[]{})); + DateTime maxEnd = maxDateTime(endDates.toArray(new DateTime[]{})); + + if (minStart == null || maxEnd == null) { + throw new IllegalArgumentException("Empty list of intervals"); + } + return new Interval(minStart, maxEnd); + } + + public static boolean overlaps(final Interval i, Iterable intervals) + { + return Iterables.any( + intervals, new Predicate() + { + @Override + public boolean apply(@Nullable Interval input) + { + return input.overlaps(i); + } + } + ); + + } + public static DateTime minDateTime(DateTime... 
times) { if (times == null) { @@ -70,8 +107,10 @@ public class JodaUtils } switch (times.length) { - case 0: return null; - case 1: return times[0]; + case 0: + return null; + case 1: + return times[0]; default: DateTime min = times[0]; for (int i = 1; i < times.length; ++i) { @@ -88,8 +127,10 @@ public class JodaUtils } switch (times.length) { - case 0: return null; - case 1: return times[0]; + case 0: + return null; + case 1: + return times[0]; default: DateTime max = times[0]; for (int i = 1; i < times.length; ++i) { diff --git a/common/src/test/java/com/metamx/druid/utils/JodaUtilsTest.java b/common/src/test/java/com/metamx/druid/utils/JodaUtilsTest.java index 6abaf485b68..f0248295dfd 100644 --- a/common/src/test/java/com/metamx/druid/utils/JodaUtilsTest.java +++ b/common/src/test/java/com/metamx/druid/utils/JodaUtilsTest.java @@ -31,6 +31,40 @@ import java.util.List; */ public class JodaUtilsTest { + @Test + public void testUmbrellaIntervalsSimple() throws Exception + { + List intervals = Arrays.asList( + new Interval("2011-03-03/2011-03-04"), + new Interval("2011-01-01/2011-01-02"), + new Interval("2011-02-01/2011-02-05"), + new Interval("2011-02-03/2011-02-08"), + new Interval("2011-01-01/2011-01-03"), + new Interval("2011-03-01/2011-03-02"), + new Interval("2011-03-05/2011-03-06"), + new Interval("2011-02-01/2011-02-02") + ); + + Assert.assertEquals( + new Interval("2011-01-01/2011-03-06"), + JodaUtils.umbrellaInterval(intervals) + ); + } + + @Test + public void testUmbrellaIntervalsNull() throws Exception + { + List intervals = Arrays.asList(); + Throwable thrown = null; + try { + Interval res = JodaUtils.umbrellaInterval(intervals); + } + catch (IllegalArgumentException e) { + thrown = e; + } + Assert.assertNotNull("Empty list of intervals", thrown); + } + @Test public void testCondenseIntervalsSimple() throws Exception { diff --git a/druid-services/pom.xml b/druid-services/pom.xml index 9b2567dc753..b71317d2352 100644 --- a/druid-services/pom.xml +++ b/druid-services/pom.xml @@ -24,11 +24,11 @@ druid-services druid-services druid-services - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT @@ -66,4 +66,4 @@ - \ No newline at end of file + diff --git a/examples/pom.xml b/examples/pom.xml index fa195a3f0f4..8dfe0dca4d1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/examples/rand/pom.xml b/examples/rand/pom.xml index 592a713cf3a..4b1f8ab2a1d 100644 --- a/examples/rand/pom.xml +++ b/examples/rand/pom.xml @@ -1,6 +1,5 @@ - + 4.0.0 com.metamx.druid druid-examples-rand @@ -10,7 +9,7 @@ com.metamx druid-examples - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java b/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java index ecdf2606a3b..92eb86cc801 100644 --- a/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java +++ b/examples/rand/src/main/java/druid/examples/RealtimeStandaloneMain.java @@ -12,7 +12,7 @@ import com.metamx.druid.log.LogLevelAdjuster; import com.metamx.druid.realtime.MetadataUpdater; import com.metamx.druid.realtime.MetadataUpdaterConfig; import com.metamx.druid.realtime.RealtimeNode; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; import com.metamx.phonebook.PhoneBook; @@ -72,8 +72,8 @@ public class RealtimeStandaloneMain // dummyMetadataUpdater will not send updates to db because standalone demo 
has no db rn.setMetadataUpdater(dummyMetadataUpdater); - rn.setSegmentPusher( - new SegmentPusher() + rn.setDataSegmentPusher( + new DataSegmentPusher() { @Override public DataSegment push(File file, DataSegment segment) throws IOException diff --git a/examples/twitter/pom.xml b/examples/twitter/pom.xml index a96af1c08a0..60241675115 100644 --- a/examples/twitter/pom.xml +++ b/examples/twitter/pom.xml @@ -1,6 +1,5 @@ - + 4.0.0 com.metamx.druid druid-examples-twitter @@ -10,7 +9,7 @@ com.metamx druid-examples - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java b/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java index e936d481489..5f4d25cb95b 100644 --- a/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java +++ b/examples/twitter/src/main/java/druid/examples/RealtimeStandaloneMain.java @@ -12,7 +12,7 @@ import com.metamx.druid.log.LogLevelAdjuster; import com.metamx.druid.realtime.MetadataUpdater; import com.metamx.druid.realtime.MetadataUpdaterConfig; import com.metamx.druid.realtime.RealtimeNode; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; import com.metamx.phonebook.PhoneBook; import druid.examples.twitter.TwitterSpritzerFirehoseFactory; @@ -48,10 +48,12 @@ public class RealtimeStandaloneMain rn.setPhoneBook(dummyPhoneBook); MetadataUpdater dummyMetadataUpdater = - new MetadataUpdater(new DefaultObjectMapper(), + new MetadataUpdater( + new DefaultObjectMapper(), Config.createFactory(Initialization.loadProperties()).build(MetadataUpdaterConfig.class), dummyPhoneBook, - null) { + null + ) { @Override public void publishSegment(DataSegment segment) throws IOException { @@ -74,8 +76,8 @@ public class RealtimeStandaloneMain // dummyMetadataUpdater will not send updates to db because standalone demo has no db rn.setMetadataUpdater(dummyMetadataUpdater); - rn.setSegmentPusher( - new SegmentPusher() + rn.setDataSegmentPusher( + new DataSegmentPusher() { @Override public DataSegment push(File file, DataSegment segment) throws IOException diff --git a/index-common/pom.xml b/index-common/pom.xml index 084bc314ba1..d06dd7d561e 100644 --- a/index-common/pom.xml +++ b/index-common/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. --> - + 4.0.0 com.metamx.druid druid-index-common @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT @@ -95,4 +94,4 @@ - \ No newline at end of file + diff --git a/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java b/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java index 93825a8e9f0..8179cd623e4 100644 --- a/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java +++ b/index-common/src/main/java/com/metamx/druid/index/column/SimpleColumn.java @@ -72,30 +72,30 @@ class SimpleColumn implements Column @Override public DictionaryEncodedColumn getDictionaryEncoding() { - return dictionaryEncodedColumn.get(); + return dictionaryEncodedColumn == null ? null : dictionaryEncodedColumn.get(); } @Override public RunLengthColumn getRunLengthColumn() { - return runLengthColumn.get(); + return runLengthColumn == null ? null : runLengthColumn.get(); } @Override public GenericColumn getGenericColumn() { - return genericColumn.get(); + return genericColumn == null ? 
null : genericColumn.get(); } @Override public ComplexColumn getComplexColumn() { - return complexColumn.get(); + return complexColumn == null ? null : complexColumn.get(); } @Override public BitmapIndex getBitmapIndex() { - return bitmapIndex.get(); + return bitmapIndex == null ? null : bitmapIndex.get(); } } diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java index 6d3badd4c35..d26e73f5b3c 100644 --- a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java +++ b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java @@ -21,6 +21,7 @@ package com.metamx.druid.index.v1; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -62,7 +63,6 @@ import com.metamx.druid.kv.VSizeIndexedInts; import com.metamx.druid.utils.SerializerUtils; import it.uniroma3.mat.extendedset.intset.ConciseSet; import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; - import org.joda.time.Interval; import java.io.ByteArrayOutputStream; @@ -369,7 +369,8 @@ public class IndexIO ); } - LinkedHashSet skippedFiles = Sets.newLinkedHashSet(); + final LinkedHashSet skippedFiles = Sets.newLinkedHashSet(); + final Set skippedDimensions = Sets.newLinkedHashSet(); for (String filename : v8SmooshedFiles.getInternalFilenames()) { log.info("Processing file[%s]", filename); if (filename.startsWith("dim_")) { @@ -392,6 +393,12 @@ public class IndexIO dimBuffer, GenericIndexed.stringStrategy ); + if (dictionary.size() == 0) { + log.info("Dimension[%s] had cardinality 0, equivalent to no column, so skipping.", dimension); + skippedDimensions.add(dimension); + continue; + } + VSizeIndexedInts singleValCol = null; VSizeIndexed multiValCol = VSizeIndexed.readFromByteBuffer(dimBuffer.asReadOnlyBuffer()); GenericIndexed bitmaps = bitmapIndexes.get(dimension); @@ -555,35 +562,49 @@ public class IndexIO channel.write(ByteBuffer.wrap(specBytes)); serdeficator.write(channel); channel.close(); - } else if ("index.drd".equals(filename)) { - final ByteBuffer indexBuffer = v8SmooshedFiles.mapFile(filename); - - indexBuffer.get(); // Skip the version byte - final GenericIndexed dims = GenericIndexed.read( - indexBuffer, GenericIndexed.stringStrategy - ); - final GenericIndexed availableMetrics = GenericIndexed.read( - indexBuffer, GenericIndexed.stringStrategy - ); - final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer)); - - Set columns = Sets.newTreeSet(); - columns.addAll(Lists.newArrayList(dims)); - columns.addAll(Lists.newArrayList(availableMetrics)); - - GenericIndexed cols = GenericIndexed.fromIterable(columns, GenericIndexed.stringStrategy); - - final int numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16; - final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes); - cols.writeToChannel(writer); - dims.writeToChannel(writer); - serializerUtils.writeLong(writer, dataInterval.getStartMillis()); - serializerUtils.writeLong(writer, dataInterval.getEndMillis()); - writer.close(); } else { skippedFiles.add(filename); } } + + final ByteBuffer indexBuffer = v8SmooshedFiles.mapFile("index.drd"); + + indexBuffer.get(); // Skip the version byte + final GenericIndexed dims8 = GenericIndexed.read( + indexBuffer, GenericIndexed.stringStrategy + ); 
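// The dims9 list built next filters dims8 against skippedDimensions: any
// dimension whose value dictionary had cardinality 0 was skipped above and
// never written as a v9 column, so the rebuilt index.drd must not list it.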
+ final GenericIndexed dims9 = GenericIndexed.fromIterable( + Iterables.filter( + dims8, new Predicate() + { + @Override + public boolean apply(String s) + { + return !skippedDimensions.contains(s); + } + } + ), + GenericIndexed.stringStrategy + ); + final GenericIndexed availableMetrics = GenericIndexed.read( + indexBuffer, GenericIndexed.stringStrategy + ); + final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer)); + + Set columns = Sets.newTreeSet(); + columns.addAll(Lists.newArrayList(dims9)); + columns.addAll(Lists.newArrayList(availableMetrics)); + + GenericIndexed cols = GenericIndexed.fromIterable(columns, GenericIndexed.stringStrategy); + + final int numBytes = cols.getSerializedSize() + dims9.getSerializedSize() + 16; + final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes); + cols.writeToChannel(writer); + dims9.writeToChannel(writer); + serializerUtils.writeLong(writer, dataInterval.getStartMillis()); + serializerUtils.writeLong(writer, dataInterval.getEndMillis()); + writer.close(); + log.info("Skipped files[%s]", skippedFiles); v9Smoosher.close(); diff --git a/indexer/pom.xml b/indexer/pom.xml index 5ff19d88607..006dea455d5 100644 --- a/indexer/pom.xml +++ b/indexer/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. --> - + 4.0.0 com.metamx.druid druid-indexer @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT @@ -58,14 +57,6 @@ javax.mail mail - - org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - diff --git a/indexer/src/main/java/com/metamx/druid/indexer/DbUpdaterJob.java b/indexer/src/main/java/com/metamx/druid/indexer/DbUpdaterJob.java index a7dbee1f4e2..3025df982e8 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/DbUpdaterJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/DbUpdaterJob.java @@ -20,24 +20,18 @@ package com.metamx.druid.indexer; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Throwables; -import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import com.metamx.druid.db.DbConnector; import com.metamx.druid.indexer.updater.DbUpdaterJobSpec; import com.metamx.druid.jackson.DefaultObjectMapper; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - import org.joda.time.DateTime; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.PreparedBatch; import org.skife.jdbi.v2.tweak.HandleCallback; -import java.io.IOException; import java.util.List; /** @@ -52,9 +46,6 @@ public class DbUpdaterJob implements Jobby private final DbUpdaterJobSpec spec; private final DBI dbi; - // Keep track of published segment identifiers, in case a client is interested. 
- private volatile ImmutableList publishedSegments = null; - public DbUpdaterJob( HadoopDruidIndexerConfig config ) @@ -67,75 +58,48 @@ public class DbUpdaterJob implements Jobby @Override public boolean run() { - final Configuration conf = new Configuration(); + final List segments = IndexGeneratorJob.getPublishedSegments(config); - ImmutableList.Builder publishedSegmentsBuilder = ImmutableList.builder(); + dbi.withHandle( + new HandleCallback() + { + @Override + public Void withHandle(Handle handle) throws Exception + { + final PreparedBatch batch = handle.prepareBatch( + String.format( + "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)", + spec.getSegmentTable() + ) + ); + for (final DataSegment segment : segments) { - for (String propName : System.getProperties().stringPropertyNames()) { - if (propName.startsWith("hadoop.")) { - conf.set(propName.substring("hadoop.".length()), System.getProperty(propName)); - } - } + batch.add( + new ImmutableMap.Builder() + .put("id", segment.getIdentifier()) + .put("dataSource", segment.getDataSource()) + .put("created_date", new DateTime().toString()) + .put("start", segment.getInterval().getStart().toString()) + .put("end", segment.getInterval().getEnd().toString()) + .put("partitioned", segment.getShardSpec().getPartitionNum()) + .put("version", segment.getVersion()) + .put("used", true) + .put("payload", jsonMapper.writeValueAsString(segment)) + .build() + ); - final Path descriptorInfoDir = config.makeDescriptorInfoDir(); + log.info("Published %s", segment.getIdentifier()); - try { - FileSystem fs = descriptorInfoDir.getFileSystem(conf); - - for (FileStatus status : fs.listStatus(descriptorInfoDir)) { - final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class); - - dbi.withHandle( - new HandleCallback() - { - @Override - public Void withHandle(Handle handle) throws Exception - { - handle.createStatement(String.format( - "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)", - spec.getSegmentTable() - )) - .bind("id", segment.getIdentifier()) - .bind("dataSource", segment.getDataSource()) - .bind("created_date", new DateTime().toString()) - .bind("start", segment.getInterval().getStart().toString()) - .bind("end", segment.getInterval().getEnd().toString()) - .bind("partitioned", segment.getShardSpec().getPartitionNum()) - .bind("version", segment.getVersion()) - .bind("used", true) - .bind("payload", jsonMapper.writeValueAsString(segment)) - .execute(); - - return null; - } } - ); + batch.execute(); - publishedSegmentsBuilder.add(segment); - log.info("Published %s", segment.getIdentifier()); - } - } - catch (IOException e) { - throw Throwables.propagate(e); - } - - publishedSegments = publishedSegmentsBuilder.build(); + return null; + } + } + ); return true; } - /** - * Returns a list of segment identifiers published by the most recent call to run(). - * Throws an IllegalStateException if run() has never been called. 
- */ - public List getPublishedSegments() - { - if (publishedSegments == null) { - log.error("getPublishedSegments called before run!"); - throw new IllegalStateException("DbUpdaterJob has not run yet"); - } else { - return publishedSegments; - } - } } diff --git a/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java b/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java index d4ee1941396..f34ff2988f2 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/DeterminePartitionsJob.java @@ -266,8 +266,7 @@ public class DeterminePartitionsJob implements Jobby Context context ) throws IOException, InterruptedException { - // Create group key - // TODO -- There are more efficient ways to do this + // Create group key, there are probably more efficient ways of doing this final Map> dims = Maps.newTreeMap(); for(final String dim : inputRow.getDimensions()) { final Set dimValues = ImmutableSortedSet.copyOf(inputRow.getDimension(dim)); @@ -394,6 +393,9 @@ public class DeterminePartitionsJob implements Jobby final Interval interval = maybeInterval.get(); final byte[] groupKey = interval.getStart().toString().getBytes(Charsets.UTF_8); + // Emit row-counter value. + write(context, groupKey, new DimValueCount("", "", 1)); + for(final Map.Entry> dimAndValues : dims.entrySet()) { final String dim = dimAndValues.getKey(); @@ -510,9 +512,23 @@ public class DeterminePartitionsJob implements Jobby Context context, SortableBytes keyBytes, Iterable combinedIterable ) throws IOException, InterruptedException { - PeekingIterator iterator = Iterators.peekingIterator(combinedIterable.iterator()); + final DateTime bucket = new DateTime(new String(keyBytes.getGroupKey(), Charsets.UTF_8)); + final PeekingIterator iterator = Iterators.peekingIterator(combinedIterable.iterator()); - // "iterator" will take us over many candidate dimensions + log.info( + "Determining partitions for interval: %s", + config.getGranularitySpec().bucketInterval(bucket).orNull() + ); + + // First DVC should be the total row count indicator + final DimValueCount firstDvc = iterator.next(); + final int totalRows = firstDvc.numRows; + + if(!firstDvc.dim.equals("") || !firstDvc.value.equals("")) { + throw new IllegalStateException("WTF?! Expected total row indicator on first k/v pair!"); + } + + // "iterator" will now take us over many candidate dimensions DimPartitions currentDimPartitions = null; DimPartition currentDimPartition = null; String currentDimPartitionStart = null; @@ -636,8 +652,6 @@ public class DeterminePartitionsJob implements Jobby throw new ISE("No suitable partitioning dimension found!"); } - final int totalRows = dimPartitionss.values().iterator().next().getRows(); - int maxCardinality = Integer.MIN_VALUE; long minVariance = Long.MAX_VALUE; DimPartitions minVariancePartitions = null; @@ -645,12 +659,14 @@ public class DeterminePartitionsJob implements Jobby for(final DimPartitions dimPartitions : dimPartitionss.values()) { if(dimPartitions.getRows() != totalRows) { - throw new ISE( - "WTF?! 
Dimension[%s] row count %,d != expected row count %,d", + log.info( + "Dimension[%s] is not present in all rows (row count %,d != expected row count %,d)", dimPartitions.dim, dimPartitions.getRows(), totalRows ); + + continue; } // Make sure none of these shards are oversized @@ -684,7 +700,6 @@ public class DeterminePartitionsJob implements Jobby throw new ISE("No suitable partitioning dimension found!"); } - final DateTime bucket = new DateTime(new String(keyBytes.getGroupKey(), Charsets.UTF_8)); final OutputStream out = Utils.makePathAndOutputStream( context, config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0)), config.isOverwriteFiles() ); diff --git a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerAzkWrapper.java b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerAzkWrapper.java index 8d073dd7297..5318d5cb600 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerAzkWrapper.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerAzkWrapper.java @@ -83,7 +83,7 @@ public class HadoopDruidIndexerAzkWrapper final HadoopDruidIndexerConfig config = jsonMapper.convertValue(theMap, HadoopDruidIndexerConfig.class); config.setIntervals(dataInterval); - config.setVersion(new DateTime()); + config.setVersion(new DateTime().toString()); new HadoopDruidIndexerJob(config).run(); } diff --git a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java index eed2339114b..5dc45d19484 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerConfig.java @@ -175,7 +175,7 @@ public class HadoopDruidIndexerConfig private volatile PathSpec pathSpec; private volatile String jobOutputDir; private volatile String segmentOutputDir; - private volatile DateTime version = new DateTime(); + private volatile String version = new DateTime().toString(); private volatile PartitionsSpec partitionsSpec; private volatile boolean leaveIntermediate = false; private volatile boolean cleanupOnFailure = true; @@ -198,7 +198,7 @@ public class HadoopDruidIndexerConfig final @JsonProperty("pathSpec") PathSpec pathSpec, final @JsonProperty("workingPath") String jobOutputDir, final @JsonProperty("segmentOutputPath") String segmentOutputDir, - final @JsonProperty("version") DateTime version, + final @JsonProperty("version") String version, final @JsonProperty("partitionDimension") String partitionDimension, final @JsonProperty("targetPartitionSize") Long targetPartitionSize, final @JsonProperty("partitionsSpec") PartitionsSpec partitionsSpec, @@ -220,7 +220,7 @@ public class HadoopDruidIndexerConfig this.pathSpec = pathSpec; this.jobOutputDir = jobOutputDir; this.segmentOutputDir = segmentOutputDir; - this.version = version == null ? new DateTime() : version; + this.version = version == null ? 
new DateTime().toString() : version; this.partitionsSpec = partitionsSpec; this.leaveIntermediate = leaveIntermediate; this.cleanupOnFailure = cleanupOnFailure; @@ -410,12 +410,12 @@ public class HadoopDruidIndexerConfig } @JsonProperty - public DateTime getVersion() + public String getVersion() { return version; } - public void setVersion(DateTime version) + public void setVersion(String version) { this.version = version; } @@ -624,7 +624,7 @@ public class HadoopDruidIndexerConfig */ public Path makeIntermediatePath() { - return new Path(String.format("%s/%s/%s", getJobOutputDir(), dataSource, getVersion().toString().replace(":", ""))); + return new Path(String.format("%s/%s/%s", getJobOutputDir(), dataSource, getVersion().replace(":", ""))); } public Path makeSegmentPartitionInfoPath(Bucket bucket) @@ -662,11 +662,12 @@ public class HadoopDruidIndexerConfig return new Path( String.format( - "%s/%s_%s/%s/%s", + "%s/%s/%s_%s/%s/%s", getSegmentOutputDir(), + dataSource, bucketInterval.getStart().toString(), bucketInterval.getEnd().toString(), - getVersion().toString(), + getVersion(), bucket.partitionNum ) ); diff --git a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerJob.java b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerJob.java index eae1bfa885d..aff1265782a 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/HadoopDruidIndexerJob.java @@ -47,6 +47,7 @@ public class HadoopDruidIndexerJob implements Jobby private final HadoopDruidIndexerConfig config; private final DbUpdaterJob dbUpdaterJob; private IndexGeneratorJob indexJob; + private volatile List publishedSegments = null; public HadoopDruidIndexerJob( HadoopDruidIndexerConfig config @@ -102,6 +103,8 @@ public class HadoopDruidIndexerJob implements Jobby } } + publishedSegments = IndexGeneratorJob.getPublishedSegments(config); + if (!config.isLeaveIntermediate()) { if (failedMessage == null || config.isCleanupOnFailure()) { Path workingPath = config.makeIntermediatePath(); @@ -147,8 +150,10 @@ public class HadoopDruidIndexerJob implements Jobby } public List getPublishedSegments() { - Preconditions.checkState(dbUpdaterJob != null, "No updaterJobSpec set, cannot get published segments"); - return dbUpdaterJob.getPublishedSegments(); + if(publishedSegments == null) { + throw new IllegalStateException("Job hasn't run yet. 
No segments have been published yet."); + } + return publishedSegments; } public IndexGeneratorJob.IndexGeneratorStats getIndexJobStats() diff --git a/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java b/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java index d8eba264c11..9359933b645 100644 --- a/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java +++ b/indexer/src/main/java/com/metamx/druid/indexer/IndexGeneratorJob.java @@ -21,6 +21,7 @@ package com.metamx.druid.indexer; import com.google.common.base.Optional; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -57,6 +58,7 @@ import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import com.fasterxml.jackson.databind.ObjectMapper; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -149,7 +151,40 @@ public class IndexGeneratorJob implements Jobby } } + public static List getPublishedSegments(HadoopDruidIndexerConfig config) { + + final Configuration conf = new Configuration(); + final ObjectMapper jsonMapper = HadoopDruidIndexerConfig.jsonMapper; + + ImmutableList.Builder publishedSegmentsBuilder = ImmutableList.builder(); + + for (String propName : System.getProperties().stringPropertyNames()) { + if (propName.startsWith("hadoop.")) { + conf.set(propName.substring("hadoop.".length()), System.getProperty(propName)); + } + } + + final Path descriptorInfoDir = config.makeDescriptorInfoDir(); + + try { + FileSystem fs = descriptorInfoDir.getFileSystem(conf); + + for (FileStatus status : fs.listStatus(descriptorInfoDir)) { + final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class); + publishedSegmentsBuilder.add(segment); + log.info("Adding segment %s to the list of published segments", segment.getIdentifier()); + } + } + catch (IOException e) { + throw Throwables.propagate(e); + } + List publishedSegments = publishedSegmentsBuilder.build(); + + return publishedSegments; +} + public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper + { @Override protected void innerMap( @@ -379,7 +414,8 @@ public class IndexGeneratorJob implements Jobby ); } else if (outputFS instanceof LocalFileSystem) { loadSpec = ImmutableMap.of( - "type", "test" + "type", "local", + "path", indexOutURI.getPath() ); } else { throw new ISE("Unknown file system[%s]", outputFS.getClass()); @@ -388,7 +424,7 @@ public class IndexGeneratorJob implements Jobby DataSegment segment = new DataSegment( config.getDataSource(), interval, - config.getVersion().toString(), + config.getVersion(), loadSpec, dimensionNames, metricNames, diff --git a/merger/pom.xml b/merger/pom.xml index da776d6e512..3d7d4a05f30 100644 --- a/merger/pom.xml +++ b/merger/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
--> - + 4.0.0 com.metamx.druid druid-merger @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java b/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java index 59a7f12cd16..58ad3c5cc43 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/TaskToolbox.java @@ -20,13 +20,15 @@ package com.metamx.druid.merger.common; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; -import com.metamx.druid.loading.S3SegmentGetterConfig; -import com.metamx.druid.loading.S3SegmentPuller; -import com.metamx.druid.loading.S3ZippedSegmentPuller; +import com.google.common.collect.Maps; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.loading.DataSegmentPusher; +import com.metamx.druid.loading.MMappedQueryableIndexFactory; +import com.metamx.druid.loading.S3DataSegmentPuller; import com.metamx.druid.loading.SegmentKiller; -import com.metamx.druid.loading.SegmentPuller; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.SegmentLoaderConfig; +import com.metamx.druid.loading.SegmentLoadingException; +import com.metamx.druid.loading.SingleSegmentLoader; import com.metamx.druid.merger.common.actions.TaskActionClient; import com.metamx.druid.merger.common.config.TaskConfig; import com.metamx.druid.merger.common.task.Task; @@ -34,6 +36,7 @@ import com.metamx.emitter.service.ServiceEmitter; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import java.io.File; +import java.util.List; import java.util.Map; /** @@ -45,7 +48,7 @@ public class TaskToolbox private final TaskActionClient taskActionClient; private final ServiceEmitter emitter; private final RestS3Service s3Client; - private final SegmentPusher segmentPusher; + private final DataSegmentPusher segmentPusher; private final SegmentKiller segmentKiller; private final ObjectMapper objectMapper; @@ -54,7 +57,7 @@ public class TaskToolbox TaskActionClient taskActionClient, ServiceEmitter emitter, RestS3Service s3Client, - SegmentPusher segmentPusher, + DataSegmentPusher segmentPusher, SegmentKiller segmentKiller, ObjectMapper objectMapper ) @@ -83,7 +86,7 @@ public class TaskToolbox return emitter; } - public SegmentPusher getSegmentPusher() + public DataSegmentPusher getSegmentPusher() { return segmentPusher; } @@ -98,21 +101,27 @@ public class TaskToolbox return objectMapper; } - public Map getSegmentGetters(final Task task) + public Map getSegments(final Task task, List segments) + throws SegmentLoadingException { - final S3SegmentGetterConfig getterConfig = new S3SegmentGetterConfig() - { - @Override - public File getCacheDirectory() - { - return new File(config.getTaskDir(task), "fetched_segments"); - } - }; + final SingleSegmentLoader loader = new SingleSegmentLoader( + new S3DataSegmentPuller(s3Client), + new MMappedQueryableIndexFactory(), + new SegmentLoaderConfig() + { + @Override + public File getCacheDirectory() + { + return new File(config.getTaskDir(task), "fetched_segments"); + } + } + ); - return ImmutableMap.builder() - .put("s3", new S3SegmentPuller(s3Client, getterConfig)) - .put("s3_union", new S3SegmentPuller(s3Client, getterConfig)) - .put("s3_zip", new S3ZippedSegmentPuller(s3Client, getterConfig)) - .build(); + Map retVal = Maps.newLinkedHashMap(); + for (DataSegment segment : segments) { + retVal.put(segment, 
loader.getSegmentFiles(segment)); + } + + return retVal; } } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/actions/RemoteTaskActionClient.java b/merger/src/main/java/com/metamx/druid/merger/common/actions/RemoteTaskActionClient.java index 2dc86ab3115..26900e29942 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/actions/RemoteTaskActionClient.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/actions/RemoteTaskActionClient.java @@ -1,25 +1,31 @@ package com.metamx.druid.merger.common.actions; +import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.metamx.common.logger.Logger; import com.metamx.http.client.HttpClient; import com.metamx.http.client.response.ToStringResponseHandler; import com.fasterxml.jackson.databind.ObjectMapper; +import com.netflix.curator.x.discovery.ServiceInstance; +import com.netflix.curator.x.discovery.ServiceProvider; import java.net.URI; import java.net.URISyntaxException; +import java.util.Map; public class RemoteTaskActionClient implements TaskActionClient { private final HttpClient httpClient; + private final ServiceProvider serviceProvider; private final ObjectMapper jsonMapper; private static final Logger log = new Logger(RemoteTaskActionClient.class); - public RemoteTaskActionClient(HttpClient httpClient, ObjectMapper jsonMapper) + public RemoteTaskActionClient(HttpClient httpClient, ServiceProvider serviceProvider, ObjectMapper jsonMapper) { this.httpClient = httpClient; + this.serviceProvider = serviceProvider; this.jsonMapper = jsonMapper; } @@ -34,20 +40,36 @@ public class RemoteTaskActionClient implements TaskActionClient .go(new ToStringResponseHandler(Charsets.UTF_8)) .get(); - // TODO Figure out how to check HTTP status code - if(response.equals("")) { - return null; - } else { - return jsonMapper.readValue(response, taskAction.getReturnTypeReference()); - } + final Map responseDict = jsonMapper.readValue( + response, + new TypeReference>() {} + ); + + return jsonMapper.convertValue(responseDict.get("result"), taskAction.getReturnTypeReference()); } catch (Exception e) { throw Throwables.propagate(e); } } - public URI getServiceUri() throws URISyntaxException + private URI getServiceUri() throws Exception { - return new URI("http://localhost:8087/mmx/merger/v1/action"); + final ServiceInstance instance = serviceProvider.getInstance(); + final String scheme; + final String host; + final int port; + final String path = "/mmx/merger/v1/action"; + + host = instance.getAddress(); + + if (instance.getSslPort() != null && instance.getSslPort() > 0) { + scheme = "https"; + port = instance.getSslPort(); + } else { + scheme = "http"; + port = instance.getPort(); + } + + return new URI(scheme, null, host, port, path, null, null); } } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentInsertAction.java b/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentInsertAction.java index 436c16e26fa..75ad4a9161f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentInsertAction.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentInsertAction.java @@ -51,27 +51,8 @@ public class SegmentInsertAction implements TaskAction @Override public Void perform(TaskActionToolbox toolbox) { - // Verify that each of these segments-to-insert falls under some lock - // TODO: Should this be done while holding the giant lock on TaskLockbox? 
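The RemoteTaskActionClient change above resolves the coordinator through service discovery and builds the action URI with the multi-argument java.net.URI constructor. A standalone sketch of that URI construction follows; the host and port are made-up examples.

import java.net.URI;
import java.net.URISyntaxException;

public class ActionUriSketch
{
  public static URI buildActionUri(String host, int port, boolean useSsl) throws URISyntaxException
  {
    final String scheme = useSsl ? "https" : "http";
    final String path = "/mmx/merger/v1/action";
    // URI(scheme, userInfo, host, port, path, query, fragment)
    return new URI(scheme, null, host, port, path, null, null);
  }

  public static void main(String[] args) throws URISyntaxException
  {
    // Prints: http://coordinator.example.com:8087/mmx/merger/v1/action
    System.out.println(buildActionUri("coordinator.example.com", 8087, false));
  }
}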
(To prevent anyone from grabbing - // TODO: these locks out from under us while the operation is ongoing.) Probably not necessary. - final List taskLocks = toolbox.getTaskLockbox().findLocksForTask(task); - for(final DataSegment segment : segments) { - final boolean ok = Iterables.any( - taskLocks, new Predicate() - { - @Override - public boolean apply(TaskLock taskLock) - { - return taskLock.getVersion().equals(segment.getVersion()) - && taskLock.getDataSource().equals(segment.getDataSource()) - && taskLock.getInterval().contains(segment.getInterval()); - } - } - ); - - if(!ok) { - throw new ISE("No currently-held lock covers segment: %s", segment); - } + if(!toolbox.taskLockCoversSegments(task, segments, false)) { + throw new ISE("Segments not covered by locks for task: %s", task.getId()); } try { diff --git a/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentNukeAction.java b/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentNukeAction.java index ca0d9f3a9bd..f1b61c58d9f 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentNukeAction.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/actions/SegmentNukeAction.java @@ -51,27 +51,8 @@ public class SegmentNukeAction implements TaskAction @Override public Void perform(TaskActionToolbox toolbox) { - // Verify that each of these segments-to-nuke falls under some lock - // TODO: Should this be done while holding the giant lock on TaskLockbox? (To prevent anyone from grabbing - // TODO: these locks out from under us while the operation is ongoing.) Probably not necessary. - final List taskLocks = toolbox.getTaskLockbox().findLocksForTask(task); - for(final DataSegment segment : segments) { - final boolean ok = Iterables.any( - taskLocks, new Predicate() - { - @Override - public boolean apply(TaskLock taskLock) - { - return taskLock.getVersion().compareTo(segment.getVersion()) >= 0 - && taskLock.getDataSource().equals(segment.getDataSource()) - && taskLock.getInterval().contains(segment.getInterval()); - } - } - ); - - if(!ok) { - throw new ISE("No currently-held lock covers segment: %s", segment); - } + if(!toolbox.taskLockCoversSegments(task, segments, true)) { + throw new ISE("Segments not covered by locks for task: %s", task.getId()); } try { diff --git a/merger/src/main/java/com/metamx/druid/merger/common/actions/TaskActionToolbox.java b/merger/src/main/java/com/metamx/druid/merger/common/actions/TaskActionToolbox.java index 30f87a1f6c3..af8f6dcd40c 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/actions/TaskActionToolbox.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/actions/TaskActionToolbox.java @@ -1,10 +1,19 @@ package com.metamx.druid.merger.common.actions; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.metamx.common.ISE; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.merger.common.TaskLock; +import com.metamx.druid.merger.common.task.Task; import com.metamx.druid.merger.coordinator.MergerDBCoordinator; import com.metamx.druid.merger.coordinator.TaskLockbox; import com.metamx.druid.merger.coordinator.TaskQueue; import com.metamx.emitter.service.ServiceEmitter; +import java.util.List; +import java.util.Set; + public class TaskActionToolbox { private final TaskQueue taskQueue; @@ -44,4 +53,43 @@ public class TaskActionToolbox { return emitter; } + + public boolean taskLockCoversSegments( + final Task task, + final Set segments, + final boolean 
allowOlderVersions + ) + { + // Verify that each of these segments falls under some lock + + // NOTE: It is possible for our lock to be revoked (if the task has failed and given up its locks) after we check + // NOTE: it and before we perform the segment insert, but, that should be OK since the worst that happens is we + // NOTE: insert some segments from the task but not others. + + final List taskLocks = getTaskLockbox().findLocksForTask(task); + for(final DataSegment segment : segments) { + final boolean ok = Iterables.any( + taskLocks, new Predicate() + { + @Override + public boolean apply(TaskLock taskLock) + { + final boolean versionOk = allowOlderVersions + ? taskLock.getVersion().compareTo(segment.getVersion()) >= 0 + : taskLock.getVersion().equals(segment.getVersion()); + + return versionOk + && taskLock.getDataSource().equals(segment.getDataSource()) + && taskLock.getInterval().contains(segment.getInterval()); + } + } + ); + + if (!ok) { + return false; + } + } + + return true; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/config/TaskConfig.java b/merger/src/main/java/com/metamx/druid/merger/common/config/TaskConfig.java index c66009cd8ac..5918f0627c6 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/config/TaskConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/config/TaskConfig.java @@ -13,7 +13,10 @@ public abstract class TaskConfig @Config("druid.merger.rowFlushBoundary") @Default("500000") - public abstract long getRowFlushBoundary(); + public abstract int getDefaultRowFlushBoundary(); + + @Config("druid.merger.hadoopWorkingPath") + public abstract String getHadoopWorkingPath(); public File getTaskDir(final Task task) { return new File(getBaseTaskDir(), task.getId()); diff --git a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java index 7d456d29e01..c26888c4485 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/index/YeOldePlumberSchool.java @@ -35,7 +35,7 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.realtime.FireDepartmentMetrics; import com.metamx.druid.realtime.FireHydrant; @@ -45,11 +45,11 @@ import com.metamx.druid.realtime.Schema; import com.metamx.druid.realtime.Sink; - - +import org.apache.commons.io.FileUtils; import org.joda.time.Interval; import java.io.File; +import java.io.IOException; import java.util.List; import java.util.Set; @@ -61,7 +61,7 @@ public class YeOldePlumberSchool implements PlumberSchool { private final Interval interval; private final String version; - private final SegmentPusher segmentPusher; + private final DataSegmentPusher dataSegmentPusher; private final File tmpSegmentDir; private static final Logger log = new Logger(YeOldePlumberSchool.class); @@ -70,13 +70,13 @@ public class YeOldePlumberSchool implements PlumberSchool public YeOldePlumberSchool( @JsonProperty("interval") Interval interval, @JsonProperty("version") String version, - @JacksonInject("segmentPusher") SegmentPusher segmentPusher, + @JacksonInject("segmentPusher") DataSegmentPusher dataSegmentPusher, 
@JacksonInject("tmpSegmentDir") File tmpSegmentDir ) { this.interval = interval; this.version = version; - this.segmentPusher = segmentPusher; + this.dataSegmentPusher = dataSegmentPusher; this.tmpSegmentDir = tmpSegmentDir; } @@ -120,13 +120,13 @@ public class YeOldePlumberSchool implements PlumberSchool @Override public void finishJob() { + // The segment we will upload + File fileToUpload = null; + try { // User should have persisted everything by now. Preconditions.checkState(!theSink.swappable(), "All data must be persisted before fininshing the job!"); - // The segment we will upload - final File fileToUpload; - if(spilled.size() == 0) { throw new IllegalStateException("Nothing indexed?"); } else if(spilled.size() == 1) { @@ -149,7 +149,7 @@ public class YeOldePlumberSchool implements PlumberSchool .withVersion(version) .withBinaryVersion(IndexIO.getVersionFromDir(fileToUpload)); - segmentPusher.push(fileToUpload, segmentToUpload); + dataSegmentPusher.push(fileToUpload, segmentToUpload); log.info( "Uploaded segment[%s]", @@ -160,6 +160,17 @@ public class YeOldePlumberSchool implements PlumberSchool log.warn(e, "Failed to merge and upload"); throw Throwables.propagate(e); } + finally { + try { + if (fileToUpload != null) { + log.info("Deleting Index File[%s]", fileToUpload); + FileUtils.deleteDirectory(fileToUpload); + } + } + catch (IOException e) { + log.warn(e, "Error deleting directory[%s]", fileToUpload); + } + } } private void spillIfSwappable() diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/AbstractTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/AbstractTask.java index 53653aa6595..899e2066fb0 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/AbstractTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/AbstractTask.java @@ -19,7 +19,6 @@ package com.metamx.druid.merger.common.task; -import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Objects; import com.google.common.base.Optional; @@ -69,20 +68,13 @@ public abstract class AbstractTask implements Task return dataSource; } + @JsonProperty("interval") @Override - public Optional getFixedInterval() + public Optional getImplicitLockInterval() { return interval; } - // Awesome hack to get around lack of serde for Optional - // TODO Look into jackson-datatype-guava - @JsonProperty("interval") - private Interval getNullableIntervalForJackson() - { - return interval.orNull(); - } - @Override public TaskStatus preflight(TaskToolbox toolbox) throws Exception { @@ -96,7 +88,7 @@ public abstract class AbstractTask implements Task .add("id", id) .add("type", getType()) .add("dataSource", dataSource) - .add("interval", getFixedInterval()) + .add("interval", getImplicitLockInterval()) .toString(); } } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/DeleteTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/DeleteTask.java index f9d0de34138..991d76228dd 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/DeleteTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/DeleteTask.java @@ -78,7 +78,7 @@ public class DeleteTask extends AbstractTask { // Strategy: Create an empty segment covering the interval to be deleted final TaskLock myLock = Iterables.getOnlyElement(toolbox.getTaskActionClient().submit(new LockListAction(this))); - final Interval interval = this.getFixedInterval().get(); + final Interval 
interval = this.getImplicitLockInterval().get(); final IncrementalIndex empty = new IncrementalIndex(0, QueryGranularity.NONE, new AggregatorFactory[0]); final IndexableAdapter emptyAdapter = new IncrementalIndexAdapter(interval, empty); diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/HadoopIndexTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/HadoopIndexTask.java new file mode 100644 index 00000000000..29c0c517f17 --- /dev/null +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/HadoopIndexTask.java @@ -0,0 +1,117 @@ +package com.metamx.druid.merger.common.task; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.indexer.HadoopDruidIndexerConfig; +import com.metamx.druid.indexer.HadoopDruidIndexerJob; +import com.metamx.druid.loading.S3DataSegmentPusher; +import com.metamx.druid.merger.common.TaskLock; +import com.metamx.druid.merger.common.TaskStatus; +import com.metamx.druid.merger.common.TaskToolbox; +import com.metamx.druid.merger.common.actions.LockListAction; +import com.metamx.druid.merger.common.actions.SegmentInsertAction; +import com.metamx.druid.utils.JodaUtils; +import org.joda.time.DateTime; + +import java.util.List; + +public class HadoopIndexTask extends AbstractTask +{ + @JsonProperty + private final HadoopDruidIndexerConfig config; + + private static final Logger log = new Logger(HadoopIndexTask.class); + + /** + * @param config is used by the HadoopDruidIndexerJob to set up the appropriate parameters + * for creating Druid index segments. It may be modified. + *
+ * Here, we will ensure that the UpdaterJobSpec field of the config is set to null, such that the + * job does not push a list of published segments the database. Instead, we will use the method + * IndexGeneratorJob.getPublishedSegments() to simply return a list of the published + * segments, and let the indexing service report these segments to the database. + */ + + @JsonCreator + public HadoopIndexTask( + @JsonProperty("config") HadoopDruidIndexerConfig config + ) + { + super( + String.format("index_hadoop_%s_%s", config.getDataSource(), new DateTime()), + config.getDataSource(), + JodaUtils.umbrellaInterval(config.getIntervals()) + ); + + // Some HadoopDruidIndexerConfig stuff doesn't make sense in the context of the indexing service + Preconditions.checkArgument(config.getSegmentOutputDir() == null, "segmentOutputPath must be absent"); + Preconditions.checkArgument(config.getJobOutputDir() == null, "workingPath must be absent"); + Preconditions.checkArgument(!config.isUpdaterJobSpecSet(), "updaterJobSpec must be absent"); + + this.config = config; + } + + @Override + public String getType() + { + return "index_hadoop"; + } + + @Override + public TaskStatus run(TaskToolbox toolbox) throws Exception + { + // Copy config so we don't needlessly modify our provided one + // Also necessary to make constructor validations work upon serde-after-run + final HadoopDruidIndexerConfig configCopy = toolbox.getObjectMapper() + .readValue( + toolbox.getObjectMapper().writeValueAsBytes(config), + HadoopDruidIndexerConfig.class + ); + + // We should have a lock from before we started running + final TaskLock myLock = Iterables.getOnlyElement(toolbox.getTaskActionClient().submit(new LockListAction(this))); + log.info("Setting version to: %s", myLock.getVersion()); + configCopy.setVersion(myLock.getVersion()); + + // Set workingPath to some reasonable default + configCopy.setJobOutputDir(toolbox.getConfig().getHadoopWorkingPath()); + + if (toolbox.getSegmentPusher() instanceof S3DataSegmentPusher) { + // Hack alert! Bypassing DataSegmentPusher... + S3DataSegmentPusher segmentPusher = (S3DataSegmentPusher) toolbox.getSegmentPusher(); + String s3Path = String.format( + "s3n://%s/%s/%s", + segmentPusher.getConfig().getBucket(), + segmentPusher.getConfig().getBaseKey(), + getDataSource() + ); + + log.info("Setting segment output path to: %s", s3Path); + configCopy.setSegmentOutputDir(s3Path); + } else { + throw new IllegalStateException("Sorry, we only work with S3DataSegmentPushers! 
Bummer!"); + } + + HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(configCopy); + configCopy.verify(); + + log.info("Starting a hadoop index generator job..."); + if (job.run()) { + List publishedSegments = job.getPublishedSegments(); + + // Request segment pushes + toolbox.getTaskActionClient().submit(new SegmentInsertAction(this, ImmutableSet.copyOf(publishedSegments))); + + // Done + return TaskStatus.success(getId()); + } else { + return TaskStatus.failure(getId()); + } + + } +} diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexDeterminePartitionsTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexDeterminePartitionsTask.java index b94625e7ffa..2886c8cc77a 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexDeterminePartitionsTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexDeterminePartitionsTask.java @@ -50,11 +50,16 @@ public class IndexDeterminePartitionsTask extends AbstractTask { @JsonProperty private final FirehoseFactory firehoseFactory; + @JsonProperty private final Schema schema; + @JsonProperty private final long targetPartitionSize; + @JsonProperty + private final int rowFlushBoundary; + private static final Logger log = new Logger(IndexTask.class); @JsonCreator @@ -63,7 +68,8 @@ public class IndexDeterminePartitionsTask extends AbstractTask @JsonProperty("interval") Interval interval, @JsonProperty("firehose") FirehoseFactory firehoseFactory, @JsonProperty("schema") Schema schema, - @JsonProperty("targetPartitionSize") long targetPartitionSize + @JsonProperty("targetPartitionSize") long targetPartitionSize, + @JsonProperty("rowFlushBoundary") int rowFlushBoundary ) { super( @@ -81,6 +87,7 @@ public class IndexDeterminePartitionsTask extends AbstractTask this.firehoseFactory = firehoseFactory; this.schema = schema; this.targetPartitionSize = targetPartitionSize; + this.rowFlushBoundary = rowFlushBoundary; } @Override @@ -97,7 +104,7 @@ public class IndexDeterminePartitionsTask extends AbstractTask // TODO: Replace/merge/whatever with hadoop determine-partitions code // We know this exists - final Interval interval = getFixedInterval().get(); + final Interval interval = getImplicitLockInterval().get(); // Blacklist dimensions that have multiple values per row final Set unusableDimensions = Sets.newHashSet(); @@ -237,14 +244,15 @@ public class IndexDeterminePartitionsTask extends AbstractTask { return new IndexGeneratorTask( getGroupId(), - getFixedInterval().get(), + getImplicitLockInterval().get(), firehoseFactory, new Schema( schema.getDataSource(), schema.getAggregators(), schema.getIndexGranularity(), shardSpec - ) + ), + rowFlushBoundary ); } } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java index f7a95d373bf..7dc65797ea8 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexGeneratorTask.java @@ -28,7 +28,7 @@ import com.google.common.collect.Maps; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import com.metamx.druid.input.InputRow; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; import com.metamx.druid.merger.common.TaskLock; import com.metamx.druid.merger.common.TaskStatus; import com.metamx.druid.merger.common.TaskToolbox; @@ -58,6 +58,9 @@ 
public class IndexGeneratorTask extends AbstractTask @JsonProperty private final Schema schema; + @JsonProperty + private final int rowFlushBoundary; + private static final Logger log = new Logger(IndexTask.class); @JsonCreator @@ -65,7 +68,8 @@ public class IndexGeneratorTask extends AbstractTask @JsonProperty("groupId") String groupId, @JsonProperty("interval") Interval interval, @JsonProperty("firehose") FirehoseFactory firehoseFactory, - @JsonProperty("schema") Schema schema + @JsonProperty("schema") Schema schema, + @JsonProperty("rowFlushBoundary") int rowFlushBoundary ) { super( @@ -83,6 +87,7 @@ public class IndexGeneratorTask extends AbstractTask this.firehoseFactory = firehoseFactory; this.schema = schema; + this.rowFlushBoundary = rowFlushBoundary; } @Override @@ -98,7 +103,7 @@ public class IndexGeneratorTask extends AbstractTask final TaskLock myLock = Iterables.getOnlyElement(toolbox.getTaskActionClient().submit(new LockListAction(this))); // We know this exists - final Interval interval = getFixedInterval().get(); + final Interval interval = getImplicitLockInterval().get(); // Set up temporary directory for indexing final File tmpDir = new File( @@ -118,7 +123,7 @@ public class IndexGeneratorTask extends AbstractTask // We need to track published segments. final List pushedSegments = new CopyOnWriteArrayList(); - final SegmentPusher wrappedSegmentPusher = new SegmentPusher() + final DataSegmentPusher wrappedDataSegmentPusher = new DataSegmentPusher() { @Override public DataSegment push(File file, DataSegment segment) throws IOException @@ -135,10 +140,15 @@ public class IndexGeneratorTask extends AbstractTask final Plumber plumber = new YeOldePlumberSchool( interval, myLock.getVersion(), - wrappedSegmentPusher, + wrappedDataSegmentPusher, tmpDir ).findPlumber(schema, metrics); + // rowFlushBoundary for this job + final int myRowFlushBoundary = this.rowFlushBoundary > 0 + ? 
rowFlushBoundary + : toolbox.getConfig().getDefaultRowFlushBoundary(); + try { while(firehose.hasMore()) { final InputRow inputRow = firehose.nextRow(); @@ -157,7 +167,7 @@ public class IndexGeneratorTask extends AbstractTask int numRows = sink.add(inputRow); metrics.incrementProcessed(); - if(numRows >= toolbox.getConfig().getRowFlushBoundary()) { + if(numRows >= myRowFlushBoundary) { plumber.persist(firehose.commit()); } } else { @@ -195,7 +205,7 @@ public class IndexGeneratorTask extends AbstractTask * @return true or false */ private boolean shouldIndex(InputRow inputRow) { - if(!getFixedInterval().get().contains(inputRow.getTimestampFromEpoch())) { + if(!getImplicitLockInterval().get().contains(inputRow.getTimestampFromEpoch())) { return false; } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexTask.java index 8cfaa15d300..246389470ef 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/IndexTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/IndexTask.java @@ -42,11 +42,23 @@ import java.util.List; public class IndexTask extends AbstractTask { - @JsonProperty private final GranularitySpec granularitySpec; - @JsonProperty private final AggregatorFactory[] aggregators; - @JsonProperty private final QueryGranularity indexGranularity; - @JsonProperty private final long targetPartitionSize; - @JsonProperty private final FirehoseFactory firehoseFactory; + @JsonProperty + private final GranularitySpec granularitySpec; + + @JsonProperty + private final AggregatorFactory[] aggregators; + + @JsonProperty + private final QueryGranularity indexGranularity; + + @JsonProperty + private final long targetPartitionSize; + + @JsonProperty + private final FirehoseFactory firehoseFactory; + + @JsonProperty + private final int rowFlushBoundary; private static final Logger log = new Logger(IndexTask.class); @@ -57,7 +69,8 @@ public class IndexTask extends AbstractTask @JsonProperty("aggregators") AggregatorFactory[] aggregators, @JsonProperty("indexGranularity") QueryGranularity indexGranularity, @JsonProperty("targetPartitionSize") long targetPartitionSize, - @JsonProperty("firehose") FirehoseFactory firehoseFactory + @JsonProperty("firehose") FirehoseFactory firehoseFactory, + @JsonProperty("rowFlushBoundary") int rowFlushBoundary ) { super( @@ -75,6 +88,7 @@ public class IndexTask extends AbstractTask this.indexGranularity = indexGranularity; this.targetPartitionSize = targetPartitionSize; this.firehoseFactory = firehoseFactory; + this.rowFlushBoundary = rowFlushBoundary; } public List toSubtasks() @@ -95,7 +109,8 @@ public class IndexTask extends AbstractTask indexGranularity, new NoneShardSpec() ), - targetPartitionSize + targetPartitionSize, + rowFlushBoundary ) ); } else { @@ -110,7 +125,8 @@ public class IndexTask extends AbstractTask aggregators, indexGranularity, new NoneShardSpec() - ) + ), + rowFlushBoundary ) ); } diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/KillTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/KillTask.java index 84deaee5e33..0c1602c759d 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/KillTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/KillTask.java @@ -59,8 +59,8 @@ public class KillTask extends AbstractTask throw new ISE("WTF?! 
Lock dataSource[%s] != task dataSource[%s]", myLock.getDataSource(), getDataSource()); } - if(!myLock.getInterval().equals(getFixedInterval().get())) { - throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getFixedInterval().get()); + if(!myLock.getInterval().equals(getImplicitLockInterval().get())) { + throw new ISE("WTF?! Lock interval[%s] != task interval[%s]", myLock.getInterval(), getImplicitLockInterval().get()); } // List unused segments diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java b/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java index 9af322d3bfc..1791c2a097d 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/MergeTask.java @@ -22,6 +22,7 @@ package com.metamx.druid.merger.common.task; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Objects; @@ -31,27 +32,22 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import com.google.common.collect.Sets; +import com.google.common.hash.Hashing; import com.metamx.common.ISE; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; -import com.metamx.druid.loading.SegmentPuller; +import com.metamx.druid.merger.common.TaskLock; import com.metamx.druid.merger.common.TaskStatus; import com.metamx.druid.merger.common.TaskToolbox; import com.metamx.druid.merger.common.actions.LockListAction; import com.metamx.druid.merger.common.actions.SegmentInsertAction; import com.metamx.druid.merger.common.actions.SegmentListUsedAction; -import com.metamx.druid.merger.common.TaskLock; import com.metamx.druid.shard.NoneShardSpec; import com.metamx.emitter.service.AlertEvent; import com.metamx.emitter.service.ServiceEmitter; import com.metamx.emitter.service.ServiceMetricEvent; -import org.apache.commons.codec.digest.DigestUtils; - - - import org.joda.time.DateTime; import org.joda.time.Interval; @@ -151,13 +147,7 @@ public abstract class MergeTask extends AbstractTask // download segments to merge - final Map segmentGetters = toolbox.getSegmentGetters(this); - final Map gettedSegments = Maps.newHashMap(); - for (final DataSegment segment : segments) { - Map loadSpec = segment.getLoadSpec(); - SegmentPuller segmentPuller = segmentGetters.get(loadSpec.get("type")); - gettedSegments.put(segment, segmentPuller.getSegmentFiles(segment)); - } + final Map gettedSegments = toolbox.getSegments(this, segments); // merge files together final File fileToUpload = merge(gettedSegments, new File(taskDir, "merged")); @@ -225,7 +215,7 @@ public abstract class MergeTask extends AbstractTask final Set current = ImmutableSet.copyOf( Iterables.transform( toolbox.getTaskActionClient() - .submit(new SegmentListUsedAction(this, getDataSource(), getFixedInterval().get())), + .submit(new SegmentListUsedAction(this, getDataSource(), getImplicitLockInterval().get())), toIdentifier ) ); @@ -266,7 +256,7 @@ public abstract class MergeTask extends AbstractTask return Objects.toStringHelper(this) .add("id", 
getId()) .add("dataSource", getDataSource()) - .add("interval", getFixedInterval()) + .add("interval", getImplicitLockInterval()) .add("segments", segments) .toString(); } @@ -292,7 +282,11 @@ public abstract class MergeTask extends AbstractTask ) ); - return String.format("%s_%s", dataSource, DigestUtils.sha1Hex(segmentIDs).toLowerCase()); + return String.format( + "%s_%s", + dataSource, + Hashing.sha1().hashString(segmentIDs, Charsets.UTF_8).toString().toLowerCase() + ); } private static Interval computeMergedInterval(final List segments) diff --git a/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java b/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java index 5ac4dcf71df..8d93486ff6b 100644 --- a/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java +++ b/merger/src/main/java/com/metamx/druid/merger/common/task/Task.java @@ -27,7 +27,19 @@ import com.metamx.druid.merger.common.TaskToolbox; import org.joda.time.Interval; /** - * Represents a task that can run on a worker. Immutable. + * Represents a task that can run on a worker. The general contracts surrounding Tasks are: + *
+ *
+ * - Tasks must operate on a single datasource.
+ *
+ * - Tasks should be immutable, since the task ID is used as a proxy for the task in many locations.
+ *
+ * - Task IDs must be unique. This can be done by naming them using UUIDs or the current timestamp.
+ *
+ * - Tasks are each part of a "task group", which is a set of tasks that can share interval locks. These are
+ * useful for producing sharded segments.
+ *
+ * - Tasks can optionally have an "implicit lock interval". Tasks with this property are guaranteed to have
+ * a lock on that interval during their {@link #preflight(com.metamx.druid.merger.common.TaskToolbox)} and
+ * {@link #run(com.metamx.druid.merger.common.TaskToolbox)} methods.
+ *
+ * - Tasks do not need to explicitly release locks; they are released upon task completion. Tasks may choose
+ * to release locks early if they desire.
+ *
*/ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = DefaultMergeTask.class) @JsonSubTypes(value = { @@ -37,7 +49,8 @@ import org.joda.time.Interval; @JsonSubTypes.Type(name = "kill", value = KillTask.class), @JsonSubTypes.Type(name = "index", value = IndexTask.class), @JsonSubTypes.Type(name = "index_partitions", value = IndexDeterminePartitionsTask.class), - @JsonSubTypes.Type(name = "index_generator", value = IndexGeneratorTask.class) + @JsonSubTypes.Type(name = "index_generator", value = IndexGeneratorTask.class), + @JsonSubTypes.Type(name = "index_hadoop", value = HadoopIndexTask.class) }) public interface Task { @@ -63,15 +76,15 @@ public interface Task public String getDataSource(); /** - * Returns fixed interval for this task, if any. Tasks without fixed intervals are not granted locks when started - * and must explicitly request them. + * Returns implicit lock interval for this task, if any. Tasks without implicit lock intervals are not granted locks + * when started and must explicitly request them. */ - public Optional getFixedInterval(); + public Optional getImplicitLockInterval(); /** * Execute preflight checks for a task. This typically runs on the coordinator, and will be run while - * holding a lock on our dataSource and interval. If this method throws an exception, the task should be - * considered a failure. + * holding a lock on our dataSource and implicit lock interval (if any). If this method throws an exception, the + * task should be considered a failure. * * @param toolbox Toolbox for this task * @@ -84,8 +97,8 @@ public interface Task /** * Execute a task. This typically runs on a worker as determined by a TaskRunner, and will be run while - * holding a lock on our dataSource and interval. If this method throws an exception, the task should be - * considered a failure. + * holding a lock on our dataSource and implicit lock interval (if any). If this method throws an exception, the task + * should be considered a failure. 
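Several tasks in this patch rely on the lock guarantee described above by fetching the single lock they should already hold while running. A hedged sketch of that idiom, using only classes that appear in this patch; the wrapper class itself is hypothetical.

import com.google.common.collect.Iterables;
import com.metamx.druid.merger.common.TaskLock;
import com.metamx.druid.merger.common.TaskToolbox;
import com.metamx.druid.merger.common.actions.LockListAction;
import com.metamx.druid.merger.common.task.Task;

public class HeldLockLookup
{
  /**
   * For a task with an implicit lock interval, exactly one lock is expected to be held during
   * run(), so LockListAction should return a single element.
   */
  public static TaskLock onlyLockFor(Task task, TaskToolbox toolbox) throws Exception
  {
    return Iterables.getOnlyElement(toolbox.getTaskActionClient().submit(new LockListAction(task)));
  }
}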
* * @param toolbox Toolbox for this task * diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/DbTaskStorage.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/DbTaskStorage.java index 4bbf0f37a32..5f39efae0bb 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/DbTaskStorage.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/DbTaskStorage.java @@ -379,7 +379,7 @@ public class DbTaskStorage implements TaskStorage public TaskAction apply(Map row) { try { - return jsonMapper.readValue(row.get("payload").toString(), TaskAction.class); + return jsonMapper.readValue(row.get("log_payload").toString(), TaskAction.class); } catch(Exception e) { throw Throwables.propagate(e); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java index 06dd63730cc..e4a13136152 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/RemoteTaskRunner.java @@ -53,10 +53,8 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -241,35 +239,28 @@ public class RemoteTaskRunner implements TaskRunner */ private void runPendingTasks() { - Future future = runPendingTasksExec.submit( + runPendingTasksExec.submit( new Callable() { @Override public Void call() throws Exception { - // make a copy of the pending tasks because assignTask may delete tasks from pending and move them - // into running status - List copy = Lists.newArrayList(pendingTasks.values()); - for (TaskRunnerWorkItem taskWrapper : copy) { - assignTask(taskWrapper); + try { + // make a copy of the pending tasks because assignTask may delete tasks from pending and move them + // into running status + List copy = Lists.newArrayList(pendingTasks.values()); + for (TaskRunnerWorkItem taskWrapper : copy) { + assignTask(taskWrapper); + } + } + catch (Exception e) { + log.makeAlert(e, "Exception in running pending tasks").emit(); } return null; } } ); - - try { - future.get(); - } - catch (InterruptedException e) { - log.error(e, "InterruptedException in runPendingTasks()"); - throw Throwables.propagate(e); - } - catch (ExecutionException e) { - log.error(e, "ExecutionException in runPendingTasks()"); - throw Throwables.propagate(e.getCause()); - } } /** @@ -414,82 +405,67 @@ public class RemoteTaskRunner implements TaskRunner @Override public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception { - - try { - if (event.getType().equals(PathChildrenCacheEvent.Type.CHILD_ADDED) || - event.getType().equals(PathChildrenCacheEvent.Type.CHILD_UPDATED)) { - final String taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); - final TaskStatus taskStatus; - - // This can fail if a worker writes a bogus status. Retry if so. 
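The runPendingTasks rework above switches from blocking on a Future to a fire-and-forget submission that reports failures from inside the Callable. A simplified, library-free sketch of that pattern follows; logging is replaced with stderr so the example stays self-contained.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class FireAndForgetRunner
{
  private final ExecutorService exec = Executors.newSingleThreadExecutor();

  public void submitPendingWork(final Runnable work)
  {
    exec.submit(
        new Callable<Void>()
        {
          @Override
          public Void call() throws Exception
          {
            try {
              work.run();
            }
            catch (Exception e) {
              // The patch emits an alert here (log.makeAlert(...).emit()); this sketch just reports it.
              e.printStackTrace();
            }
            return null;
          }
        }
    );
  }
}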
- try { - taskStatus = jsonMapper.readValue( + synchronized (statusLock) { + try { + if (event.getType().equals(PathChildrenCacheEvent.Type.CHILD_ADDED) || + event.getType().equals(PathChildrenCacheEvent.Type.CHILD_UPDATED)) { + final String taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); + final TaskStatus taskStatus = jsonMapper.readValue( event.getData().getData(), TaskStatus.class ); + // This can fail if a worker writes a bogus status. Retry if so. if (!taskStatus.getId().equals(taskId)) { - // Sanity check - throw new ISE( - "Worker[%s] status id does not match payload id: %s != %s", - worker.getHost(), - taskId, - taskStatus.getId() - ); + retryTask(runningTasks.get(taskId), worker.getHost()); + return; } - } - catch (Exception e) { - log.warn(e, "Worker[%s] wrote bogus status for task: %s", worker.getHost(), taskId); - retryTask(runningTasks.get(taskId), worker.getHost()); - throw Throwables.propagate(e); - } - log.info( - "Worker[%s] wrote %s status for task: %s", - worker.getHost(), - taskStatus.getStatusCode(), - taskId - ); - - // Synchronizing state with ZK - synchronized (statusLock) { - statusLock.notify(); - } - - final TaskRunnerWorkItem taskRunnerWorkItem = runningTasks.get(taskId); - if (taskRunnerWorkItem == null) { - log.warn( - "WTF?! Worker[%s] announcing a status for a task I didn't know about: %s", + log.info( + "Worker[%s] wrote %s status for task: %s", worker.getHost(), + taskStatus.getStatusCode(), taskId ); - } - if (taskStatus.isComplete()) { - if (taskRunnerWorkItem != null) { - final TaskCallback callback = taskRunnerWorkItem.getCallback(); - if (callback != null) { - callback.notify(taskStatus); - } + // Synchronizing state with ZK + statusLock.notify(); + + final TaskRunnerWorkItem taskRunnerWorkItem = runningTasks.get(taskId); + if (taskRunnerWorkItem == null) { + log.warn( + "WTF?! 
Worker[%s] announcing a status for a task I didn't know about: %s", + worker.getHost(), + taskId + ); } - // Worker is done with this task - zkWorker.setLastCompletedTaskTime(new DateTime()); - cleanup(worker.getHost(), taskId); - runPendingTasks(); - } - } else if (event.getType().equals(PathChildrenCacheEvent.Type.CHILD_REMOVED)) { - final String taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); - if (runningTasks.containsKey(taskId)) { - log.info("Task %s just disappeared!", taskId); - retryTask(runningTasks.get(taskId), worker.getHost()); + if (taskStatus.isComplete()) { + if (taskRunnerWorkItem != null) { + final TaskCallback callback = taskRunnerWorkItem.getCallback(); + if (callback != null) { + callback.notify(taskStatus); + } + } + + // Worker is done with this task + zkWorker.setLastCompletedTaskTime(new DateTime()); + cleanup(worker.getHost(), taskId); + runPendingTasks(); + } + } else if (event.getType().equals(PathChildrenCacheEvent.Type.CHILD_REMOVED)) { + final String taskId = ZKPaths.getNodeFromPath(event.getData().getPath()); + if (runningTasks.containsKey(taskId)) { + log.info("Task %s just disappeared!", taskId); + retryTask(runningTasks.get(taskId), worker.getHost()); + } } } - } - catch (Exception e) { - log.makeAlert(e, "Failed to handle new worker status") - .addData("worker", worker.getHost()) - .addData("znode", event.getData().getPath()) - .emit(); + catch (Exception e) { + log.makeAlert(e, "Failed to handle new worker status") + .addData("worker", worker.getHost()) + .addData("znode", event.getData().getPath()) + .emit(); + } } } } @@ -516,19 +492,26 @@ public class RemoteTaskRunner implements TaskRunner ZkWorker zkWorker = zkWorkers.get(worker.getHost()); if (zkWorker != null) { try { - Set tasksPending = Sets.newHashSet( + Set tasksToRetry = Sets.newHashSet( cf.getChildren() .forPath(JOINER.join(config.getTaskPath(), worker.getHost())) ); - log.info("%s had %d tasks pending", worker.getHost(), tasksPending.size()); + tasksToRetry.addAll( + cf.getChildren() + .forPath(JOINER.join(config.getStatusPath(), worker.getHost())) + ); + log.info("%s has %d tasks to retry", worker.getHost(), tasksToRetry.size()); - for (String taskId : tasksPending) { - TaskRunnerWorkItem taskRunnerWorkItem = pendingTasks.get(taskId); + for (String taskId : tasksToRetry) { + TaskRunnerWorkItem taskRunnerWorkItem = runningTasks.get(taskId); if (taskRunnerWorkItem != null) { - cf.delete().guaranteed().forPath(JOINER.join(config.getTaskPath(), worker.getHost(), taskId)); + String taskPath = JOINER.join(config.getTaskPath(), worker.getHost(), taskId); + if (cf.checkExists().forPath(taskPath) != null) { + cf.delete().guaranteed().forPath(taskPath); + } retryTask(taskRunnerWorkItem, worker.getHost()); } else { - log.warn("RemoteTaskRunner has no knowledge of pending task %s", taskId); + log.warn("RemoteTaskRunner has no knowledge of task %s", taskId); } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskLockbox.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskLockbox.java index b62bca604d4..07abd0594ac 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskLockbox.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskLockbox.java @@ -1,6 +1,7 @@ package com.metamx.druid.merger.coordinator; import com.google.common.base.Function; +import com.google.common.base.Objects; import com.google.common.base.Optional; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; @@ -77,7 
+78,7 @@ public class TaskLockbox try { - if(task.getFixedInterval().isPresent() && !task.getFixedInterval().get().equals(interval)) { + if(task.getImplicitLockInterval().isPresent() && !task.getImplicitLockInterval().get().equals(interval)) { // Task may only lock its fixed interval, if present throw new IAE("Task must lock its fixed interval: %s", task.getId()); } @@ -291,9 +292,9 @@ public class TaskLockbox try { final Iterable searchSpace; - if (task.getFixedInterval().isPresent()) { + if (task.getImplicitLockInterval().isPresent()) { // Narrow down search using findLockPossesForInterval - searchSpace = findLockPossesForInterval(task.getDataSource(), task.getFixedInterval().get()); + searchSpace = findLockPossesForInterval(task.getDataSource(), task.getImplicitLockInterval().get()); } else { // Scan through all locks for this datasource final NavigableMap dsRunning = running.get(task.getDataSource()); @@ -401,5 +402,14 @@ public class TaskLockbox { return taskIds; } + + @Override + public String toString() + { + return Objects.toStringHelper(this) + .add("taskLock", taskLock) + .add("taskIds", taskIds) + .toString(); + } } } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskMasterLifecycle.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskMasterLifecycle.java index 4651840e4d2..c1daf6a8d50 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskMasterLifecycle.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskMasterLifecycle.java @@ -100,6 +100,7 @@ public class TaskMasterLifecycle leaderLifecycle.addManagedInstance(taskRunner); Initialization.makeServiceDiscoveryClient(curator, serviceDiscoveryConfig, leaderLifecycle); leaderLifecycle.addManagedInstance(taskConsumer); + leaderLifecycle.addManagedInstance(scheduler); leading = true; diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskQueue.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskQueue.java index 377004d473f..e16912b4c6e 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskQueue.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/TaskQueue.java @@ -225,8 +225,8 @@ public class TaskQueue // Attempt to add this task to a running task group. Silently continue if this is not possible. // The main reason this is here is so when subtasks are added, they end up in the same task group // as their parent whenever possible. - if(task.getFixedInterval().isPresent()) { - taskLockbox.tryLock(task, task.getFixedInterval().get()); + if(task.getImplicitLockInterval().isPresent()) { + taskLockbox.tryLock(task, task.getImplicitLockInterval().get()); } return true; @@ -274,9 +274,9 @@ public class TaskQueue try { for (final Task task : queue) { - if(task.getFixedInterval().isPresent()) { + if(task.getImplicitLockInterval().isPresent()) { // If this task has a fixed interval, attempt to lock it right now. 
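The TaskQueue changes above only attempt an eager lock for tasks that declare an implicit lock interval. A minimal sketch of that shape, using a stand-in lockbox interface rather than Druid's TaskLockbox API.

import com.google.common.base.Optional;
import org.joda.time.Interval;

public class BootstrapLockSketch
{
  // Stand-in for the tryLock call used in the patch; not Druid's TaskLockbox API.
  public interface Lockbox
  {
    Optional<?> tryLock(String taskId, Interval interval);
  }

  public static boolean claimIfPossible(Lockbox lockbox, String taskId, Optional<Interval> implicitLockInterval)
  {
    if (!implicitLockInterval.isPresent()) {
      // No implicit interval: the task must request locks explicitly while running.
      return false;
    }
    return lockbox.tryLock(taskId, implicitLockInterval.get()).isPresent();
  }
}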
- final Optional maybeLock = taskLockbox.tryLock(task, task.getFixedInterval().get()); + final Optional maybeLock = taskLockbox.tryLock(task, task.getImplicitLockInterval().get()); if(maybeLock.isPresent()) { log.info("Task claimed with fixed interval lock: %s", task.getId()); queue.remove(task); diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java index 14135174e59..f1e31a9e5c1 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/exec/TaskConsumer.java @@ -96,7 +96,7 @@ public class TaskConsumer implements Runnable .addData("task", task.getId()) .addData("type", task.getType()) .addData("dataSource", task.getDataSource()) - .addData("interval", task.getFixedInterval()) + .addData("interval", task.getImplicitLockInterval()) .emit(); // Retry would be nice, but only after we have a way to throttle and limit them. Just fail for now. @@ -118,7 +118,7 @@ public class TaskConsumer implements Runnable final ServiceMetricEvent.Builder metricBuilder = new ServiceMetricEvent.Builder() .setUser2(task.getDataSource()) .setUser4(task.getType()) - .setUser5(task.getFixedInterval().toString()); + .setUser5(task.getImplicitLockInterval().toString()); // Run preflight checks TaskStatus preflightStatus; @@ -166,7 +166,7 @@ public class TaskConsumer implements Runnable .addData("task", task.getId()) .addData("type", task.getType()) .addData("dataSource", task.getDataSource()) - .addData("interval", task.getFixedInterval()) + .addData("interval", task.getImplicitLockInterval()) .emit(); } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java index eeea963ab8f..64bd0a0c9d2 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorNode.java @@ -51,11 +51,11 @@ import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerConfig; import com.metamx.druid.initialization.ServiceDiscoveryConfig; import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.loading.DataSegmentPusher; +import com.metamx.druid.loading.S3DataSegmentPusher; +import com.metamx.druid.loading.S3DataSegmentPusherConfig; import com.metamx.druid.loading.S3SegmentKiller; -import com.metamx.druid.loading.S3SegmentPusher; -import com.metamx.druid.loading.S3SegmentPusherConfig; import com.metamx.druid.loading.SegmentKiller; -import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.merger.common.TaskToolbox; import com.metamx.druid.merger.common.actions.LocalTaskActionClient; import com.metamx.druid.merger.common.actions.TaskActionToolbox; @@ -105,8 +105,6 @@ import com.metamx.metrics.MonitorSchedulerConfig; import com.metamx.metrics.SysMonitor; import com.netflix.curator.framework.CuratorFramework; import com.netflix.curator.framework.recipes.cache.PathChildrenCache; - - import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.security.AWSCredentials; @@ -473,9 +471,9 @@ public class IndexerCoordinatorNode extends RegisteringNode public void initializeTaskToolbox() { if (taskToolbox == null) { - final SegmentPusher 
segmentPusher = new S3SegmentPusher( + final DataSegmentPusher dataSegmentPusher = new S3DataSegmentPusher( s3Service, - configFactory.build(S3SegmentPusherConfig.class), + configFactory.build(S3DataSegmentPusherConfig.class), jsonMapper ); final SegmentKiller segmentKiller = new S3SegmentKiller( @@ -489,7 +487,7 @@ public class IndexerCoordinatorNode extends RegisteringNode ), emitter, s3Service, - segmentPusher, + dataSegmentPusher, segmentKiller, jsonMapper ); diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java index d0741968cf2..30e641b2b5b 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/http/IndexerCoordinatorResource.java @@ -21,6 +21,7 @@ package com.metamx.druid.merger.coordinator.http; import com.google.common.base.Optional; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; import com.google.inject.Inject; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; @@ -180,7 +181,10 @@ public class IndexerCoordinatorResource public Response doAction(final TaskAction action) { final T ret = taskMasterLifecycle.getTaskToolbox().getTaskActionClient().submit(action); - return Response.ok().entity(ret).build(); + final Map retMap = Maps.newHashMap(); + retMap.put("result", ret); + + return Response.ok().entity(retMap).build(); } @GET diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ResourceManagementScheduler.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ResourceManagementScheduler.java index 45c0060fda6..b3ea474cc0c 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ResourceManagementScheduler.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/ResourceManagementScheduler.java @@ -71,6 +71,8 @@ public class ResourceManagementScheduler return; } + log.info("Started Resource Management Scheduler"); + ScheduledExecutors.scheduleAtFixedRate( exec, config.getProvisionResourcesDuration(), @@ -123,6 +125,7 @@ public class ResourceManagementScheduler if (!started) { return; } + log.info("Stopping Resource Management Scheduler"); exec.shutdown(); started = false; } diff --git a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/SimpleResourceManagementStrategy.java b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/SimpleResourceManagementStrategy.java index 1cd3f8e46e6..74d34d718d2 100644 --- a/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/SimpleResourceManagementStrategy.java +++ b/merger/src/main/java/com/metamx/druid/merger/coordinator/scaling/SimpleResourceManagementStrategy.java @@ -167,7 +167,7 @@ public class SimpleResourceManagementStrategy implements ResourceManagementStrat ) ); - if (thoseLazyWorkers.isEmpty()) { + if (thoseLazyWorkers.size() <= minNumWorkers) { return false; } diff --git a/merger/src/main/java/com/metamx/druid/merger/worker/Worker.java b/merger/src/main/java/com/metamx/druid/merger/worker/Worker.java index 6b349de38f7..e8eaac824b3 100644 --- a/merger/src/main/java/com/metamx/druid/merger/worker/Worker.java +++ b/merger/src/main/java/com/metamx/druid/merger/worker/Worker.java @@ -84,4 +84,15 @@ public class Worker { return version; } + + @Override + public String toString() + { + return 
"Worker{" + + "host='" + host + '\'' + + ", ip='" + ip + '\'' + + ", capacity=" + capacity + + ", version='" + version + '\'' + + '}'; + } } diff --git a/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java b/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java index f668533a86b..afb217c5e77 100644 --- a/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java +++ b/merger/src/main/java/com/metamx/druid/merger/worker/config/WorkerConfig.java @@ -39,6 +39,9 @@ public abstract class WorkerConfig @Config("druid.worker.version") public abstract String getVersion(); + @Config("druid.worker.masterService") + public abstract String getMasterService(); + @Config("druid.worker.capacity") public int getCapacity() { diff --git a/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java b/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java index 82af6066376..d02ffa5d9e3 100644 --- a/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java +++ b/merger/src/main/java/com/metamx/druid/merger/worker/http/WorkerNode.java @@ -35,12 +35,13 @@ import com.metamx.druid.http.StatusServlet; import com.metamx.druid.initialization.CuratorConfig; import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerConfig; +import com.metamx.druid.initialization.ServiceDiscoveryConfig; import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.loading.DataSegmentPusher; +import com.metamx.druid.loading.S3DataSegmentPusher; +import com.metamx.druid.loading.S3DataSegmentPusherConfig; import com.metamx.druid.loading.S3SegmentKiller; -import com.metamx.druid.loading.S3SegmentPusher; -import com.metamx.druid.loading.S3SegmentPusherConfig; import com.metamx.druid.loading.SegmentKiller; -import com.metamx.druid.loading.SegmentPusher; import com.metamx.druid.merger.common.TaskToolbox; import com.metamx.druid.merger.common.actions.RemoteTaskActionClient; import com.metamx.druid.merger.common.config.IndexerZkConfig; @@ -64,6 +65,8 @@ import com.metamx.metrics.MonitorSchedulerConfig; import com.metamx.metrics.SysMonitor; import com.netflix.curator.framework.CuratorFramework; import com.netflix.curator.framework.recipes.cache.PathChildrenCache; +import com.netflix.curator.x.discovery.ServiceDiscovery; +import com.netflix.curator.x.discovery.ServiceProvider; import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.security.AWSCredentials; @@ -105,6 +108,8 @@ public class WorkerNode extends RegisteringNode private WorkerConfig workerConfig = null; private TaskToolbox taskToolbox = null; private CuratorFramework curatorFramework = null; + private ServiceDiscovery serviceDiscovery = null; + private ServiceProvider coordinatorServiceProvider = null; private WorkerCuratorCoordinator workerCuratorCoordinator = null; private TaskMonitor taskMonitor = null; private Server server = null; @@ -156,6 +161,18 @@ public class WorkerNode extends RegisteringNode return this; } + public WorkerNode setCoordinatorServiceProvider(ServiceProvider coordinatorServiceProvider) + { + this.coordinatorServiceProvider = coordinatorServiceProvider; + return this; + } + + public WorkerNode setServiceDiscovery(ServiceDiscovery serviceDiscovery) + { + this.serviceDiscovery = serviceDiscovery; + return this; + } + public WorkerNode setWorkerCuratorCoordinator(WorkerCuratorCoordinator workerCuratorCoordinator) { 
this.workerCuratorCoordinator = workerCuratorCoordinator; @@ -175,10 +192,12 @@ public class WorkerNode extends RegisteringNode initializeS3Service(); initializeMonitors(); initializeMergerConfig(); + initializeCuratorFramework(); + initializeServiceDiscovery(); + initializeCoordinatorServiceProvider(); initializeTaskToolbox(); initializeJacksonInjections(); initializeJacksonSubtypes(); - initializeCuratorFramework(); initializeCuratorCoordinator(); initializeTaskMonitor(); initializeServer(); @@ -318,9 +337,9 @@ public class WorkerNode extends RegisteringNode public void initializeTaskToolbox() throws S3ServiceException { if (taskToolbox == null) { - final SegmentPusher segmentPusher = new S3SegmentPusher( + final DataSegmentPusher dataSegmentPusher = new S3DataSegmentPusher( s3Service, - configFactory.build(S3SegmentPusherConfig.class), + configFactory.build(S3DataSegmentPusherConfig.class), jsonMapper ); final SegmentKiller segmentKiller = new S3SegmentKiller( @@ -328,10 +347,10 @@ public class WorkerNode extends RegisteringNode ); taskToolbox = new TaskToolbox( taskConfig, - new RemoteTaskActionClient(httpClient, jsonMapper), + new RemoteTaskActionClient(httpClient, coordinatorServiceProvider, jsonMapper), emitter, s3Service, - segmentPusher, + dataSegmentPusher, segmentKiller, jsonMapper ); @@ -340,11 +359,36 @@ public class WorkerNode extends RegisteringNode public void initializeCuratorFramework() throws IOException { - final CuratorConfig curatorConfig = configFactory.build(CuratorConfig.class); - curatorFramework = Initialization.makeCuratorFrameworkClient( - curatorConfig, - lifecycle - ); + if (curatorFramework == null) { + final CuratorConfig curatorConfig = configFactory.build(CuratorConfig.class); + curatorFramework = Initialization.makeCuratorFrameworkClient( + curatorConfig, + lifecycle + ); + } + } + + public void initializeServiceDiscovery() throws Exception + { + if (serviceDiscovery == null) { + final ServiceDiscoveryConfig config = configFactory.build(ServiceDiscoveryConfig.class); + this.serviceDiscovery = Initialization.makeServiceDiscoveryClient( + curatorFramework, + config, + lifecycle + ); + } + } + + public void initializeCoordinatorServiceProvider() + { + if (coordinatorServiceProvider == null) { + this.coordinatorServiceProvider = Initialization.makeServiceProvider( + workerConfig.getMasterService(), + serviceDiscovery, + lifecycle + ); + } } public void initializeCuratorCoordinator() diff --git a/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java b/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java index 5267b8c7983..7c779f6a74c 100644 --- a/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/common/task/MergeTaskTest.java @@ -19,9 +19,10 @@ package com.metamx.druid.merger.common.task; +import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; +import com.google.common.hash.Hashing; import com.metamx.druid.client.DataSegment; -import org.apache.commons.codec.digest.DigestUtils; import org.joda.time.Interval; import org.junit.Assert; import org.junit.Test; @@ -66,17 +67,18 @@ public class MergeTaskTest @Test public void testInterval() { - Assert.assertEquals(new Interval("2012-01-03/2012-01-07"), testMergeTask.getFixedInterval().get()); + Assert.assertEquals(new Interval("2012-01-03/2012-01-07"), testMergeTask.getImplicitLockInterval().get()); } @Test public void testID() { - final String desiredPrefix = 
"merge_foo_" + DigestUtils.sha1Hex( + final String desiredPrefix = "merge_foo_" + Hashing.sha1().hashString( "2012-01-03T00:00:00.000Z_2012-01-05T00:00:00.000Z_V1_0" + "_2012-01-04T00:00:00.000Z_2012-01-06T00:00:00.000Z_V1_0" + "_2012-01-05T00:00:00.000Z_2012-01-07T00:00:00.000Z_V1_0" - ) + "_"; + , Charsets.UTF_8 + ).toString().toLowerCase() + "_"; Assert.assertEquals( desiredPrefix, testMergeTask.getId().substring(0, desiredPrefix.length()) diff --git a/merger/src/test/java/com/metamx/druid/merger/common/task/TaskSerdeTest.java b/merger/src/test/java/com/metamx/druid/merger/common/task/TaskSerdeTest.java index 88b43a71667..701093209ea 100644 --- a/merger/src/test/java/com/metamx/druid/merger/common/task/TaskSerdeTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/common/task/TaskSerdeTest.java @@ -1,12 +1,17 @@ package com.metamx.druid.merger.common.task; +import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; import com.metamx.common.Granularity; import com.metamx.druid.QueryGranularity; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.aggregation.DoubleSumAggregatorFactory; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.indexer.HadoopDruidIndexerConfig; +import com.metamx.druid.indexer.data.JSONDataSpec; import com.metamx.druid.indexer.granularity.UniformGranularitySpec; +import com.metamx.druid.indexer.path.StaticPathSpec; +import com.metamx.druid.indexer.rollup.DataRollupSpec; import com.metamx.druid.jackson.DefaultObjectMapper; import com.metamx.druid.realtime.Schema; import com.metamx.druid.shard.NoneShardSpec; @@ -26,18 +31,23 @@ public class TaskSerdeTest new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")}, QueryGranularity.NONE, 10000, - null + null, + -1 ); final ObjectMapper jsonMapper = new DefaultObjectMapper(); final String json = jsonMapper.writeValueAsString(task); + + Thread.sleep(100); // Just want to run the clock a bit to make sure the task id doesn't change final Task task2 = jsonMapper.readValue(json, Task.class); + Assert.assertEquals("foo", task.getDataSource()); + Assert.assertEquals(Optional.of(new Interval("2010-01-01/P2D")), task.getImplicitLockInterval()); + Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getFixedInterval(), task2.getFixedInterval()); - Assert.assertEquals(task.getFixedInterval().get(), task2.getFixedInterval().get()); + Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); } @Test @@ -52,18 +62,21 @@ public class TaskSerdeTest new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")}, QueryGranularity.NONE, new NoneShardSpec() - ) + ), + -1 ); final ObjectMapper jsonMapper = new DefaultObjectMapper(); final String json = jsonMapper.writeValueAsString(task); final Task task2 = jsonMapper.readValue(json, Task.class); + Assert.assertEquals("foo", task.getDataSource()); + Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getFixedInterval(), task2.getFixedInterval()); - Assert.assertEquals(task.getFixedInterval().get(), task2.getFixedInterval().get()); + 
Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); } @Test @@ -80,11 +93,13 @@ public class TaskSerdeTest final String json = jsonMapper.writeValueAsString(task); final Task task2 = jsonMapper.readValue(json, Task.class); + Assert.assertEquals("foo", task.getDataSource()); + Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getFixedInterval(), task2.getFixedInterval()); - Assert.assertEquals(task.getFixedInterval().get(), task2.getFixedInterval().get()); + Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); } @Test @@ -102,7 +117,50 @@ public class TaskSerdeTest Assert.assertEquals(task.getId(), task2.getId()); Assert.assertEquals(task.getGroupId(), task2.getGroupId()); Assert.assertEquals(task.getDataSource(), task2.getDataSource()); - Assert.assertEquals(task.getFixedInterval(), task2.getFixedInterval()); - Assert.assertEquals(task.getFixedInterval().get(), task2.getFixedInterval().get()); + Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); + Assert.assertEquals(task.getImplicitLockInterval().get(), task2.getImplicitLockInterval().get()); + } + + @Test + public void testHadoopIndexTaskSerde() throws Exception + { + final HadoopIndexTask task = new HadoopIndexTask( + new HadoopDruidIndexerConfig( + null, + "foo", + "timestamp", + "auto", + new JSONDataSpec(ImmutableList.of("foo")), + null, + new UniformGranularitySpec(Granularity.DAY, ImmutableList.of(new Interval("2010-01-01/P1D"))), + new StaticPathSpec("bar"), + null, + null, + null, + null, + null, + null, + false, + true, + null, + false, + new DataRollupSpec(ImmutableList.of(), QueryGranularity.NONE), + null, + false, + ImmutableList.of() + ) + ); + + final ObjectMapper jsonMapper = new DefaultObjectMapper(); + final String json = jsonMapper.writeValueAsString(task); + final Task task2 = jsonMapper.readValue(json, Task.class); + + Assert.assertEquals("foo", task.getDataSource()); + Assert.assertEquals(Optional.of(new Interval("2010-01-01/P1D")), task.getImplicitLockInterval()); + + Assert.assertEquals(task.getId(), task2.getId()); + Assert.assertEquals(task.getGroupId(), task2.getGroupId()); + Assert.assertEquals(task.getDataSource(), task2.getDataSource()); + Assert.assertEquals(task.getImplicitLockInterval(), task2.getImplicitLockInterval()); } } diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java index 4855bab2457..9242e19f355 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/RemoteTaskRunnerTest.java @@ -21,6 +21,8 @@ import com.metamx.druid.merger.coordinator.setup.WorkerSetupManager; import com.metamx.druid.merger.worker.TaskMonitor; import com.metamx.druid.merger.worker.Worker; import com.metamx.druid.merger.worker.WorkerCuratorCoordinator; +import com.metamx.emitter.EmittingLogger; +import com.metamx.emitter.service.ServiceEmitter; import com.netflix.curator.framework.CuratorFramework; import com.netflix.curator.framework.CuratorFrameworkFactory; import com.netflix.curator.framework.recipes.cache.PathChildrenCache; @@ -158,35 +160,32 @@ 
public class RemoteTaskRunnerTest @Test public void testRunTooMuchZKData() throws Exception { - boolean exceptionOccurred = false; - try { - remoteTaskRunner.run( - new TestTask( - new String(new char[5000]), - "dummyDs", - Lists.newArrayList( - new DataSegment( - "dummyDs", - new Interval(new DateTime(), new DateTime()), - new DateTime().toString(), - null, - null, - null, - null, - 0, - 0 - ) - ), - Lists.newArrayList(), - TaskStatus.success("foo") - ), - null - ); - } - catch (IllegalStateException e) { - exceptionOccurred = true; - } - Assert.assertTrue(exceptionOccurred); + ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class); + EmittingLogger.registerEmitter(emitter); + EasyMock.replay(emitter); + remoteTaskRunner.run( + new TestTask( + new String(new char[5000]), + "dummyDs", + Lists.newArrayList( + new DataSegment( + "dummyDs", + new Interval(new DateTime(), new DateTime()), + new DateTime().toString(), + null, + null, + null, + null, + 0, + 0 + ) + ), + Lists.newArrayList(), + TaskStatus.success("foo") + ), + null + ); + EasyMock.verify(emitter); } @Test @@ -296,10 +295,16 @@ public class RemoteTaskRunnerTest } @Override - public long getRowFlushBoundary() + public int getDefaultRowFlushBoundary() { return 0; } + + @Override + public String getHadoopWorkingPath() + { + return null; + } }, null, null, null, null, null, jsonMapper ), Executors.newSingleThreadExecutor() diff --git a/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskLifecycleTest.java b/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskLifecycleTest.java index af662cb5617..ae5b46fdfc1 100644 --- a/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskLifecycleTest.java +++ b/merger/src/test/java/com/metamx/druid/merger/coordinator/TaskLifecycleTest.java @@ -20,9 +20,9 @@ import com.metamx.druid.indexer.granularity.UniformGranularitySpec; import com.metamx.druid.input.InputRow; import com.metamx.druid.input.MapBasedInputRow; import com.metamx.druid.jackson.DefaultObjectMapper; +import com.metamx.druid.loading.DataSegmentPusher; import com.metamx.druid.loading.SegmentKiller; -import com.metamx.druid.loading.SegmentPuller; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.SegmentLoadingException; import com.metamx.druid.merger.common.TaskLock; import com.metamx.druid.merger.common.TaskStatus; import com.metamx.druid.merger.common.TaskToolbox; @@ -106,15 +106,21 @@ public class TaskLifecycleTest } @Override - public long getRowFlushBoundary() + public int getDefaultRowFlushBoundary() { return 50000; } + + @Override + public String getHadoopWorkingPath() + { + return null; + } }, new LocalTaskActionClient(ts, new TaskActionToolbox(tq, tl, mdc, newMockEmitter())), newMockEmitter(), null, // s3 client - new SegmentPusher() + new DataSegmentPusher() { @Override public DataSegment push(File file, DataSegment segment) throws IOException @@ -134,7 +140,9 @@ public class TaskLifecycleTest ) { @Override - public Map getSegmentGetters(Task task) + public Map getSegments( + Task task, List segments + ) throws SegmentLoadingException { return ImmutableMap.of(); } @@ -180,7 +188,8 @@ public class TaskLifecycleTest IR("2010-01-02T01", "a", "b", 2), IR("2010-01-02T01", "a", "c", 1) ) - ) + ), + -1 ); final TaskStatus mergedStatus = runTask(indexTask); @@ -214,7 +223,8 @@ public class TaskLifecycleTest new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")}, QueryGranularity.NONE, 10000, - newMockExceptionalFirehoseFactory() + 
newMockExceptionalFirehoseFactory(), + -1 ); final TaskStatus mergedStatus = runTask(indexTask); diff --git a/pom.xml b/pom.xml index a1f1dd515ff..ebdb8169216 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ com.metamx druid pom - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT druid druid @@ -115,22 +115,22 @@ com.netflix.curator curator-client - 1.2.2 + 1.3.2 com.netflix.curator curator-framework - 1.2.2 + 1.3.2 com.netflix.curator curator-recipes - 1.2.2 + 1.3.2 com.netflix.curator curator-x-discovery - 1.2.2 + 1.3.2 com.netflix.curator @@ -175,7 +175,12 @@ com.fasterxml.jackson.core jackson-databind - 2.1.3 + 2.1.4-mmx-2 + + + com.fasterxml.jackson.datatype + jackson-datatype-guava + 2.1.2 com.fasterxml.jackson.datatype @@ -192,6 +197,16 @@ jackson-jaxrs-json-provider 2.1.3 + + org.codehaus.jackson + jackson-core-asl + 1.9.11 + + + org.codehaus.jackson + jackson-mapper-asl + 1.9.11 + javax.inject javax.inject diff --git a/realtime/pom.xml b/realtime/pom.xml index c321afa695d..37bc5d12c58 100644 --- a/realtime/pom.xml +++ b/realtime/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. --> - + 4.0.0 com.metamx.druid druid-realtime @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/realtime/src/main/java/com/metamx/druid/realtime/FireDepartmentConfig.java b/realtime/src/main/java/com/metamx/druid/realtime/FireDepartmentConfig.java index d98997b5051..efc0c7a598f 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/FireDepartmentConfig.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/FireDepartmentConfig.java @@ -19,6 +19,7 @@ package com.metamx.druid.realtime; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; @@ -31,6 +32,7 @@ public class FireDepartmentConfig private final int maxRowsInMemory; private final Period intermediatePersistPeriod; + @JsonCreator public FireDepartmentConfig( @JsonProperty("maxRowsInMemory") int maxRowsInMemory, @JsonProperty("intermediatePersistPeriod") Period intermediatePersistPeriod diff --git a/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java b/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java index d8fc7ebeea7..087e87d107f 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/RealtimeNode.java @@ -46,9 +46,11 @@ import com.metamx.druid.http.QueryServlet; import com.metamx.druid.http.StatusServlet; import com.metamx.druid.initialization.Initialization; import com.metamx.druid.jackson.DefaultObjectMapper; -import com.metamx.druid.loading.S3SegmentPusher; -import com.metamx.druid.loading.S3SegmentPusherConfig; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; +import com.metamx.druid.loading.LocalDataSegmentPusher; +import com.metamx.druid.loading.LocalDataSegmentPusherConfig; +import com.metamx.druid.loading.S3DataSegmentPusher; +import com.metamx.druid.loading.S3DataSegmentPusherConfig; import com.metamx.druid.query.QueryRunnerFactoryConglomerate; import com.metamx.druid.utils.PropUtils; import com.metamx.emitter.service.ServiceEmitter; @@ -86,7 +88,7 @@ public class RealtimeNode extends BaseServerNode private final Map injectablesMap = Maps.newLinkedHashMap(); private MetadataUpdater metadataUpdater = null; - private SegmentPusher segmentPusher = null; + private DataSegmentPusher dataSegmentPusher = 
null; private List fireDepartments = null; private ServerView view = null; @@ -117,10 +119,10 @@ public class RealtimeNode extends BaseServerNode return this; } - public RealtimeNode setSegmentPusher(SegmentPusher segmentPusher) + public RealtimeNode setDataSegmentPusher(DataSegmentPusher dataSegmentPusher) { - Preconditions.checkState(this.segmentPusher == null, "Cannot set segmentPusher once it has already been set."); - this.segmentPusher = segmentPusher; + Preconditions.checkState(this.dataSegmentPusher == null, "Cannot set segmentPusher once it has already been set."); + this.dataSegmentPusher = dataSegmentPusher; return this; } @@ -144,10 +146,10 @@ public class RealtimeNode extends BaseServerNode return metadataUpdater; } - public SegmentPusher getSegmentPusher() + public DataSegmentPusher getDataSegmentPusher() { initializeSegmentPusher(); - return segmentPusher; + return dataSegmentPusher; } public List getFireDepartments() @@ -220,7 +222,7 @@ public class RealtimeNode extends BaseServerNode } injectables.put("queryRunnerFactoryConglomerate", getConglomerate()); - injectables.put("segmentPusher", segmentPusher); + injectables.put("segmentPusher", dataSegmentPusher); injectables.put("metadataUpdater", metadataUpdater); injectables.put("serverView", view); injectables.put("serviceEmitter", getEmitter()); @@ -256,22 +258,32 @@ public class RealtimeNode extends BaseServerNode private void initializeSegmentPusher() { - if (segmentPusher == null) { + if (dataSegmentPusher == null) { final Properties props = getProps(); - final RestS3Service s3Client; - try { - s3Client = new RestS3Service( - new AWSCredentials( - PropUtils.getProperty(props, "com.metamx.aws.accessKey"), - PropUtils.getProperty(props, "com.metamx.aws.secretKey") - ) + if (Boolean.parseBoolean(props.getProperty("druid.pusher.local", "false"))) { + dataSegmentPusher = new LocalDataSegmentPusher( + getConfigFactory().build(LocalDataSegmentPusherConfig.class), getJsonMapper() ); } - catch (S3ServiceException e) { - throw Throwables.propagate(e); - } + else { - segmentPusher = new S3SegmentPusher(s3Client, getConfigFactory().build(S3SegmentPusherConfig.class), getJsonMapper()); + final RestS3Service s3Client; + try { + s3Client = new RestS3Service( + new AWSCredentials( + PropUtils.getProperty(props, "com.metamx.aws.accessKey"), + PropUtils.getProperty(props, "com.metamx.aws.secretKey") + ) + ); + } + catch (S3ServiceException e) { + throw Throwables.propagate(e); + } + + dataSegmentPusher = new S3DataSegmentPusher( + s3Client, getConfigFactory().build(S3DataSegmentPusherConfig.class), getJsonMapper() + ); + } } } diff --git a/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java b/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java index 47a4ef0f40f..775dc7d5305 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/RealtimePlumberSchool.java @@ -48,7 +48,7 @@ import com.metamx.druid.index.Segment; import com.metamx.druid.index.v1.IndexGranularity; import com.metamx.druid.index.v1.IndexIO; import com.metamx.druid.index.v1.IndexMerger; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; import com.metamx.druid.query.MetricsEmittingQueryRunner; import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryRunnerFactory; @@ -95,7 +95,7 @@ public class RealtimePlumberSchool implements PlumberSchool private volatile 
RejectionPolicyFactory rejectionPolicyFactory = null; private volatile QueryRunnerFactoryConglomerate conglomerate = null; - private volatile SegmentPusher segmentPusher = null; + private volatile DataSegmentPusher dataSegmentPusher = null; private volatile MetadataUpdater metadataUpdater = null; private volatile ServerView serverView = null; private ServiceEmitter emitter; @@ -130,9 +130,9 @@ public class RealtimePlumberSchool implements PlumberSchool } @JacksonInject("segmentPusher") - public void setSegmentPusher(SegmentPusher segmentPusher) + public void setDataSegmentPusher(DataSegmentPusher dataSegmentPusher) { - this.segmentPusher = segmentPusher; + this.dataSegmentPusher = dataSegmentPusher; } @JacksonInject("metadataUpdater") @@ -307,7 +307,7 @@ public class RealtimePlumberSchool implements PlumberSchool } } - final File mergedFile; + File mergedFile = null; try { List indexes = Lists.newArrayList(); for (FireHydrant fireHydrant : sink) { @@ -325,7 +325,7 @@ public class RealtimePlumberSchool implements PlumberSchool QueryableIndex index = IndexIO.loadIndex(mergedFile); - DataSegment segment = segmentPusher.push( + DataSegment segment = dataSegmentPusher.push( mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())) ); @@ -337,6 +337,19 @@ public class RealtimePlumberSchool implements PlumberSchool .addData("interval", interval) .emit(); } + + + if (mergedFile != null) { + try { + if (mergedFile != null) { + log.info("Deleting Index File[%s]", mergedFile); + FileUtils.deleteDirectory(mergedFile); + } + } + catch (IOException e) { + log.warn(e, "Error deleting directory[%s]", mergedFile); + } + } } } ); @@ -512,7 +525,7 @@ public class RealtimePlumberSchool implements PlumberSchool private void verifyState() { Preconditions.checkNotNull(conglomerate, "must specify a queryRunnerFactoryConglomerate to do this action."); - Preconditions.checkNotNull(segmentPusher, "must specify a segmentPusher to do this action."); + Preconditions.checkNotNull(dataSegmentPusher, "must specify a segmentPusher to do this action."); Preconditions.checkNotNull(metadataUpdater, "must specify a metadataUpdater to do this action."); Preconditions.checkNotNull(serverView, "must specify a serverView to do this action."); Preconditions.checkNotNull(emitter, "must specify a serviceEmitter to do this action."); diff --git a/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java b/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java index 5a74b17e223..007ea188b82 100644 --- a/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java +++ b/realtime/src/main/java/com/metamx/druid/realtime/S3SegmentPusher.java @@ -20,20 +20,22 @@ package com.metamx.druid.realtime; import com.fasterxml.jackson.databind.ObjectMapper; -import com.metamx.druid.loading.S3SegmentPusherConfig; -import com.metamx.druid.loading.SegmentPusher; +import com.metamx.druid.loading.DataSegmentPusher; +import com.metamx.druid.loading.S3DataSegmentPusherConfig; import org.jets3t.service.impl.rest.httpclient.RestS3Service; /** * A placeholder class to make the move of the SegmentPushers to a new package backwards compatible + * + * Exists in 0.2, can be removed from 0.3 on */ @Deprecated -public class S3SegmentPusher extends com.metamx.druid.loading.S3SegmentPusher implements SegmentPusher +public class S3SegmentPusher extends com.metamx.druid.loading.S3DataSegmentPusher implements DataSegmentPusher { public S3SegmentPusher( RestS3Service s3Client, - 
S3SegmentPusherConfig config, + S3DataSegmentPusherConfig config, ObjectMapper jsonMapper ) { diff --git a/server/pom.xml b/server/pom.xml index 9b4a0707961..45584f572ba 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -18,8 +18,7 @@ ~ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. --> - + 4.0.0 com.metamx.druid druid-server @@ -29,7 +28,7 @@ com.metamx druid - 0.2.8-SNAPSHOT + 0.3.17-SNAPSHOT diff --git a/server/src/main/java/com/metamx/druid/coordination/ServerManager.java b/server/src/main/java/com/metamx/druid/coordination/ServerManager.java index 3bde07908c7..38e7d1e4a39 100644 --- a/server/src/main/java/com/metamx/druid/coordination/ServerManager.java +++ b/server/src/main/java/com/metamx/druid/coordination/ServerManager.java @@ -30,7 +30,7 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.collect.CountingMap; import com.metamx.druid.index.Segment; import com.metamx.druid.loading.SegmentLoader; -import com.metamx.druid.loading.StorageAdapterLoadingException; +import com.metamx.druid.loading.SegmentLoadingException; import com.metamx.druid.partition.PartitionChunk; import com.metamx.druid.partition.PartitionHolder; import com.metamx.druid.query.BySegmentQueryRunner; @@ -104,24 +104,24 @@ public class ServerManager implements QuerySegmentWalker } } - public void loadSegment(final DataSegment segment) throws StorageAdapterLoadingException + public void loadSegment(final DataSegment segment) throws SegmentLoadingException { final Segment adapter; try { adapter = segmentLoader.getSegment(segment); } - catch (StorageAdapterLoadingException e) { + catch (SegmentLoadingException e) { try { segmentLoader.cleanup(segment); } - catch (StorageAdapterLoadingException e1) { + catch (SegmentLoadingException e1) { // ignore } throw e; } if (adapter == null) { - throw new StorageAdapterLoadingException("Null adapter from loadSpec[%s]", segment.getLoadSpec()); + throw new SegmentLoadingException("Null adapter from loadSpec[%s]", segment.getLoadSpec()); } synchronized (lock) { @@ -139,7 +139,7 @@ public class ServerManager implements QuerySegmentWalker ); if ((entry != null) && (entry.getChunk(segment.getShardSpec().getPartitionNum()) != null)) { log.info("Told to load a adapter for a segment[%s] that already exists", segment.getIdentifier()); - throw new StorageAdapterLoadingException("Segment already exists[%s]", segment.getIdentifier()); + throw new SegmentLoadingException("Segment already exists[%s]", segment.getIdentifier()); } loadedIntervals.add( @@ -154,7 +154,7 @@ public class ServerManager implements QuerySegmentWalker } } - public void dropSegment(final DataSegment segment) throws StorageAdapterLoadingException + public void dropSegment(final DataSegment segment) throws SegmentLoadingException { String dataSource = segment.getDataSource(); synchronized (lock) { diff --git a/server/src/main/java/com/metamx/druid/coordination/ZkCoordinator.java b/server/src/main/java/com/metamx/druid/coordination/ZkCoordinator.java index 57d64e0ba32..1951205975c 100644 --- a/server/src/main/java/com/metamx/druid/coordination/ZkCoordinator.java +++ b/server/src/main/java/com/metamx/druid/coordination/ZkCoordinator.java @@ -29,7 +29,7 @@ import com.metamx.common.lifecycle.LifecycleStop; import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidServer; -import com.metamx.druid.loading.StorageAdapterLoadingException; +import com.metamx.druid.loading.SegmentLoadingException; import 
com.metamx.emitter.service.AlertEvent; import com.metamx.emitter.service.ServiceEmitter; import com.metamx.phonebook.PhoneBook; @@ -245,14 +245,14 @@ public class ZkCoordinator implements DataSegmentChangeHandler } catch (IOException e) { removeSegment(segment); - throw new StorageAdapterLoadingException( + throw new SegmentLoadingException( "Failed to write to disk segment info cache file[%s]", segmentInfoCacheFile ); } yp.announce(servedSegmentsLocation, segment.getIdentifier(), segment); } - catch (StorageAdapterLoadingException e) { + catch (SegmentLoadingException e) { log.error(e, "Failed to load segment[%s]", segment); emitter.emit( new AlertEvent.Builder().build( diff --git a/server/src/main/java/com/metamx/druid/http/ComputeNode.java b/server/src/main/java/com/metamx/druid/http/ComputeNode.java index 2230932d9a1..08b3eb93da3 100644 --- a/server/src/main/java/com/metamx/druid/http/ComputeNode.java +++ b/server/src/main/java/com/metamx/druid/http/ComputeNode.java @@ -39,7 +39,7 @@ import com.metamx.druid.coordination.ZkCoordinatorConfig; import com.metamx.druid.initialization.Initialization; import com.metamx.druid.initialization.ServerInit; import com.metamx.druid.jackson.DefaultObjectMapper; -import com.metamx.druid.loading.QueryableLoaderConfig; +import com.metamx.druid.loading.SegmentLoaderConfig; import com.metamx.druid.loading.SegmentLoader; import com.metamx.druid.metrics.ServerMonitor; import com.metamx.druid.query.MetricsEmittingExecutorService; @@ -172,7 +172,7 @@ public class ComputeNode extends BaseServerNode ); setSegmentLoader( - ServerInit.makeDefaultQueryableLoader(s3Client, getConfigFactory().build(QueryableLoaderConfig.class)) + ServerInit.makeDefaultQueryableLoader(s3Client, getConfigFactory().build(SegmentLoaderConfig.class)) ); } catch (S3ServiceException e) { diff --git a/server/src/main/java/com/metamx/druid/initialization/ServerInit.java b/server/src/main/java/com/metamx/druid/initialization/ServerInit.java index 1d727f9abe3..7cd6caf3c1b 100644 --- a/server/src/main/java/com/metamx/druid/initialization/ServerInit.java +++ b/server/src/main/java/com/metamx/druid/initialization/ServerInit.java @@ -26,16 +26,16 @@ import com.metamx.common.ISE; import com.metamx.common.logger.Logger; import com.metamx.druid.DruidProcessingConfig; import com.metamx.druid.loading.DelegatingSegmentLoader; +import com.metamx.druid.loading.LocalDataSegmentPuller; import com.metamx.druid.loading.MMappedQueryableIndexFactory; import com.metamx.druid.loading.QueryableIndexFactory; -import com.metamx.druid.loading.S3SegmentPuller; +import com.metamx.druid.loading.S3DataSegmentPuller; +import com.metamx.druid.loading.SegmentLoaderConfig; import com.metamx.druid.loading.SingleSegmentLoader; import com.metamx.druid.query.group.GroupByQueryEngine; import com.metamx.druid.query.group.GroupByQueryEngineConfig; import com.metamx.druid.Query; import com.metamx.druid.collect.StupidPool; -import com.metamx.druid.loading.QueryableLoaderConfig; -import com.metamx.druid.loading.S3ZippedSegmentPuller; import com.metamx.druid.loading.SegmentLoader; import com.metamx.druid.query.QueryRunnerFactory; import com.metamx.druid.query.group.GroupByQuery; @@ -64,24 +64,22 @@ public class ServerInit public static SegmentLoader makeDefaultQueryableLoader( RestS3Service s3Client, - QueryableLoaderConfig config + SegmentLoaderConfig config ) { DelegatingSegmentLoader delegateLoader = new DelegatingSegmentLoader(); - final S3SegmentPuller segmentGetter = new S3SegmentPuller(s3Client, config); - final 
S3ZippedSegmentPuller zippedGetter = new S3ZippedSegmentPuller(s3Client, config); - final QueryableIndexFactory factory; - if ("mmap".equals(config.getQueryableFactoryType())) { - factory = new MMappedQueryableIndexFactory(); - } else { - throw new ISE("Unknown queryableFactoryType[%s]", config.getQueryableFactoryType()); - } + final S3DataSegmentPuller segmentGetter = new S3DataSegmentPuller(s3Client); + final QueryableIndexFactory factory = new MMappedQueryableIndexFactory(); + + SingleSegmentLoader s3segmentLoader = new SingleSegmentLoader(segmentGetter, factory, config); + SingleSegmentLoader localSegmentLoader = new SingleSegmentLoader(new LocalDataSegmentPuller(), factory, config); delegateLoader.setLoaderTypes( ImmutableMap.builder() - .put("s3", new SingleSegmentLoader(segmentGetter, factory)) - .put("s3_zip", new SingleSegmentLoader(zippedGetter, factory)) + .put("s3", s3segmentLoader) + .put("s3_zip", s3segmentLoader) + .put("local", localSegmentLoader) .build() ); diff --git a/server/src/main/java/com/metamx/druid/loading/DataSegmentPuller.java b/server/src/main/java/com/metamx/druid/loading/DataSegmentPuller.java new file mode 100644 index 00000000000..b821c653a6e --- /dev/null +++ b/server/src/main/java/com/metamx/druid/loading/DataSegmentPuller.java @@ -0,0 +1,47 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.loading; + +import com.metamx.druid.client.DataSegment; + +import java.io.File; + +/** + */ +public interface DataSegmentPuller +{ + /** + * Pull down segment files for the given DataSegment and put them in the given directory. + * + * @param segment The segment to pull down files for + * @param dir The directory to store the files in + * @throws SegmentLoadingException if there are any errors + */ + public void getSegmentFiles(DataSegment segment, File dir) throws SegmentLoadingException; + + /** + * Returns the last modified time of the given segment. 
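
ServerInit now builds SingleSegmentLoaders around DataSegmentPuller implementations and registers them by loadSpec type, so supporting another deep-storage scheme only requires another puller. The following is a purely hypothetical example of the contract being introduced here: an "http" loadSpec type with a "uri" field is invented for this sketch and is not part of the patch, while CompressionUtils.unzip, MapUtils.getString, and the SegmentLoadingException constructors are the same helpers the real pullers in this patch use.

package com.example.sketch;

import com.google.common.io.Closeables;
import com.metamx.common.MapUtils;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.loading.DataSegmentPuller;
import com.metamx.druid.loading.SegmentLoadingException;
import com.metamx.druid.utils.CompressionUtils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

public class HttpDataSegmentPuller implements DataSegmentPuller
{
  @Override
  public void getSegmentFiles(DataSegment segment, File dir) throws SegmentLoadingException
  {
    // "uri" is an invented loadSpec field for this sketch; the real pullers use "path" or "bucket"/"key".
    final String uri = MapUtils.getString(segment.getLoadSpec(), "uri");

    if (!dir.exists() && !dir.mkdirs()) {
      throw new SegmentLoadingException("Could not create directory[%s]", dir);
    }

    InputStream in = null;
    try {
      in = new URL(uri).openStream();
      // Same decompression helper the S3 and local pullers in this patch rely on
      CompressionUtils.unzip(in, dir);
    }
    catch (IOException e) {
      throw new SegmentLoadingException(e, "Failed to fetch and unzip [%s] into [%s]", uri, dir);
    }
    finally {
      Closeables.closeQuietly(in);
    }
  }

  @Override
  public long getLastModified(DataSegment segment) throws SegmentLoadingException
  {
    final String uri = MapUtils.getString(segment.getLoadSpec(), "uri");
    try {
      final URLConnection conn = new URL(uri).openConnection();
      return conn.getLastModified();
    }
    catch (IOException e) {
      throw new SegmentLoadingException(e, "Failed to read last-modified for [%s]", uri);
    }
  }
}
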
+ * + * @param segment The segment to check the last modified time for + * @return the last modified time in millis from the epoch + * @throws SegmentLoadingException if there are any errors + */ + public long getLastModified(DataSegment segment) throws SegmentLoadingException; +} diff --git a/server/src/main/java/com/metamx/druid/loading/SegmentPusher.java b/server/src/main/java/com/metamx/druid/loading/DataSegmentPusher.java similarity index 96% rename from server/src/main/java/com/metamx/druid/loading/SegmentPusher.java rename to server/src/main/java/com/metamx/druid/loading/DataSegmentPusher.java index 3700215efc1..5369480d6b9 100644 --- a/server/src/main/java/com/metamx/druid/loading/SegmentPusher.java +++ b/server/src/main/java/com/metamx/druid/loading/DataSegmentPusher.java @@ -24,7 +24,7 @@ import com.metamx.druid.client.DataSegment; import java.io.File; import java.io.IOException; -public interface SegmentPusher +public interface DataSegmentPusher { public DataSegment push(File file, DataSegment segment) throws IOException; } diff --git a/server/src/main/java/com/metamx/druid/loading/SegmentPuller.java b/server/src/main/java/com/metamx/druid/loading/DataSegmentPusherUtil.java similarity index 64% rename from server/src/main/java/com/metamx/druid/loading/SegmentPuller.java rename to server/src/main/java/com/metamx/druid/loading/DataSegmentPusherUtil.java index 9cba65f425c..e72bd787bb3 100644 --- a/server/src/main/java/com/metamx/druid/loading/SegmentPuller.java +++ b/server/src/main/java/com/metamx/druid/loading/DataSegmentPusherUtil.java @@ -19,15 +19,26 @@ package com.metamx.druid.loading; +import com.google.common.base.Joiner; import com.metamx.druid.client.DataSegment; -import java.io.File; -import java.util.Map; - /** */ -public interface SegmentPuller +public class DataSegmentPusherUtil { - public File getSegmentFiles(DataSegment loadSpec) throws StorageAdapterLoadingException; - public boolean cleanSegmentFiles(DataSegment loadSpec) throws StorageAdapterLoadingException; + private static final Joiner JOINER = Joiner.on("/").skipNulls(); + + public static String getStorageDir(DataSegment segment) + { + return JOINER.join( + segment.getDataSource(), + String.format( + "%s_%s", + segment.getInterval().getStart(), + segment.getInterval().getEnd() + ), + segment.getVersion(), + segment.getShardSpec().getPartitionNum() + ); + } } diff --git a/server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java b/server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java index d576e59ae82..0f8e1e7074f 100644 --- a/server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java +++ b/server/src/main/java/com/metamx/druid/loading/DelegatingSegmentLoader.java @@ -44,24 +44,24 @@ public class DelegatingSegmentLoader implements SegmentLoader } @Override - public Segment getSegment(DataSegment segment) throws StorageAdapterLoadingException + public Segment getSegment(DataSegment segment) throws SegmentLoadingException { return getLoader(segment.getLoadSpec()).getSegment(segment); } @Override - public void cleanup(DataSegment segment) throws StorageAdapterLoadingException + public void cleanup(DataSegment segment) throws SegmentLoadingException { getLoader(segment.getLoadSpec()).cleanup(segment); } - private SegmentLoader getLoader(Map loadSpec) throws StorageAdapterLoadingException + private SegmentLoader getLoader(Map loadSpec) throws SegmentLoadingException { String type = MapUtils.getString(loadSpec, "type"); SegmentLoader loader = 
loaderTypes.get(type); if (loader == null) { - throw new StorageAdapterLoadingException("Unknown loader type[%s]. Known types are %s", type, loaderTypes.keySet()); + throw new SegmentLoadingException("Unknown loader type[%s]. Known types are %s", type, loaderTypes.keySet()); } return loader; } diff --git a/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPuller.java b/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPuller.java new file mode 100644 index 00000000000..8cdb8e0a7a2 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPuller.java @@ -0,0 +1,105 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.loading; + +import com.google.common.io.Files; +import com.metamx.common.MapUtils; +import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.utils.CompressionUtils; + +import java.io.File; +import java.io.IOException; +import java.util.Map; + +/** + */ +public class LocalDataSegmentPuller implements DataSegmentPuller +{ + private static final Logger log = new Logger(LocalDataSegmentPuller.class); + + @Override + public void getSegmentFiles(DataSegment segment, File dir) throws SegmentLoadingException + { + final File path = getFile(segment); + + if (path.isDirectory()) { + if (path.equals(dir)) { + log.info("Asked to load [%s] into itself, done!", dir); + return; + } + + log.info("Copying files from [%s] to [%s]", path, dir); + File file = null; + try { + final File[] files = path.listFiles(); + for (int i = 0; i < files.length; ++i) { + file = files[i]; + Files.copy(file, new File(dir, file.getName())); + } + } + catch (IOException e) { + throw new SegmentLoadingException(e, "Unable to copy file[%s].", file); + } + } else { + if (!path.getName().endsWith(".zip")) { + throw new SegmentLoadingException("File is not a zip file[%s]", path); + } + + log.info("Unzipping local file[%s] to [%s]", path, dir); + try { + CompressionUtils.unzip(path, dir); + } + catch (IOException e) { + throw new SegmentLoadingException(e, "Unable to unzip file[%s]", path); + } + } + } + + @Override + public long getLastModified(DataSegment segment) throws SegmentLoadingException + { + final File file = getFile(segment); + + long lastModified = Long.MAX_VALUE; + if (file.isDirectory()) { + for (File childFile : file.listFiles()) { + lastModified = Math.min(childFile.lastModified(), lastModified); + } + } + else { + lastModified = file.lastModified(); + } + + return lastModified; + } + + private File getFile(DataSegment segment) throws SegmentLoadingException + { + final Map loadSpec = segment.getLoadSpec(); + final File path = new File(MapUtils.getString(loadSpec, "path")); + + if (!path.exists()) { + throw new 
SegmentLoadingException("Asked to load path[%s], but it doesn't exist.", path); + } + + return path; + } +} diff --git a/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPusher.java b/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPusher.java new file mode 100644 index 00000000000..1493b162572 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPusher.java @@ -0,0 +1,96 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.loading; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.ByteStreams; +import com.google.common.io.Files; +import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.index.v1.IndexIO; +import com.metamx.druid.utils.CompressionUtils; + +import java.io.File; +import java.io.IOException; + +/** + */ +public class LocalDataSegmentPusher implements DataSegmentPusher +{ + private static final Logger log = new Logger(LocalDataSegmentPusher.class); + + private final LocalDataSegmentPusherConfig config; + private final ObjectMapper jsonMapper; + + public LocalDataSegmentPusher( + LocalDataSegmentPusherConfig config, + ObjectMapper jsonMapper + ) + { + this.config = config; + this.jsonMapper = jsonMapper; + } + + @Override + public DataSegment push(File dataSegmentFile, DataSegment segment) throws IOException + { + File outDir = new File(config.getStorageDirectory(), DataSegmentPusherUtil.getStorageDir(segment)); + + if (dataSegmentFile.equals(outDir)) { + long size = 0; + for (File file : dataSegmentFile.listFiles()) { + size += file.length(); + } + + return createDescriptorFile( + segment.withLoadSpec(makeLoadSpec(outDir)) + .withSize(size) + .withBinaryVersion(IndexIO.getVersionFromDir(dataSegmentFile)), + outDir + ); + } + + outDir.mkdirs(); + File outFile = new File(outDir, "index.zip"); + log.info("Compressing files from[%s] to [%s]", dataSegmentFile, outFile); + long size = CompressionUtils.zip(dataSegmentFile, outFile); + + return createDescriptorFile( + segment.withLoadSpec(makeLoadSpec(outFile)) + .withSize(size) + .withBinaryVersion(IndexIO.getVersionFromDir(dataSegmentFile)), + outDir + ); + } + + private DataSegment createDescriptorFile(DataSegment segment, File outDir) throws IOException + { + File descriptorFile = new File(outDir, "descriptor.json"); + log.info("Creating descriptor file at[%s]", descriptorFile); + Files.copy(ByteStreams.newInputStreamSupplier(jsonMapper.writeValueAsBytes(segment)), descriptorFile); + return segment; + } + + private ImmutableMap makeLoadSpec(File outFile) + { + return ImmutableMap.of("type", "local", "path", outFile.toString()); + } +} diff --git 
a/server/src/main/java/com/metamx/druid/loading/S3SegmentGetterConfig.java b/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPusherConfig.java similarity index 86% rename from server/src/main/java/com/metamx/druid/loading/S3SegmentGetterConfig.java rename to server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPusherConfig.java index c2a4c7f6308..d33a9a5130b 100644 --- a/server/src/main/java/com/metamx/druid/loading/S3SegmentGetterConfig.java +++ b/server/src/main/java/com/metamx/druid/loading/LocalDataSegmentPusherConfig.java @@ -25,8 +25,8 @@ import java.io.File; /** */ -public abstract class S3SegmentGetterConfig +public abstract class LocalDataSegmentPusherConfig { - @Config("druid.paths.indexCache") - public abstract File getCacheDirectory(); + @Config("druid.pusher.local.storageDirectory") + public abstract File getStorageDirectory(); } diff --git a/server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java b/server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java index 648813d62ac..9896c3f800b 100644 --- a/server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java +++ b/server/src/main/java/com/metamx/druid/loading/MMappedQueryableIndexFactory.java @@ -34,7 +34,7 @@ public class MMappedQueryableIndexFactory implements QueryableIndexFactory private static final Logger log = new Logger(MMappedQueryableIndexFactory.class); @Override - public QueryableIndex factorize(File parentDir) throws StorageAdapterLoadingException + public QueryableIndex factorize(File parentDir) throws SegmentLoadingException { try { if (! IndexIO.canBeMapped(parentDir)) { @@ -46,11 +46,11 @@ public class MMappedQueryableIndexFactory implements QueryableIndexFactory IndexIO.storeLatest(IndexIO.readIndex(parentDir), canBeMappedDir); if (! IndexIO.canBeMapped(canBeMappedDir)) { - throw new StorageAdapterLoadingException("WTF!? newly written file[%s] cannot be mapped!?", canBeMappedDir); + throw new SegmentLoadingException("WTF!? newly written file[%s] cannot be mapped!?", canBeMappedDir); } for (File file : canBeMappedDir.listFiles()) { if (! 
file.renameTo(new File(parentDir, file.getName()))) { - throw new StorageAdapterLoadingException("Couldn't rename[%s] to [%s]", canBeMappedDir, parentDir); + throw new SegmentLoadingException("Couldn't rename[%s] to [%s]", canBeMappedDir, parentDir); } } FileUtils.deleteDirectory(canBeMappedDir); @@ -66,7 +66,7 @@ public class MMappedQueryableIndexFactory implements QueryableIndexFactory catch (IOException e2) { log.error(e, "Problem deleting parentDir[%s]", parentDir); } - throw new StorageAdapterLoadingException(e, e.getMessage()); + throw new SegmentLoadingException(e, "%s", e.getMessage()); } } } diff --git a/server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java b/server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java index d7f60309aa6..276bbc2028a 100644 --- a/server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java +++ b/server/src/main/java/com/metamx/druid/loading/QueryableIndexFactory.java @@ -27,5 +27,5 @@ import java.io.File; */ public interface QueryableIndexFactory { - public QueryableIndex factorize(File parentDir) throws StorageAdapterLoadingException; + public QueryableIndex factorize(File parentDir) throws SegmentLoadingException; } diff --git a/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPuller.java b/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPuller.java new file mode 100644 index 00000000000..011e1633ca1 --- /dev/null +++ b/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPuller.java @@ -0,0 +1,170 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
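
One consequence of routing the pushers through DataSegmentPusherUtil.getStorageDir is that the on-disk layout becomes predictable. For the local pusher added above, in the usual case where the pushed directory is not already the target directory, the segment is zipped to druid.pusher.local.storageDirectory/dataSource/start_end/version/partitionNum/index.zip, and that same path is what the "local" loadSpec carries for LocalDataSegmentPuller to read back. A small sketch of that relationship follows; the class and method names are inventions of this sketch.

package com.example.sketch;

import com.metamx.druid.client.DataSegment;
import com.metamx.druid.loading.DataSegmentPusherUtil;
import com.metamx.druid.loading.LocalDataSegmentPusherConfig;

import java.io.File;

public class LocalSegmentLayout
{
  // Where LocalDataSegmentPusher will leave the zipped segment, and therefore the "path"
  // that ends up in the segment's {"type": "local"} loadSpec.
  public static File expectedIndexZip(LocalDataSegmentPusherConfig config, DataSegment segment)
  {
    // dataSource/intervalStart_intervalEnd/version/partitionNum, as built by DataSegmentPusherUtil
    final File segmentDir = new File(config.getStorageDirectory(), DataSegmentPusherUtil.getStorageDir(segment));
    return new File(segmentDir, "index.zip");
  }
}
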
+ */ + +package com.metamx.druid.loading; + +import com.google.common.io.ByteStreams; +import com.google.common.io.Closeables; +import com.google.common.io.Files; +import com.google.inject.Inject; +import com.metamx.common.ISE; +import com.metamx.common.MapUtils; +import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; +import com.metamx.druid.common.s3.S3Utils; +import com.metamx.druid.utils.CompressionUtils; +import org.apache.commons.io.FileUtils; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +/** + */ +public class S3DataSegmentPuller implements DataSegmentPuller +{ + private static final Logger log = new Logger(S3DataSegmentPuller.class); + + private static final String BUCKET = "bucket"; + private static final String KEY = "key"; + + private final RestS3Service s3Client; + + @Inject + public S3DataSegmentPuller( + RestS3Service s3Client + ) + { + this.s3Client = s3Client; + } + + @Override + public void getSegmentFiles(DataSegment segment, File outDir) throws SegmentLoadingException + { + S3Coords s3Coords = new S3Coords(segment); + + log.info("Pulling index at path[%s] to outDir[%s]", s3Coords, outDir); + + if (!isObjectInBucket(s3Coords)) { + throw new SegmentLoadingException("IndexFile[%s] does not exist.", s3Coords); + } + + if (!outDir.exists()) { + outDir.mkdirs(); + } + + if (!outDir.isDirectory()) { + throw new ISE("outDir[%s] must be a directory.", outDir); + } + + long startTime = System.currentTimeMillis(); + S3Object s3Obj = null; + + try { + s3Obj = s3Client.getObject(new S3Bucket(s3Coords.bucket), s3Coords.path); + + InputStream in = null; + try { + in = s3Obj.getDataInputStream(); + final String key = s3Obj.getKey(); + if (key.endsWith(".zip")) { + CompressionUtils.unzip(in, outDir); + } else if (key.endsWith(".gz")) { + final File outFile = new File(outDir, toFilename(key, ".gz")); + ByteStreams.copy(new GZIPInputStream(in), Files.newOutputStreamSupplier(outFile)); + } else { + ByteStreams.copy(in, Files.newOutputStreamSupplier(new File(outDir, toFilename(key, "")))); + } + log.info("Pull of file[%s] completed in %,d millis", s3Obj, System.currentTimeMillis() - startTime); + } + catch (IOException e) { + FileUtils.deleteDirectory(outDir); + throw new SegmentLoadingException(e, "Problem decompressing object[%s]", s3Obj); + } + finally { + Closeables.closeQuietly(in); + } + } + catch (Exception e) { + throw new SegmentLoadingException(e, e.getMessage()); + } + finally { + S3Utils.closeStreamsQuietly(s3Obj); + } + + } + + private String toFilename(String key, final String suffix) + { + String filename = key.substring(key.lastIndexOf("/") + 1); // characters after last '/' + filename = filename.substring(0, filename.length() - suffix.length()); // remove the suffix from the end + return filename; + } + + private boolean isObjectInBucket(S3Coords coords) throws SegmentLoadingException + { + try { + return s3Client.isObjectInBucket(coords.bucket, coords.path); + } + catch (ServiceException e) { + throw new SegmentLoadingException(e, "S3 fail! 
Key[%s]", coords); + } + } + + @Override + public long getLastModified(DataSegment segment) throws SegmentLoadingException + { + S3Coords coords = new S3Coords(segment); + try { + S3Object objDetails = s3Client.getObjectDetails(new S3Bucket(coords.bucket), coords.path); + return objDetails.getLastModifiedDate().getTime(); + } + catch (S3ServiceException e) { + throw new SegmentLoadingException(e, e.getMessage()); + } + } + + private static class S3Coords + { + String bucket; + String path; + + public S3Coords(DataSegment segment) + { + Map loadSpec = segment.getLoadSpec(); + bucket = MapUtils.getString(loadSpec, BUCKET); + path = MapUtils.getString(loadSpec, KEY); + if (path.startsWith("/")) { + path = path.substring(1); + } + } + + public String toString() + { + return String.format("s3://%s/%s", bucket, path); + } + } +} diff --git a/server/src/main/java/com/metamx/druid/loading/S3SegmentPusher.java b/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPusher.java similarity index 67% rename from server/src/main/java/com/metamx/druid/loading/S3SegmentPusher.java rename to server/src/main/java/com/metamx/druid/loading/S3DataSegmentPusher.java index 5af4b905719..dd5ce951695 100644 --- a/server/src/main/java/com/metamx/druid/loading/S3SegmentPusher.java +++ b/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPusher.java @@ -22,38 +22,34 @@ package com.metamx.druid.loading; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableMap; -import com.google.common.io.Closeables; -import com.metamx.common.ISE; -import com.metamx.common.StreamUtils; +import com.google.common.io.ByteStreams; +import com.google.common.io.Files; import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.v1.IndexIO; +import com.metamx.druid.utils.CompressionUtils; import com.metamx.emitter.EmittingLogger; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; - import org.jets3t.service.S3ServiceException; import org.jets3t.service.acl.gs.GSAccessControlList; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.model.S3Object; -import java.io.*; +import java.io.File; +import java.io.IOException; import java.security.NoSuchAlgorithmException; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; -public class S3SegmentPusher implements SegmentPusher +public class S3DataSegmentPusher implements DataSegmentPusher { - private static final EmittingLogger log = new EmittingLogger(S3SegmentPusher.class); + private static final EmittingLogger log = new EmittingLogger(S3DataSegmentPusher.class); private static final Joiner JOINER = Joiner.on("/").skipNulls(); private final RestS3Service s3Client; - private final S3SegmentPusherConfig config; + private final S3DataSegmentPusherConfig config; private final ObjectMapper jsonMapper; - public S3SegmentPusher( - RestS3Service s3Client, - S3SegmentPusherConfig config, - ObjectMapper jsonMapper + public S3DataSegmentPusher( + RestS3Service s3Client, + S3DataSegmentPusherConfig config, + ObjectMapper jsonMapper ) { this.s3Client = s3Client; @@ -61,41 +57,22 @@ public class S3SegmentPusher implements SegmentPusher this.jsonMapper = jsonMapper; } + public S3DataSegmentPusherConfig getConfig() + { + return config; + } + @Override public DataSegment push(final File indexFilesDir, DataSegment segment) throws IOException { log.info("Uploading [%s] to S3", indexFilesDir); String outputKey = JOINER.join( 
config.getBaseKey().isEmpty() ? null : config.getBaseKey(), - segment.getDataSource(), - String.format( - "%s_%s", - segment.getInterval().getStart(), - segment.getInterval().getEnd() - ), - segment.getVersion(), - segment.getShardSpec().getPartitionNum() + DataSegmentPusherUtil.getStorageDir(segment) ); - long indexSize = 0; final File zipOutFile = File.createTempFile("druid", "index.zip"); - ZipOutputStream zipOut = null; - try { - zipOut = new ZipOutputStream(new FileOutputStream(zipOutFile)); - File[] indexFiles = indexFilesDir.listFiles(); - for (File indexFile : indexFiles) { - log.info("Adding indexFile[%s] with size[%,d]. Total size[%,d]", indexFile, indexFile.length(), indexSize); - if (indexFile.length() >= Integer.MAX_VALUE) { - throw new ISE("indexFile[%s] too large [%,d]", indexFile, indexFile.length()); - } - zipOut.putNextEntry(new ZipEntry(indexFile.getName())); - IOUtils.copy(new FileInputStream(indexFile), zipOut); - indexSize += indexFile.length(); - } - } - finally { - Closeables.closeQuietly(zipOut); - } + long indexSize = CompressionUtils.zip(indexFilesDir, zipOutFile); try { S3Object toPush = new S3Object(zipOutFile); @@ -119,7 +96,7 @@ public class S3SegmentPusher implements SegmentPusher .withBinaryVersion(IndexIO.getVersionFromDir(indexFilesDir)); File descriptorFile = File.createTempFile("druid", "descriptor.json"); - StreamUtils.copyToFileAndClose(new ByteArrayInputStream(jsonMapper.writeValueAsBytes(segment)), descriptorFile); + Files.copy(ByteStreams.newInputStreamSupplier(jsonMapper.writeValueAsBytes(segment)), descriptorFile); S3Object descriptorObject = new S3Object(descriptorFile); descriptorObject.setBucketName(outputBucket); descriptorObject.setKey(outputKey + "/descriptor.json"); @@ -128,9 +105,6 @@ public class S3SegmentPusher implements SegmentPusher log.info("Pushing %s", descriptorObject); s3Client.putObject(outputBucket, descriptorObject); - log.info("Deleting Index File[%s]", indexFilesDir); - FileUtils.deleteDirectory(indexFilesDir); - log.info("Deleting zipped index File[%s]", zipOutFile); zipOutFile.delete(); diff --git a/server/src/main/java/com/metamx/druid/loading/S3SegmentPusherConfig.java b/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPusherConfig.java similarity index 95% rename from server/src/main/java/com/metamx/druid/loading/S3SegmentPusherConfig.java rename to server/src/main/java/com/metamx/druid/loading/S3DataSegmentPusherConfig.java index 0bd66a1a913..a2cada422fb 100644 --- a/server/src/main/java/com/metamx/druid/loading/S3SegmentPusherConfig.java +++ b/server/src/main/java/com/metamx/druid/loading/S3DataSegmentPusherConfig.java @@ -24,7 +24,7 @@ import org.skife.config.Default; /** */ -public abstract class S3SegmentPusherConfig +public abstract class S3DataSegmentPusherConfig { @Config("druid.pusher.s3.bucket") public abstract String getBucket(); diff --git a/server/src/main/java/com/metamx/druid/loading/S3SegmentPuller.java b/server/src/main/java/com/metamx/druid/loading/S3SegmentPuller.java deleted file mode 100644 index 380489548d5..00000000000 --- a/server/src/main/java/com/metamx/druid/loading/S3SegmentPuller.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. 
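
On the S3 side, the pusher uploads index.zip and descriptor.json under the configured baseKey plus the same getStorageDir layout, and the puller's private S3Coords class reads the "bucket" and "key" entries back out of the loadSpec. The standalone snippet below only illustrates that loadSpec shape and the key normalization; the bucket and key values are made up for the example.

package com.example.sketch;

import com.google.common.collect.ImmutableMap;

import java.util.Map;

public class S3LoadSpecExample
{
  public static void main(String[] args)
  {
    final Map<String, Object> loadSpec = ImmutableMap.<String, Object>of(
        "type", "s3_zip",
        "bucket", "example-bucket",
        "key", "baseKey/dataSource/2012-01-01T00:00:00.000Z_2012-01-02T00:00:00.000Z/v1/0/index.zip"
    );

    String bucket = (String) loadSpec.get("bucket");
    String path = (String) loadSpec.get("key");
    if (path.startsWith("/")) {
      path = path.substring(1);   // same normalization S3Coords applies
    }

    // Prints s3://example-bucket/baseKey/dataSource/.../index.zip
    System.out.println(String.format("s3://%s/%s", bucket, path));
  }
}
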
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.loading; - -import com.google.inject.Inject; -import com.metamx.common.MapUtils; -import com.metamx.common.StreamUtils; -import com.metamx.common.logger.Logger; -import com.metamx.druid.client.DataSegment; -import com.metamx.druid.common.s3.S3Utils; -import org.apache.commons.io.FileUtils; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Bucket; -import org.jets3t.service.model.S3Object; -import org.joda.time.DateTime; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.Map; -import java.util.zip.GZIPInputStream; - -/** - */ -public class S3SegmentPuller implements SegmentPuller -{ - private static final Logger log = new Logger(S3SegmentPuller.class); - private static final long DEFAULT_TIMEOUT = 5 * 60 * 1000; - - private static final String BUCKET = "bucket"; - private static final String KEY = "key"; - - private final RestS3Service s3Client; - private final S3SegmentGetterConfig config; - - @Inject - public S3SegmentPuller( - RestS3Service s3Client, - S3SegmentGetterConfig config - ) - { - this.s3Client = s3Client; - this.config = config; - } - - @Override - public File getSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException - { - Map loadSpec = segment.getLoadSpec(); - String s3Bucket = MapUtils.getString(loadSpec, "bucket"); - String s3Path = MapUtils.getString(loadSpec, "key"); - - log.info("Loading index at path[s3://%s/%s]", s3Bucket, s3Path); - - S3Object s3Obj = null; - File tmpFile = null; - try { - if (!s3Client.isObjectInBucket(s3Bucket, s3Path)) { - throw new StorageAdapterLoadingException("IndexFile[s3://%s/%s] does not exist.", s3Bucket, s3Path); - } - - File cacheFile = new File(config.getCacheDirectory(), computeCacheFilePath(s3Bucket, s3Path)); - - if (cacheFile.exists()) { - S3Object objDetails = s3Client.getObjectDetails(new S3Bucket(s3Bucket), s3Path); - DateTime cacheFileLastModified = new DateTime(cacheFile.lastModified()); - DateTime s3ObjLastModified = new DateTime(objDetails.getLastModifiedDate().getTime()); - if (cacheFileLastModified.isAfter(s3ObjLastModified)) { - log.info( - "Found cacheFile[%s] with modified[%s], which is after s3Obj[%s]. 
Using.", - cacheFile, - cacheFileLastModified, - s3ObjLastModified - ); - return cacheFile.getParentFile(); - } - FileUtils.deleteDirectory(cacheFile.getParentFile()); - } - - long currTime = System.currentTimeMillis(); - - tmpFile = File.createTempFile(s3Bucket, new DateTime().toString()); - log.info( - "Downloading file[s3://%s/%s] to local tmpFile[%s] for cacheFile[%s]", - s3Bucket, s3Path, tmpFile, cacheFile - ); - - s3Obj = s3Client.getObject(new S3Bucket(s3Bucket), s3Path); - StreamUtils.copyToFileAndClose(s3Obj.getDataInputStream(), tmpFile, DEFAULT_TIMEOUT); - final long downloadEndTime = System.currentTimeMillis(); - log.info("Download of file[%s] completed in %,d millis", cacheFile, downloadEndTime - currTime); - - if (!cacheFile.getParentFile().mkdirs()) { - log.info("Unable to make parent file[%s]", cacheFile.getParentFile()); - } - cacheFile.delete(); - - if (s3Path.endsWith("gz")) { - log.info("Decompressing file[%s] to [%s]", tmpFile, cacheFile); - StreamUtils.copyToFileAndClose( - new GZIPInputStream(new FileInputStream(tmpFile)), - cacheFile - ); - if (!tmpFile.delete()) { - log.error("Could not delete tmpFile[%s].", tmpFile); - } - } else { - log.info("Rename tmpFile[%s] to cacheFile[%s]", tmpFile, cacheFile); - if (!tmpFile.renameTo(cacheFile)) { - log.warn("Error renaming tmpFile[%s] to cacheFile[%s]. Copying instead.", tmpFile, cacheFile); - - StreamUtils.copyToFileAndClose(new FileInputStream(tmpFile), cacheFile); - if (!tmpFile.delete()) { - log.error("Could not delete tmpFile[%s].", tmpFile); - } - } - } - - long endTime = System.currentTimeMillis(); - log.info("Local processing of file[%s] done in %,d millis", cacheFile, endTime - downloadEndTime); - - return cacheFile.getParentFile(); - } - catch (Exception e) { - throw new StorageAdapterLoadingException(e, e.getMessage()); - } - finally { - S3Utils.closeStreamsQuietly(s3Obj); - if (tmpFile != null && tmpFile.exists()) { - log.warn("Deleting tmpFile[%s] in finally block. Why?", tmpFile); - tmpFile.delete(); - } - } - } - - private String computeCacheFilePath(String s3Bucket, String s3Path) - { - return String.format( - "%s/%s", s3Bucket, s3Path.endsWith("gz") ? s3Path.substring(0, s3Path.length() - ".gz".length()) : s3Path - ); - } - - @Override - public boolean cleanSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException - { - Map loadSpec = segment.getLoadSpec(); - File cacheFile = new File( - config.getCacheDirectory(), - computeCacheFilePath(MapUtils.getString(loadSpec, BUCKET), MapUtils.getString(loadSpec, KEY)) - ); - - try { - final File parentFile = cacheFile.getParentFile(); - log.info("Recursively deleting file[%s]", parentFile); - FileUtils.deleteDirectory(parentFile); - } - catch (IOException e) { - throw new StorageAdapterLoadingException(e, e.getMessage()); - } - - return true; - } -} diff --git a/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentPuller.java b/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentPuller.java deleted file mode 100644 index 8fd8ebd4542..00000000000 --- a/server/src/main/java/com/metamx/druid/loading/S3ZippedSegmentPuller.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.loading; - -import com.google.common.io.Closeables; -import com.metamx.common.MapUtils; -import com.metamx.common.StreamUtils; -import com.metamx.common.logger.Logger; -import com.metamx.druid.client.DataSegment; -import com.metamx.druid.common.s3.S3Utils; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Bucket; -import org.jets3t.service.model.S3Object; -import org.joda.time.DateTime; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Map; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; - -/** - */ -public class S3ZippedSegmentPuller implements SegmentPuller -{ - private static final Logger log = new Logger(S3ZippedSegmentPuller.class); - - private static final String BUCKET = "bucket"; - private static final String KEY = "key"; - - private final RestS3Service s3Client; - private final S3SegmentGetterConfig config; - - public S3ZippedSegmentPuller( - RestS3Service s3Client, - S3SegmentGetterConfig config - ) - { - this.s3Client = s3Client; - this.config = config; - } - - @Override - public File getSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException - { - Map loadSpec = segment.getLoadSpec(); - String s3Bucket = MapUtils.getString(loadSpec, "bucket"); - String s3Path = MapUtils.getString(loadSpec, "key"); - - if (s3Path.startsWith("/")) { - s3Path = s3Path.substring(1); - } - - log.info("Loading index at path[s3://%s/%s]", s3Bucket, s3Path); - - S3Object s3Obj = null; - File tmpFile = null; - try { - if (!s3Client.isObjectInBucket(s3Bucket, s3Path)) { - throw new StorageAdapterLoadingException("IndexFile[s3://%s/%s] does not exist.", s3Bucket, s3Path); - } - - File cacheFile = new File(config.getCacheDirectory(), computeCacheFilePath(s3Bucket, s3Path)); - - if (cacheFile.exists()) { - S3Object objDetails = s3Client.getObjectDetails(new S3Bucket(s3Bucket), s3Path); - DateTime cacheFileLastModified = new DateTime(cacheFile.lastModified()); - DateTime s3ObjLastModified = new DateTime(objDetails.getLastModifiedDate().getTime()); - if (cacheFileLastModified.isAfter(s3ObjLastModified)) { - log.info( - "Found cacheFile[%s] with modified[%s], which is after s3Obj[%s]. 
Using.", - cacheFile, - cacheFileLastModified, - s3ObjLastModified - ); - return cacheFile; - } - FileUtils.deleteDirectory(cacheFile); - } - - long currTime = System.currentTimeMillis(); - - tmpFile = File.createTempFile(s3Bucket, new DateTime().toString()); - log.info( - "Downloading file[s3://%s/%s] to local tmpFile[%s] for cacheFile[%s]", - s3Bucket, s3Path, tmpFile, cacheFile - ); - - s3Obj = s3Client.getObject(new S3Bucket(s3Bucket), s3Path); - StreamUtils.copyToFileAndClose(s3Obj.getDataInputStream(), tmpFile); - final long downloadEndTime = System.currentTimeMillis(); - log.info("Download of file[%s] completed in %,d millis", cacheFile, downloadEndTime - currTime); - - if (cacheFile.exists()) { - FileUtils.deleteDirectory(cacheFile); - } - cacheFile.mkdirs(); - - ZipInputStream zipIn = null; - OutputStream out = null; - ZipEntry entry = null; - try { - zipIn = new ZipInputStream(new BufferedInputStream(new FileInputStream(tmpFile))); - while ((entry = zipIn.getNextEntry()) != null) { - out = new FileOutputStream(new File(cacheFile, entry.getName())); - IOUtils.copy(zipIn, out); - zipIn.closeEntry(); - Closeables.closeQuietly(out); - out = null; - } - } - finally { - Closeables.closeQuietly(out); - Closeables.closeQuietly(zipIn); - } - - long endTime = System.currentTimeMillis(); - log.info("Local processing of file[%s] done in %,d millis", cacheFile, endTime - downloadEndTime); - - log.info("Deleting tmpFile[%s]", tmpFile); - tmpFile.delete(); - - return cacheFile; - } - catch (Exception e) { - throw new StorageAdapterLoadingException(e, e.getMessage()); - } - finally { - S3Utils.closeStreamsQuietly(s3Obj); - if (tmpFile != null && tmpFile.exists()) { - log.warn("Deleting tmpFile[%s] in finally block. Why?", tmpFile); - tmpFile.delete(); - } - } - } - - private String computeCacheFilePath(String s3Bucket, String s3Path) - { - return new File(String.format("%s/%s", s3Bucket, s3Path)).getParent(); - } - - @Override - public boolean cleanSegmentFiles(DataSegment segment) throws StorageAdapterLoadingException - { - Map loadSpec = segment.getLoadSpec(); - File cacheFile = new File( - config.getCacheDirectory(), - computeCacheFilePath( - MapUtils.getString(loadSpec, BUCKET), - MapUtils.getString(loadSpec, KEY) - ) - ); - - try { - log.info("Deleting directory[%s]", cacheFile); - FileUtils.deleteDirectory(cacheFile); - } - catch (IOException e) { - throw new StorageAdapterLoadingException(e, e.getMessage()); - } - - return true; - } -} diff --git a/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java b/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java index 1ca54b89106..20fa5592ac2 100644 --- a/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java +++ b/server/src/main/java/com/metamx/druid/loading/SegmentLoader.java @@ -26,6 +26,6 @@ import com.metamx.druid.index.Segment; */ public interface SegmentLoader { - public Segment getSegment(DataSegment loadSpec) throws StorageAdapterLoadingException; - public void cleanup(DataSegment loadSpec) throws StorageAdapterLoadingException; + public Segment getSegment(DataSegment loadSpec) throws SegmentLoadingException; + public void cleanup(DataSegment loadSpec) throws SegmentLoadingException; } diff --git a/server/src/main/java/com/metamx/druid/loading/QueryableLoaderConfig.java b/server/src/main/java/com/metamx/druid/loading/SegmentLoaderConfig.java similarity index 76% rename from server/src/main/java/com/metamx/druid/loading/QueryableLoaderConfig.java rename to 
server/src/main/java/com/metamx/druid/loading/SegmentLoaderConfig.java index c6e2a72c931..294c91b9a38 100644 --- a/server/src/main/java/com/metamx/druid/loading/QueryableLoaderConfig.java +++ b/server/src/main/java/com/metamx/druid/loading/SegmentLoaderConfig.java @@ -21,13 +21,18 @@ package com.metamx.druid.loading; import org.skife.config.Config; +import java.io.File; + /** */ -public abstract class QueryableLoaderConfig extends S3SegmentGetterConfig +public abstract class SegmentLoaderConfig { - @Config("druid.queryable.factory") - public String getQueryableFactoryType() + @Config({"druid.paths.indexCache", "druid.segmentCache.path"}) + public abstract File getCacheDirectory(); + + @Config("druid.segmentCache.deleteOnRemove") + public boolean deleteOnRemove() { - return "mmap"; + return true; } } diff --git a/server/src/main/java/com/metamx/druid/loading/StorageAdapterLoadingException.java b/server/src/main/java/com/metamx/druid/loading/SegmentLoadingException.java similarity index 88% rename from server/src/main/java/com/metamx/druid/loading/StorageAdapterLoadingException.java rename to server/src/main/java/com/metamx/druid/loading/SegmentLoadingException.java index d0f0ba4be93..d52fd6e3a82 100644 --- a/server/src/main/java/com/metamx/druid/loading/StorageAdapterLoadingException.java +++ b/server/src/main/java/com/metamx/druid/loading/SegmentLoadingException.java @@ -21,9 +21,9 @@ package com.metamx.druid.loading; /** */ -public class StorageAdapterLoadingException extends Exception +public class SegmentLoadingException extends Exception { - public StorageAdapterLoadingException( + public SegmentLoadingException( String formatString, Object... objs ) @@ -31,7 +31,7 @@ public class StorageAdapterLoadingException extends Exception super(String.format(formatString, objs)); } - public StorageAdapterLoadingException( + public SegmentLoadingException( Throwable cause, String formatString, Object... 
objs diff --git a/server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java b/server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java index 19c3981e988..7e62f57fbf4 100644 --- a/server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java +++ b/server/src/main/java/com/metamx/druid/loading/SingleSegmentLoader.java @@ -19,40 +19,133 @@ package com.metamx.druid.loading; +import com.google.common.base.Joiner; +import com.google.common.base.Throwables; import com.google.inject.Inject; +import com.metamx.common.StreamUtils; +import com.metamx.common.logger.Logger; import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.QueryableIndexSegment; import com.metamx.druid.index.Segment; +import org.apache.commons.io.FileUtils; + +import java.io.*; /** */ public class SingleSegmentLoader implements SegmentLoader { - private final SegmentPuller segmentPuller; + private static final Logger log = new Logger(SingleSegmentLoader.class); + + private final DataSegmentPuller dataSegmentPuller; private final QueryableIndexFactory factory; + private final SegmentLoaderConfig config; + private static final Joiner JOINER = Joiner.on("/").skipNulls(); @Inject public SingleSegmentLoader( - SegmentPuller segmentPuller, - QueryableIndexFactory factory + DataSegmentPuller dataSegmentPuller, + QueryableIndexFactory factory, + SegmentLoaderConfig config ) { - this.segmentPuller = segmentPuller; + this.dataSegmentPuller = dataSegmentPuller; this.factory = factory; + this.config = config; } @Override - public Segment getSegment(DataSegment segment) throws StorageAdapterLoadingException + public Segment getSegment(DataSegment segment) throws SegmentLoadingException { - final QueryableIndex index = factory.factorize(segmentPuller.getSegmentFiles(segment)); + File segmentFiles = getSegmentFiles(segment); + final QueryableIndex index = factory.factorize(segmentFiles); return new QueryableIndexSegment(segment.getIdentifier(), index); } - @Override - public void cleanup(DataSegment segment) throws StorageAdapterLoadingException + public File getSegmentFiles(DataSegment segment) throws SegmentLoadingException { - segmentPuller.cleanSegmentFiles(segment); + File localStorageDir = new File(config.getCacheDirectory(), DataSegmentPusherUtil.getStorageDir(segment)); + if (localStorageDir.exists()) { + long localLastModified = localStorageDir.lastModified(); + long remoteLastModified = dataSegmentPuller.getLastModified(segment); + if (remoteLastModified > 0 && localLastModified >= remoteLastModified) { + log.info( + "Found localStorageDir[%s] with modified[%s], which is same or after remote[%s]. 
Using.", + localStorageDir, localLastModified, remoteLastModified + ); + return localStorageDir; + } + } + + if (localStorageDir.exists()) { + try { + FileUtils.deleteDirectory(localStorageDir); + } + catch (IOException e) { + log.warn(e, "Exception deleting previously existing local dir[%s]", localStorageDir); + } + } + if (!localStorageDir.mkdirs()) { + log.info("Unable to make parent file[%s]", localStorageDir); + } + + dataSegmentPuller.getSegmentFiles(segment, localStorageDir); + + return localStorageDir; + } + + private File getLocalStorageDir(DataSegment segment) + { + String outputKey = JOINER.join( + segment.getDataSource(), + String.format( + "%s_%s", + segment.getInterval().getStart(), + segment.getInterval().getEnd() + ), + segment.getVersion(), + segment.getShardSpec().getPartitionNum() + ); + + return new File(config.getCacheDirectory(), outputKey); + } + + private void moveToCache(File pulledFile, File cacheFile) throws SegmentLoadingException + { + log.info("Rename pulledFile[%s] to cacheFile[%s]", pulledFile, cacheFile); + if (!pulledFile.renameTo(cacheFile)) { + log.warn("Error renaming pulledFile[%s] to cacheFile[%s]. Copying instead.", pulledFile, cacheFile); + + try { + StreamUtils.copyToFileAndClose(new FileInputStream(pulledFile), cacheFile); + } + catch (IOException e) { + throw new SegmentLoadingException( + e, + "Problem moving pulledFile[%s] to cache[%s]", + pulledFile, + cacheFile + ); + } + if (!pulledFile.delete()) { + log.error("Could not delete pulledFile[%s].", pulledFile); + } + } + } + + @Override + public void cleanup(DataSegment segment) throws SegmentLoadingException + { + File cacheFile = getLocalStorageDir(segment).getParentFile(); + + try { + log.info("Deleting directory[%s]", cacheFile); + FileUtils.deleteDirectory(cacheFile); + } + catch (IOException e) { + throw new SegmentLoadingException(e, e.getMessage()); + } } } diff --git a/server/src/main/java/com/metamx/druid/master/BalancerAnalyzer.java b/server/src/main/java/com/metamx/druid/master/BalancerAnalyzer.java deleted file mode 100644 index bdf475b8d1b..00000000000 --- a/server/src/main/java/com/metamx/druid/master/BalancerAnalyzer.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Druid - a distributed column store. - * Copyright (C) 2012 Metamarkets Group Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -package com.metamx.druid.master; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.metamx.common.logger.Logger; -import com.metamx.druid.client.DataSegment; -import com.metamx.druid.client.DruidDataSource; -import com.metamx.druid.client.DruidServer; - -import java.util.Collections; -import java.util.List; -import java.util.Set; - -/** - * The BalancerAnalyzer keeps the state of the highest and lowest percent used servers. 
It will update - * these states and perform lookaheads to make sure the updated states result in a more balanced cluster. - */ -public class BalancerAnalyzer -{ - private static final Logger log = new Logger(BalancerAnalyzer.class); - private static final int PERCENT_THRESHOLD = 3; - private static final int MAX_SEGMENTS_TO_MOVE = 5; - - private volatile Long highestSizeUsed; - private volatile double highestPercentUsed; - private volatile Long highestPercentUsedServerMaxSize; - private volatile Long lowestSizeUsed; - private volatile double lowestPercentUsed; - private volatile Long lowestPercentUsedServerMaxSize; - - public BalancerAnalyzer() - { - this.highestSizeUsed = 0L; - this.highestPercentUsed = 0; - this.highestPercentUsedServerMaxSize = 0L; - this.lowestSizeUsed = 0L; - this.lowestPercentUsed = 0; - this.lowestPercentUsedServerMaxSize = 0L; - } - - public void init(ServerHolder highestPercentUsedServer, ServerHolder lowestPercentUsedServer) - { - highestSizeUsed = highestPercentUsedServer.getSizeUsed(); - highestPercentUsed = highestPercentUsedServer.getPercentUsed(); - highestPercentUsedServerMaxSize = highestPercentUsedServer.getMaxSize(); - lowestSizeUsed = lowestPercentUsedServer.getSizeUsed(); - lowestPercentUsed = lowestPercentUsedServer.getPercentUsed(); - lowestPercentUsedServerMaxSize = lowestPercentUsedServer.getMaxSize(); - } - - public void update(long newHighestSizeUsed, long newLowestSizedUsed) - { - highestSizeUsed = newHighestSizeUsed; - highestPercentUsed = highestSizeUsed.doubleValue() / highestPercentUsedServerMaxSize; - lowestSizeUsed = newLowestSizedUsed; - lowestPercentUsed = lowestSizeUsed.doubleValue() / lowestPercentUsedServerMaxSize; - } - - public double getPercentDiff() - { - return Math.abs( - 100 * ((highestPercentUsed - lowestPercentUsed) - / ((highestPercentUsed + lowestPercentUsed) / 2)) - ); - } - - public double getLookaheadPercentDiff(Long newHighestSizeUsed, Long newLowestSizedUsed) - { - double newHighestPercentUsed = 100 * (newHighestSizeUsed.doubleValue() / highestPercentUsedServerMaxSize); - double newLowestPercentUsed = 100 * (newLowestSizedUsed.doubleValue() / lowestPercentUsedServerMaxSize); - - return Math.abs( - 100 * ((newHighestPercentUsed - newLowestPercentUsed) - / ((newHighestPercentUsed + newLowestPercentUsed) / 2)) - ); - } - - public Set findSegmentsToMove(DruidServer server) - { - Set segmentsToMove = Sets.newHashSet(); - double currPercentDiff = getPercentDiff(); - - if (currPercentDiff < PERCENT_THRESHOLD) { - log.info("Cluster usage is balanced."); - return segmentsToMove; - } - - List dataSources = Lists.newArrayList(server.getDataSources()); - Collections.shuffle(dataSources); - - for (DruidDataSource dataSource : dataSources) { - List segments = Lists.newArrayList(dataSource.getSegments()); - Collections.shuffle(segments); - - for (DataSegment segment : segments) { - if (segmentsToMove.size() >= MAX_SEGMENTS_TO_MOVE) { - return segmentsToMove; - } - - if (getLookaheadPercentDiff(highestSizeUsed - segment.getSize(), lowestSizeUsed + segment.getSize()) - < currPercentDiff) { - segmentsToMove.add(new BalancerSegmentHolder(server, segment)); - update(highestSizeUsed - segment.getSize(), lowestSizeUsed + segment.getSize()); - } - } - } - - return segmentsToMove; - } -} - - diff --git a/server/src/main/java/com/metamx/druid/master/BalancerCostAnalyzer.java b/server/src/main/java/com/metamx/druid/master/BalancerCostAnalyzer.java new file mode 100644 index 00000000000..6519785e76b --- /dev/null +++ 
b/server/src/main/java/com/metamx/druid/master/BalancerCostAnalyzer.java @@ -0,0 +1,236 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +package com.metamx.druid.master; + +import com.google.common.collect.Lists; +import com.metamx.common.logger.Logger; +import com.metamx.druid.client.DataSegment; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import java.util.List; +import java.util.Random; + + +/** + * The BalancerCostAnalyzer will compute the total initial cost of the cluster, with costs defined in + * computeJointSegmentCosts. It will then propose to move (pseudo-)randomly chosen segments from their + * respective initial servers to other servers, chosen greedily to minimize the cost of the cluster. + */ +public class BalancerCostAnalyzer +{ + private static final Logger log = new Logger(BalancerCostAnalyzer.class); + private static final int DAY_IN_MILLIS = 1000 * 60 * 60 * 24; + private static final int SEVEN_DAYS_IN_MILLIS = 7 * DAY_IN_MILLIS; + private static final int THIRTY_DAYS_IN_MILLIS = 30 * DAY_IN_MILLIS; + private final Random rand; + private final DateTime referenceTimestamp; + + public BalancerCostAnalyzer(DateTime referenceTimestamp) + { + this.referenceTimestamp = referenceTimestamp; + rand = new Random(0); + } + + /** + * Calculates the cost normalization. This is such that the normalized cost is lower bounded + * by 1 (e.g. when each segment gets its own compute node). + * + * @param serverHolders A list of ServerHolders for a particular tier. + * + * @return The normalization value (the sum of the diagonal entries in the + * pairwise cost matrix). This is the cost of a cluster if each + * segment were to get its own compute node. + */ + public double calculateNormalization(final List serverHolders) + { + double cost = 0; + for (ServerHolder server : serverHolders) { + for (DataSegment segment : server.getServer().getSegments().values()) { + cost += computeJointSegmentCosts(segment, segment); + } + } + return cost; + } + + /** + * Calculates the initial cost of the Druid segment configuration. + * + * @param serverHolders A list of ServerHolders for a particular tier. + * + * @return The initial cost of the Druid tier. + */ + public double calculateInitialTotalCost(final List serverHolders) + { + double cost = 0; + for (ServerHolder server : serverHolders) { + DataSegment[] segments = server.getServer().getSegments().values().toArray(new DataSegment[]{}); + for (int i = 0; i < segments.length; ++i) { + for (int j = i; j < segments.length; ++j) { + cost += computeJointSegmentCosts(segments[i], segments[j]); + } + } + } + return cost; + } + + /** + * This defines the unnormalized cost function between two segments. 
There is a base cost given by + * the minimum size of the two segments and additional penalties. + * recencyPenalty: it is more likely that recent segments will be queried together + * dataSourcePenalty: if two segments belong to the same data source, they are more likely to be involved + * in the same queries + * gapPenalty: it is more likely that segments close together in time will be queried together + * + * @param segment1 The first DataSegment. + * @param segment2 The second DataSegment. + * + * @return The joint cost of placing the two DataSegments together on one node. + */ + public double computeJointSegmentCosts(final DataSegment segment1, final DataSegment segment2) + { + final Interval gap = segment1.getInterval().gap(segment2.getInterval()); + + final double baseCost = Math.min(segment1.getSize(), segment2.getSize()); + double recencyPenalty = 1; + double dataSourcePenalty = 1; + double gapPenalty = 1; + + if (segment1.getDataSource().equals(segment2.getDataSource())) { + dataSourcePenalty = 2; + } + + double maxDiff = Math.max( + referenceTimestamp.getMillis() - segment1.getInterval().getEndMillis(), + referenceTimestamp.getMillis() - segment2.getInterval().getEndMillis() + ); + if (maxDiff < SEVEN_DAYS_IN_MILLIS) { + recencyPenalty = 2 - maxDiff / SEVEN_DAYS_IN_MILLIS; + } + + /** gap is null if the two segment intervals overlap or if they're adjacent */ + if (gap == null) { + gapPenalty = 2; + } else { + long gapMillis = gap.toDurationMillis(); + if (gapMillis < THIRTY_DAYS_IN_MILLIS) { + gapPenalty = 2 - gapMillis / THIRTY_DAYS_IN_MILLIS; + } + } + + final double cost = baseCost * recencyPenalty * dataSourcePenalty * gapPenalty; + + return cost; + } + + /** + * Sample from each server with probability proportional to the number of segments on that server. + * + * @param serverHolders A list of ServerHolders for a particular tier. + * @param numSegments + * + * @return A ServerHolder sampled with probability proportional to the + * number of segments on that server + */ + private ServerHolder sampleServer(final List serverHolders, final int numSegments) + { + final int num = rand.nextInt(numSegments); + int cumulativeSegments = 0; + int numToStopAt = 0; + + while (cumulativeSegments <= num) { + cumulativeSegments += serverHolders.get(numToStopAt).getServer().getSegments().size(); + numToStopAt++; + } + + return serverHolders.get(numToStopAt - 1); + } + + /** + * The balancing application requires us to pick a proposal segment. + * + * @param serverHolders A list of ServerHolders for a particular tier. + * @param numSegments The total number of segments on a particular tier. + * + * @return A BalancerSegmentHolder sampled uniformly at random. + */ + public BalancerSegmentHolder pickSegmentToMove(final List serverHolders, final int numSegments) + { + /** We want to sample from each server w.p. numSegmentsOnServer / totalSegments */ + ServerHolder fromServerHolder = sampleServer(serverHolders, numSegments); + + /** and actually pick that segment uniformly at random w.p. 1 / numSegmentsOnServer + so that the probability of picking a segment is 1 / totalSegments. */ + List segments = Lists.newArrayList(fromServerHolder.getServer().getSegments().values()); + + DataSegment proposalSegment = segments.get(rand.nextInt(segments.size())); + return new BalancerSegmentHolder(fromServerHolder.getServer(), proposalSegment); + } + + /** + * The assignment application requires us to supply a proposal segment. + * + * @param proposalSegment A DataSegment that we are proposing to move. 
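As a concrete illustration of computeJointSegmentCosts above, with invented numbers rather than anything taken from this patch: two segments of the same data source, 5 GB and 7 GB, with adjacent intervals (so gap is null) that both end one day before the reference timestamp.

    // Worked example, illustrative values only.
    double baseCost          = Math.min(5e9, 7e9);  // the smaller of the two segment sizes
    double dataSourcePenalty = 2;                   // same data source
    double gapPenalty        = 2;                   // adjacent intervals, so gap == null
    double recencyPenalty    = 2 - 1.0 / 7;         // both end one day before the reference timestamp
    double cost = baseCost * recencyPenalty * dataSourcePenalty * gapPenalty;  // ~3.7e10

Small, old, unrelated segments that sit far apart in time therefore cost little to co-locate, which is exactly what findNewSegmentHome minimizes when it sums a candidate server's joint costs.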
+ * @param serverHolders An iterable of ServerHolders for a particular tier. + * + * @return A ServerHolder with the new home for a segment. + */ + public ServerHolder findNewSegmentHome( + final DataSegment proposalSegment, + final Iterable serverHolders + ) + { + final long proposalSegmentSize = proposalSegment.getSize(); + double minCost = Double.MAX_VALUE; + ServerHolder toServer = null; + + for (ServerHolder server : serverHolders) { + /** Don't calculate cost if the server doesn't have enough space or is loading the segment */ + if (proposalSegmentSize > server.getAvailableSize() || + server.isLoadingSegment(proposalSegment) || + server.isServingSegment(proposalSegment)) { + continue; + } + + /** The contribution to the total cost of a given server by proposing to move the segment to that server is... */ + double cost = 0f; + /** the sum of the costs of other (exclusive of the proposalSegment) segments on the server */ + for (DataSegment segment : server.getServer().getSegments().values()) { + if (!proposalSegment.equals(segment)) { + cost += computeJointSegmentCosts(proposalSegment, segment); + } + } + /** plus the costs of segments that will be loaded */ + for (DataSegment segment : server.getPeon().getSegmentsToLoad()) { + cost += computeJointSegmentCosts(proposalSegment, segment); + } + + if (cost < minCost) { + minCost = cost; + toServer = server; + } + } + + return toServer; + } + +} + + diff --git a/server/src/main/java/com/metamx/druid/master/BalancerSegmentHolder.java b/server/src/main/java/com/metamx/druid/master/BalancerSegmentHolder.java index 97beb20ac59..7d62968e4b9 100644 --- a/server/src/main/java/com/metamx/druid/master/BalancerSegmentHolder.java +++ b/server/src/main/java/com/metamx/druid/master/BalancerSegmentHolder.java @@ -26,23 +26,23 @@ import com.metamx.druid.client.DruidServer; */ public class BalancerSegmentHolder { - private final DruidServer server; + private final DruidServer fromServer; private final DataSegment segment; private volatile int lifetime = 15; public BalancerSegmentHolder( - DruidServer server, + DruidServer fromServer, DataSegment segment ) { - this.server = server; + this.fromServer = fromServer; this.segment = segment; } - public DruidServer getServer() + public DruidServer getFromServer() { - return server; + return fromServer; } public DataSegment getSegment() diff --git a/server/src/main/java/com/metamx/druid/master/DruidMaster.java b/server/src/main/java/com/metamx/druid/master/DruidMaster.java index 6850ff610c5..a21408144b3 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMaster.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMaster.java @@ -54,6 +54,8 @@ import com.metamx.phonebook.PhoneBook; import com.metamx.phonebook.PhoneBookPeon; import com.netflix.curator.x.discovery.ServiceProvider; import org.I0Itec.zkclient.exception.ZkNodeExistsException; + +import org.joda.time.DateTime; import org.joda.time.Duration; import javax.annotation.Nullable; @@ -601,6 +603,7 @@ public class DruidMaster .withEmitter(emitter) .withMergeBytesLimit(config.getMergeBytesLimit()) .withMergeSegmentsLimit(config.getMergeSegmentsLimit()) + .withMaxSegmentsToMove(config.getMaxSegmentsToMove()) .build(); for (DruidMasterHelper helper : helpers) { @@ -701,6 +704,7 @@ public class DruidMaster .withDatabaseRuleManager(databaseRuleManager) .withLoadManagementPeons(loadManagementPeons) .withSegmentReplicantLookup(segmentReplicantLookup) + .withBalancerReferenceTimestamp(DateTime.now()) .build(); } }, @@ -710,7 +714,7 @@ public class 
DruidMaster config.getReplicantThrottleLimit() ), new DruidMasterCleanup(DruidMaster.this), - new DruidMasterBalancer(DruidMaster.this, new BalancerAnalyzer()), + new DruidMasterBalancer(DruidMaster.this), new DruidMasterLogger() ) ); diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java b/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java index 4ad3a839b4b..7678e85624d 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterBalancer.java @@ -19,18 +19,19 @@ package com.metamx.druid.master; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.MinMaxPriorityQueue; -import com.google.common.collect.Sets; import com.metamx.common.guava.Comparators; import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidServer; import com.metamx.emitter.EmittingLogger; +import org.joda.time.DateTime; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; /** @@ -47,31 +48,28 @@ public class DruidMasterBalancer implements DruidMasterHelper } } ); - private static final EmittingLogger log = new EmittingLogger(DruidMasterBalancer.class); + protected static final EmittingLogger log = new EmittingLogger(DruidMasterBalancer.class); - private final BalancerAnalyzer analyzer; - private final DruidMaster master; + protected final DruidMaster master; - private final Map> currentlyMovingSegments = Maps.newHashMap(); + protected final Map> currentlyMovingSegments = Maps.newHashMap(); public DruidMasterBalancer( - DruidMaster master, - BalancerAnalyzer analyzer + DruidMaster master ) { this.master = master; - this.analyzer = analyzer; } - private void reduceLifetimes(String tier) + protected void reduceLifetimes(String tier) { for (BalancerSegmentHolder holder : currentlyMovingSegments.get(tier).values()) { holder.reduceLifetime(); if (holder.getLifetime() <= 0) { log.makeAlert("[%s]: Balancer move segments queue has a segment stuck", tier) - .addData("segment", holder.getSegment().getIdentifier()) - .addData("server", holder.getServer()) - .emit(); + .addData("segment", holder.getSegment().getIdentifier()) + .addData("server", holder.getFromServer().getStringProps()) + .emit(); } } } @@ -79,7 +77,10 @@ public class DruidMasterBalancer implements DruidMasterHelper @Override public DruidMasterRuntimeParams run(DruidMasterRuntimeParams params) { - MasterStats stats = new MasterStats(); + final MasterStats stats = new MasterStats(); + final DateTime referenceTimestamp = params.getBalancerReferenceTimestamp(); + final BalancerCostAnalyzer analyzer = params.getBalancerCostAnalyzer(referenceTimestamp); + final int maxSegmentsToMove = params.getMaxSegmentsToMove(); for (Map.Entry> entry : params.getDruidCluster().getCluster().entrySet()) { @@ -95,51 +96,51 @@ public class DruidMasterBalancer implements DruidMasterHelper continue; } - TreeSet serversByPercentUsed = Sets.newTreeSet(percentUsedComparator); - serversByPercentUsed.addAll(entry.getValue()); + final List serverHolderList = Lists.newArrayList(entry.getValue()); - if (serversByPercentUsed.size() <= 1) { - log.info( - "[%s]: No unique values found for highest and lowest percent used servers: nothing to balance", tier - ); + if (serverHolderList.size() <= 1) { + log.info("[%s]: One or fewer servers found. 
Cannot balance.", tier); continue; } - ServerHolder highestPercentUsedServer = serversByPercentUsed.first(); - ServerHolder lowestPercentUsedServer = serversByPercentUsed.last(); + int numSegments = 0; + for (ServerHolder server : serverHolderList) { + numSegments += server.getServer().getSegments().size(); + } - analyzer.init(highestPercentUsedServer, lowestPercentUsedServer); + if (numSegments == 0) { + log.info("No segments found. Cannot balance."); + continue; + } - log.info( - "[%s]: Percent difference in percent size used between highest/lowest servers: %s%%", - tier, - analyzer.getPercentDiff() - ); + int iter = 0; + while (iter < maxSegmentsToMove) { + iter++; + final BalancerSegmentHolder segmentToMove = analyzer.pickSegmentToMove(serverHolderList, numSegments); + final ServerHolder holder = analyzer.findNewSegmentHome(segmentToMove.getSegment(), serverHolderList); + if (holder == null) { + continue; + } + moveSegment(segmentToMove, holder.getServer(), params); + } - log.info( - "[%s]: Highest percent used [%s]: size used[%s], percent used[%s%%]", - tier, - highestPercentUsedServer.getServer().getName(), - highestPercentUsedServer.getSizeUsed(), - highestPercentUsedServer.getPercentUsed() - ); - - log.info( - "[%s]: Lowest percent used [%s]: size used[%s], percent used[%s%%]", - tier, - lowestPercentUsedServer.getServer().getName(), - lowestPercentUsedServer.getSizeUsed(), - lowestPercentUsedServer.getPercentUsed() - ); - - // Use the analyzer to find segments to move and then move them - moveSegments( - lowestPercentUsedServer.getServer(), - analyzer.findSegmentsToMove(highestPercentUsedServer.getServer()), - params - ); + final double initialTotalCost = analyzer.calculateInitialTotalCost(serverHolderList); + final double normalization = analyzer.calculateNormalization(serverHolderList); + final double normalizedInitialCost = initialTotalCost / normalization; + stats.addToTieredStat("initialCost", tier, (long) initialTotalCost); + stats.addToTieredStat("normalization", tier, (long) normalization); + stats.addToTieredStat("normalizedInitialCostTimesOneThousand", tier, (long) (normalizedInitialCost * 1000)); stats.addToTieredStat("movedCount", tier, currentlyMovingSegments.get(tier).size()); + + log.info( + "[%s]: Initial Total Cost: [%f], Normalization: [%f], Initial Normalized Cost: [%f], Segments Moved: [%d]", + tier, + initialTotalCost, + normalization, + normalizedInitialCost, + currentlyMovingSegments.get(tier).size() + ); } return params.buildFromExisting() @@ -147,49 +148,48 @@ public class DruidMasterBalancer implements DruidMasterHelper .build(); } - private void moveSegments( - final DruidServer server, - final Set segments, + protected void moveSegment( + final BalancerSegmentHolder segment, + final DruidServer toServer, final DruidMasterRuntimeParams params ) { - String toServer = server.getName(); - LoadQueuePeon toPeon = params.getLoadManagementPeons().get(toServer); + final String toServerName = toServer.getName(); + final LoadQueuePeon toPeon = params.getLoadManagementPeons().get(toServerName); - for (final BalancerSegmentHolder segment : Sets.newHashSet(segments)) { - String fromServer = segment.getServer().getName(); - DataSegment segmentToMove = segment.getSegment(); - final String segmentName = segmentToMove.getIdentifier(); + final String fromServerName = segment.getFromServer().getName(); + final DataSegment segmentToMove = segment.getSegment(); + final String segmentName = segmentToMove.getIdentifier(); - if 
(!toPeon.getSegmentsToLoad().contains(segmentToMove) && - (server.getSegment(segmentName) == null) && - new ServerHolder(server, toPeon).getAvailableSize() > segmentToMove.getSize()) { - log.info("Moving [%s] from [%s] to [%s]", segmentName, fromServer, toServer); - try { - master.moveSegment( - fromServer, - toServer, - segmentToMove.getIdentifier(), - new LoadPeonCallback() + if (!toPeon.getSegmentsToLoad().contains(segmentToMove) && + (toServer.getSegment(segmentName) == null) && + new ServerHolder(toServer, toPeon).getAvailableSize() > segmentToMove.getSize()) { + log.info("Moving [%s] from [%s] to [%s]", segmentName, fromServerName, toServerName); + try { + master.moveSegment( + fromServerName, + toServerName, + segmentToMove.getIdentifier(), + new LoadPeonCallback() + { + @Override + protected void execute() { - @Override - protected void execute() - { - Map movingSegments = currentlyMovingSegments.get(server.getTier()); - if (movingSegments != null) { - movingSegments.remove(segmentName); - } + Map movingSegments = currentlyMovingSegments.get(toServer.getTier()); + if (movingSegments != null) { + movingSegments.remove(segmentName); } } - ); - currentlyMovingSegments.get(server.getTier()).put(segmentName, segment); - } - catch (Exception e) { - log.makeAlert(e, String.format("[%s] : Moving exception", segmentName)).emit(); - } - } else { - currentlyMovingSegments.get(server.getTier()).remove(segment); + } + ); + currentlyMovingSegments.get(toServer.getTier()).put(segmentName, segment); } + catch (Exception e) { + log.makeAlert(e, String.format("[%s] : Moving exception", segmentName)).emit(); + } + } else { + currentlyMovingSegments.get(toServer.getTier()).remove(segmentName); } + } } \ No newline at end of file diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java b/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java index a382fa382e1..755c668dc0e 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterConfig.java @@ -81,6 +81,10 @@ public abstract class DruidMasterConfig return Integer.MAX_VALUE; } + @Config("druid.master.balancer.maxSegmentsToMove") + @Default("5") + public abstract int getMaxSegmentsToMove(); + @Config("druid.master.replicant.lifetime") @Default("15") public abstract int getReplicantLifetime(); diff --git a/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java b/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java index a5164128316..85fe6a35d89 100644 --- a/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java +++ b/server/src/main/java/com/metamx/druid/master/DruidMasterRuntimeParams.java @@ -26,6 +26,7 @@ import com.metamx.druid.client.DataSegment; import com.metamx.druid.client.DruidDataSource; import com.metamx.druid.db.DatabaseRuleManager; import com.metamx.emitter.service.ServiceEmitter; +import org.joda.time.DateTime; import java.util.Collection; import java.util.Collections; @@ -49,6 +50,8 @@ public class DruidMasterRuntimeParams private final MasterStats stats; private final long mergeBytesLimit; private final int mergeSegmentsLimit; + private final int maxSegmentsToMove; + private final DateTime balancerReferenceTimestamp; public DruidMasterRuntimeParams( long startTime, @@ -63,7 +66,9 @@ public class DruidMasterRuntimeParams long millisToWaitBeforeDeleting, MasterStats stats, long mergeBytesLimit, - int mergeSegmentsLimit + int mergeSegmentsLimit, + int 
maxSegmentsToMove, + DateTime balancerReferenceTimestamp ) { this.startTime = startTime; @@ -79,6 +84,8 @@ public class DruidMasterRuntimeParams this.stats = stats; this.mergeBytesLimit = mergeBytesLimit; this.mergeSegmentsLimit = mergeSegmentsLimit; + this.maxSegmentsToMove = maxSegmentsToMove; + this.balancerReferenceTimestamp = balancerReferenceTimestamp; } public long getStartTime() @@ -146,6 +153,21 @@ public class DruidMasterRuntimeParams return mergeSegmentsLimit; } + public int getMaxSegmentsToMove() + { + return maxSegmentsToMove; + } + + public DateTime getBalancerReferenceTimestamp() + { + return balancerReferenceTimestamp; + } + + public BalancerCostAnalyzer getBalancerCostAnalyzer(DateTime referenceTimestamp) + { + return new BalancerCostAnalyzer(referenceTimestamp); + } + public boolean hasDeletionWaitTimeElapsed() { return (System.currentTimeMillis() - getStartTime() > getMillisToWaitBeforeDeleting()); @@ -171,7 +193,9 @@ public class DruidMasterRuntimeParams millisToWaitBeforeDeleting, stats, mergeBytesLimit, - mergeSegmentsLimit + mergeSegmentsLimit, + maxSegmentsToMove, + balancerReferenceTimestamp ); } @@ -190,6 +214,8 @@ public class DruidMasterRuntimeParams private MasterStats stats; private long mergeBytesLimit; private int mergeSegmentsLimit; + private int maxSegmentsToMove; + private DateTime balancerReferenceTimestamp; Builder() { @@ -206,6 +232,8 @@ public class DruidMasterRuntimeParams this.stats = new MasterStats(); this.mergeBytesLimit = 0; this.mergeSegmentsLimit = 0; + this.maxSegmentsToMove = 0; + this.balancerReferenceTimestamp = null; } Builder( @@ -221,7 +249,9 @@ public class DruidMasterRuntimeParams long millisToWaitBeforeDeleting, MasterStats stats, long mergeBytesLimit, - int mergeSegmentsLimit + int mergeSegmentsLimit, + int maxSegmentsToMove, + DateTime balancerReferenceTimestamp ) { this.startTime = startTime; @@ -237,6 +267,8 @@ public class DruidMasterRuntimeParams this.stats = stats; this.mergeBytesLimit = mergeBytesLimit; this.mergeSegmentsLimit = mergeSegmentsLimit; + this.maxSegmentsToMove = maxSegmentsToMove; + this.balancerReferenceTimestamp = balancerReferenceTimestamp; } public DruidMasterRuntimeParams build() @@ -254,7 +286,9 @@ public class DruidMasterRuntimeParams millisToWaitBeforeDeleting, stats, mergeBytesLimit, - mergeSegmentsLimit + mergeSegmentsLimit, + maxSegmentsToMove, + balancerReferenceTimestamp ); } @@ -335,5 +369,17 @@ public class DruidMasterRuntimeParams this.mergeSegmentsLimit = mergeSegmentsLimit; return this; } + + public Builder withMaxSegmentsToMove(int maxSegmentsToMove) + { + this.maxSegmentsToMove = maxSegmentsToMove; + return this; + } + + public Builder withBalancerReferenceTimestamp(DateTime balancerReferenceTimestamp) + { + this.balancerReferenceTimestamp = balancerReferenceTimestamp; + return this; + } } } diff --git a/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java b/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java index 2f1819c9274..4cd6acae640 100644 --- a/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java +++ b/server/src/main/java/com/metamx/druid/master/rules/LoadRule.java @@ -19,10 +19,11 @@ package com.metamx.druid.master.rules; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.MinMaxPriorityQueue; +import com.metamx.common.Pair; import com.metamx.druid.client.DataSegment; +import com.metamx.druid.master.BalancerCostAnalyzer; import com.metamx.druid.master.DruidMaster; import 
com.metamx.druid.master.DruidMasterRuntimeParams; import com.metamx.druid.master.LoadPeonCallback; @@ -30,7 +31,9 @@ import com.metamx.druid.master.MasterStats; import com.metamx.druid.master.ReplicationThrottler; import com.metamx.druid.master.ServerHolder; import com.metamx.emitter.EmittingLogger; +import org.joda.time.DateTime; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -56,7 +59,21 @@ public abstract class LoadRule implements Rule return stats; } - stats.accumulate(assign(params.getReplicationManager(), expectedReplicants, totalReplicants, serverQueue, segment)); + final List serverHolderList = new ArrayList(serverQueue); + final DateTime referenceTimestamp = params.getBalancerReferenceTimestamp(); + final BalancerCostAnalyzer analyzer = params.getBalancerCostAnalyzer(referenceTimestamp); + + stats.accumulate( + assign( + params.getReplicationManager(), + expectedReplicants, + totalReplicants, + analyzer, + serverHolderList, + segment + ) + ); + stats.accumulate(drop(expectedReplicants, clusterReplicants, segment, params)); return stats; @@ -64,52 +81,27 @@ public abstract class LoadRule implements Rule private MasterStats assign( final ReplicationThrottler replicationManager, - int expectedReplicants, + final int expectedReplicants, int totalReplicants, - MinMaxPriorityQueue serverQueue, + final BalancerCostAnalyzer analyzer, + final List serverHolderList, final DataSegment segment ) { - MasterStats stats = new MasterStats(); + final MasterStats stats = new MasterStats(); - List assignedServers = Lists.newArrayList(); while (totalReplicants < expectedReplicants) { - final ServerHolder holder = serverQueue.pollFirst(); + final ServerHolder holder = analyzer.findNewSegmentHome(segment, serverHolderList); + if (holder == null) { log.warn( - "Not enough %s servers[%d] to assign segment[%s]! Expected Replicants[%d]", + "Not enough %s servers or node capacity to assign segment[%s]! 
Expected Replicants[%d]", getTier(), - assignedServers.size() + serverQueue.size() + 1, segment.getIdentifier(), expectedReplicants ); break; } - if (holder.isServingSegment(segment) || holder.isLoadingSegment(segment)) { - assignedServers.add(holder); - continue; - } - - if (holder.getAvailableSize() < segment.getSize()) { - log.warn( - "Not enough node capacity, closest is [%s] with %,d available, skipping segment[%s].", - holder.getServer(), - holder.getAvailableSize(), - segment - ); - log.makeAlert( - "Not enough node capacity", - ImmutableMap.builder() - .put("segmentSkipped", segment.toString()) - .put("closestNode", holder.getServer().toString()) - .put("availableSize", holder.getAvailableSize()) - .build() - ).emit(); - serverQueue.add(holder); - stats.addToTieredStat("unassignedCount", getTier(), 1); - stats.addToTieredStat("unassignedSize", getTier(), segment.getSize()); - break; - } if (totalReplicants > 0) { // don't throttle if there's only 1 copy of this segment in the cluster if (!replicationManager.canAddReplicant(getTier()) || @@ -118,7 +110,6 @@ public abstract class LoadRule implements Rule segment.getIdentifier(), holder.getServer().getHost() )) { - serverQueue.add(holder); break; } } @@ -138,12 +129,10 @@ public abstract class LoadRule implements Rule } } ); - assignedServers.add(holder); stats.addToTieredStat("assignedCount", getTier(), 1); ++totalReplicants; } - serverQueue.addAll(assignedServers); return stats; } diff --git a/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java b/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java index 034bcc8f42e..d10566c5da4 100644 --- a/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java +++ b/server/src/test/java/com/metamx/druid/coordination/ServerManagerTest.java @@ -31,7 +31,6 @@ import com.metamx.common.Pair; import com.metamx.common.guava.ConcatSequence; import com.metamx.common.guava.Sequence; import com.metamx.common.guava.Sequences; -import com.metamx.druid.Capabilities; import com.metamx.druid.Druids; import com.metamx.druid.Query; import com.metamx.druid.QueryGranularity; @@ -39,12 +38,9 @@ import com.metamx.druid.StorageAdapter; import com.metamx.druid.client.DataSegment; import com.metamx.druid.index.QueryableIndex; import com.metamx.druid.index.Segment; -import com.metamx.druid.index.brita.Filter; import com.metamx.druid.index.v1.IndexIO; -import com.metamx.druid.index.v1.SegmentIdAttachedStorageAdapter; -import com.metamx.druid.index.v1.processing.Cursor; import com.metamx.druid.loading.SegmentLoader; -import com.metamx.druid.loading.StorageAdapterLoadingException; +import com.metamx.druid.loading.SegmentLoadingException; import com.metamx.druid.metrics.NoopServiceEmitter; import com.metamx.druid.query.CacheStrategy; import com.metamx.druid.query.ConcatQueryRunner; @@ -54,7 +50,6 @@ import com.metamx.druid.query.QueryRunner; import com.metamx.druid.query.QueryRunnerFactory; import com.metamx.druid.query.QueryRunnerFactoryConglomerate; import com.metamx.druid.query.QueryToolChest; -import com.metamx.druid.query.search.SearchHit; import com.metamx.druid.query.search.SearchQuery; import com.metamx.druid.result.Result; import com.metamx.druid.result.SearchResultValue; @@ -62,7 +57,6 @@ import com.metamx.druid.shard.NoneShardSpec; import com.metamx.emitter.EmittingLogger; import com.metamx.emitter.service.ServiceMetricEvent; -import org.joda.time.DateTime; import org.joda.time.Interval; import org.junit.Assert; import org.junit.Before; @@ -72,7 +66,6 @@ 
import java.io.IOException; import java.util.Arrays; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.concurrent.ExecutorService; /** @@ -101,7 +94,7 @@ public class ServerManagerTest } @Override - public void cleanup(DataSegment segment) throws StorageAdapterLoadingException + public void cleanup(DataSegment segment) throws SegmentLoadingException { } @@ -245,7 +238,7 @@ public class ServerManagerTest ) ); } - catch (StorageAdapterLoadingException e) { + catch (SegmentLoadingException e) { throw new RuntimeException(e); } } @@ -267,7 +260,7 @@ public class ServerManagerTest ) ); } - catch (StorageAdapterLoadingException e) { + catch (SegmentLoadingException e) { throw new RuntimeException(e); } } @@ -392,7 +385,7 @@ public class ServerManagerTest } } - public static class NoopQueryToolChest> implements QueryToolChest + public static class NoopQueryToolChest> extends QueryToolChest { @Override public QueryRunner mergeResults(QueryRunner runner) @@ -423,23 +416,5 @@ public class ServerManagerTest { return new TypeReference(){}; } - - @Override - public CacheStrategy getCacheStrategy(QueryType query) - { - return null; - } - - @Override - public QueryRunner preMergeQueryDecoration(QueryRunner runner) - { - return runner; - } - - @Override - public QueryRunner postMergeQueryDecoration(QueryRunner runner) - { - return runner; - } } } diff --git a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java index 097762106a2..9cfe9d22379 100644 --- a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java +++ b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java @@ -19,6 +19,7 @@ package com.metamx.druid.index.v1; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.io.Files; @@ -111,4 +112,51 @@ public class IndexMergerTest FileUtils.deleteQuietly(mergedDir); } } + + @Test + public void testPersistEmptyColumn() throws Exception + { + final IncrementalIndex toPersist1 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{}); + final IncrementalIndex toPersist2 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{}); + final File tmpDir1 = Files.createTempDir(); + final File tmpDir2 = Files.createTempDir(); + final File tmpDir3 = Files.createTempDir(); + + try { + toPersist1.add( + new MapBasedInputRow( + 1L, + ImmutableList.of("dim1", "dim2"), + ImmutableMap.of("dim1", ImmutableList.of(), "dim2", "foo") + ) + ); + + toPersist2.add( + new MapBasedInputRow( + 1L, + ImmutableList.of("dim1", "dim2"), + ImmutableMap.of("dim1", ImmutableList.of(), "dim2", "bar") + ) + ); + + final QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir1)); + final QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir2)); + final QueryableIndex merged = IndexIO.loadIndex( + IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, tmpDir3) + ); + + Assert.assertEquals(1, index1.getTimeColumn().getLength()); + Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index1.getAvailableDimensions())); + + Assert.assertEquals(1, index2.getTimeColumn().getLength()); + Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index2.getAvailableDimensions())); + + Assert.assertEquals(1, merged.getTimeColumn().getLength()); + 
diff --git a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java
index 097762106a2..9cfe9d22379 100644
--- a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java
+++ b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java
@@ -19,6 +19,7 @@
 
 package com.metamx.druid.index.v1;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.io.Files;
@@ -111,4 +112,51 @@ public class IndexMergerTest
       FileUtils.deleteQuietly(mergedDir);
     }
   }
+
+  @Test
+  public void testPersistEmptyColumn() throws Exception
+  {
+    final IncrementalIndex toPersist1 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
+    final IncrementalIndex toPersist2 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
+    final File tmpDir1 = Files.createTempDir();
+    final File tmpDir2 = Files.createTempDir();
+    final File tmpDir3 = Files.createTempDir();
+
+    try {
+      toPersist1.add(
+          new MapBasedInputRow(
+              1L,
+              ImmutableList.of("dim1", "dim2"),
+              ImmutableMap.of("dim1", ImmutableList.of(), "dim2", "foo")
+          )
+      );
+
+      toPersist2.add(
+          new MapBasedInputRow(
+              1L,
+              ImmutableList.of("dim1", "dim2"),
+              ImmutableMap.of("dim1", ImmutableList.of(), "dim2", "bar")
+          )
+      );
+
+      final QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir1));
+      final QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir2));
+      final QueryableIndex merged = IndexIO.loadIndex(
+          IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, tmpDir3)
+      );
+
+      Assert.assertEquals(1, index1.getTimeColumn().getLength());
+      Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index1.getAvailableDimensions()));
+
+      Assert.assertEquals(1, index2.getTimeColumn().getLength());
+      Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index2.getAvailableDimensions()));
+
+      Assert.assertEquals(1, merged.getTimeColumn().getLength());
+      Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(merged.getAvailableDimensions()));
+    } finally {
+      FileUtils.deleteQuietly(tmpDir1);
+      FileUtils.deleteQuietly(tmpDir2);
+      FileUtils.deleteQuietly(tmpDir3);
+    }
+  }
 }
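// Note (editor sketch, not part of the patch): testPersistEmptyColumn above asserts that a
// dimension whose only value is an empty list ("dim1") produces no column, so "dim2" is the
// sole available dimension after persist and merge. A minimal, self-contained restatement of
// that pruning idea; the helper below is hypothetical and is not the IndexMerger implementation.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class EmptyColumnPruningSketch
{
  // Keep only dimensions that actually carry at least one value.
  static List<String> availableDimensions(Map<String, List<String>> row)
  {
    List<String> dims = new ArrayList<String>();
    for (Map.Entry<String, List<String>> e : row.entrySet()) {
      if (!e.getValue().isEmpty()) {
        dims.add(e.getKey());
      }
    }
    return dims;
  }

  public static void main(String[] args)
  {
    Map<String, List<String>> row = new LinkedHashMap<String, List<String>>();
    row.put("dim1", Collections.<String>emptyList()); // same shape as the row added in the test
    row.put("dim2", Arrays.asList("foo"));

    // Prints [dim2], mirroring the assertions on getAvailableDimensions() above.
    System.out.println(availableDimensions(row));
  }
}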
diff --git a/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java b/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java
index 29d784d3631..ca41c4dfec6 100644
--- a/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java
+++ b/server/src/test/java/com/metamx/druid/loading/NoopSegmentLoader.java
@@ -30,7 +30,7 @@ import org.joda.time.Interval;
 public class NoopSegmentLoader implements SegmentLoader
 {
   @Override
-  public Segment getSegment(final DataSegment segment) throws StorageAdapterLoadingException
+  public Segment getSegment(final DataSegment segment) throws SegmentLoadingException
   {
     return new Segment()
     {
@@ -61,7 +61,7 @@ public class NoopSegmentLoader implements SegmentLoader
   }
 
   @Override
-  public void cleanup(DataSegment loadSpec) throws StorageAdapterLoadingException
+  public void cleanup(DataSegment loadSpec) throws SegmentLoadingException
   {
   }
 }
diff --git a/server/src/test/java/com/metamx/druid/master/BalancerAnalyzerTest.java b/server/src/test/java/com/metamx/druid/master/BalancerAnalyzerTest.java
deleted file mode 100644
index 5c5c96b520d..00000000000
--- a/server/src/test/java/com/metamx/druid/master/BalancerAnalyzerTest.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Druid - a distributed column store.
- * Copyright (C) 2012  Metamarkets Group Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-package com.metamx.druid.master;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import com.metamx.druid.client.DataSegment;
-import com.metamx.druid.client.DruidDataSource;
-import com.metamx.druid.client.DruidServer;
-import junit.framework.Assert;
-import org.easymock.EasyMock;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- */
-public class BalancerAnalyzerTest
-{
-  private ServerHolder high;
-  private ServerHolder low;
-  private DruidServer server;
-  private DruidDataSource dataSource;
-  private DataSegment segment;
-
-  @Before
-  public void setUp() throws Exception
-  {
-    high = EasyMock.createMock(ServerHolder.class);
-    low = EasyMock.createMock(ServerHolder.class);
-    server = EasyMock.createMock(DruidServer.class);
-    dataSource = EasyMock.createMock(DruidDataSource.class);
-    segment = EasyMock.createMock(DataSegment.class);
-  }
-
-  @After
-  public void tearDown() throws Exception
-  {
-    EasyMock.verify(high);
-    EasyMock.verify(low);
-  }
-
-  @Test
-  public void testGetPercentDifference()
-  {
-    EasyMock.expect(high.getSizeUsed()).andReturn(6L);
-    EasyMock.expect(high.getPercentUsed()).andReturn(60.0);
-    EasyMock.expect(high.getMaxSize()).andReturn(10L);
-    EasyMock.replay(high);
-
-    EasyMock.expect(low.getSizeUsed()).andReturn(2L);
-    EasyMock.expect(low.getPercentUsed()).andReturn(20.0);
-    EasyMock.expect(low.getMaxSize()).andReturn(10L);
-    EasyMock.replay(low);
-
-    BalancerAnalyzer analyzer = new BalancerAnalyzer();
-
-    analyzer.init(high, low);
-    Assert.assertEquals(100.0, analyzer.getPercentDiff());
-  }
-
-  @Test
-  public void testGetLookaheadPercentDifference()
-  {
-    EasyMock.expect(high.getSizeUsed()).andReturn(2L);
-    EasyMock.expect(high.getPercentUsed()).andReturn(20.0);
-    EasyMock.expect(high.getMaxSize()).andReturn(10L);
-    EasyMock.replay(high);
-
-    EasyMock.expect(low.getSizeUsed()).andReturn(1L);
-    EasyMock.expect(low.getPercentUsed()).andReturn(10.0);
-    EasyMock.expect(low.getMaxSize()).andReturn(10L);
-    EasyMock.replay(low);
-
-    BalancerAnalyzer analyzer = new BalancerAnalyzer();
-
-    analyzer.init(high, low);
-    Assert.assertEquals(100.0, analyzer.getLookaheadPercentDiff(2L, 6L));
-  }
-
-  @Test
-  public void testFindSegmentsToMove()
-  {
-    EasyMock.expect(high.getSizeUsed()).andReturn(6L);
-    EasyMock.expect(high.getPercentUsed()).andReturn(60.0);
-    EasyMock.expect(high.getMaxSize()).andReturn(10L);
-    EasyMock.replay(high);
-
-    EasyMock.expect(low.getSizeUsed()).andReturn(2L);
-    EasyMock.expect(low.getPercentUsed()).andReturn(20.0);
-    EasyMock.expect(low.getMaxSize()).andReturn(10L);
-    EasyMock.replay(low);
-
-    EasyMock.expect(segment.getSize()).andReturn(1L).atLeastOnce();
-    EasyMock.replay(segment);
-
-    EasyMock.expect(dataSource.getSegments()).andReturn(Sets.newHashSet(segment));
-    EasyMock.replay(dataSource);
-
-    EasyMock.expect(server.getDataSources()).andReturn(Lists.newArrayList(dataSource));
-    EasyMock.replay(server);
-
-    BalancerAnalyzer analyzer = new BalancerAnalyzer();
-
-    analyzer.init(high, low);
-
-    Assert.assertEquals(analyzer.findSegmentsToMove(server).iterator().next().getSegment(), segment);
-
-    EasyMock.verify(server);
-    EasyMock.verify(dataSource);
-    EasyMock.verify(segment);
-  }
-
-}
diff --git a/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTest.java b/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTest.java
index 7e1304aba7a..2ff1dade115 100644
--- a/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTest.java
+++ b/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTest.java
@@ -20,106 +20,142 @@ package com.metamx.druid.master;
 
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.MinMaxPriorityQueue;
-import com.google.common.collect.Sets;
 import com.metamx.druid.client.DataSegment;
-import com.metamx.druid.client.DruidDataSource;
 import com.metamx.druid.client.DruidServer;
+import com.metamx.druid.shard.NoneShardSpec;
+import com.metamx.phonebook.PhoneBook;
 import junit.framework.Assert;
 import org.easymock.EasyMock;
+import org.joda.time.DateTime;
+import org.joda.time.Interval;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ScheduledExecutorService;
 
 /**
 */
 public class DruidMasterBalancerTest
 {
+  private static final int MAX_SEGMENTS_TO_MOVE = 5;
   private DruidMaster master;
-  private DruidServer druidServerHigh;
-  private DruidServer druidServerLow;
+  private DruidServer druidServer1;
+  private DruidServer druidServer2;
+  private DruidServer druidServer3;
+  private DruidServer druidServer4;
   private DataSegment segment1;
   private DataSegment segment2;
   private DataSegment segment3;
   private DataSegment segment4;
-  private LoadQueuePeon peon;
-  private DruidDataSource dataSource;
+  Map segments;
 
   @Before
   public void setUp() throws Exception
   {
     master = EasyMock.createMock(DruidMaster.class);
-    druidServerHigh = EasyMock.createMock(DruidServer.class);
-    druidServerLow = EasyMock.createMock(DruidServer.class);
+    druidServer1 = EasyMock.createMock(DruidServer.class);
+    druidServer2 = EasyMock.createMock(DruidServer.class);
+    druidServer3 = EasyMock.createMock(DruidServer.class);
+    druidServer4 = EasyMock.createMock(DruidServer.class);
     segment1 = EasyMock.createMock(DataSegment.class);
     segment2 = EasyMock.createMock(DataSegment.class);
     segment3 = EasyMock.createMock(DataSegment.class);
     segment4 = EasyMock.createMock(DataSegment.class);
-    peon = EasyMock.createMock(LoadQueuePeon.class);
-    dataSource = EasyMock.createMock(DruidDataSource.class);
+
+    DateTime start1 = new DateTime("2012-01-01");
+    DateTime start2 = new DateTime("2012-02-01");
+    DateTime version = new DateTime("2012-03-01");
+    segment1 = new DataSegment(
+        "datasource1",
+        new Interval(start1, start1.plusHours(1)),
+        version.toString(),
+        Maps.newHashMap(),
+        Lists.newArrayList(),
+        Lists.newArrayList(),
+        new NoneShardSpec(),
+        0,
+        11L
+    );
+    segment2 = new DataSegment(
+        "datasource1",
+        new Interval(start2, start2.plusHours(1)),
+        version.toString(),
+        Maps.newHashMap(),
+        Lists.newArrayList(),
+        Lists.newArrayList(),
+        new NoneShardSpec(),
+        0,
+        7L
+    );
+    segment3 = new DataSegment(
+        "datasource2",
+        new Interval(start1, start1.plusHours(1)),
+        version.toString(),
+        Maps.newHashMap(),
+        Lists.newArrayList(),
+        Lists.newArrayList(),
+        new NoneShardSpec(),
+        0,
+        4L
+    );
+    segment4 = new DataSegment(
+        "datasource2",
+        new Interval(start2, start2.plusHours(1)),
+        version.toString(),
+        Maps.newHashMap(),
+        Lists.newArrayList(),
+        Lists.newArrayList(),
+        new NoneShardSpec(),
+        0,
+        8L
+    );
+
+    segments = new HashMap();
+    segments.put("datasource1_2012-01-01T00:00:00.000Z_2012-01-01T01:00:00.000Z_2012-03-01T00:00:00.000Z", segment1);
+    segments.put("datasource1_2012-02-01T00:00:00.000Z_2012-02-01T01:00:00.000Z_2012-03-01T00:00:00.000Z", segment2);
segments.put("datasource2_2012-01-01T00:00:00.000Z_2012-01-01T01:00:00.000Z_2012-03-01T00:00:00.000Z", segment3); + segments.put("datasource2_2012-02-01T00:00:00.000Z_2012-02-01T01:00:00.000Z_2012-03-01T00:00:00.000Z", segment4); } @After public void tearDown() throws Exception { EasyMock.verify(master); - EasyMock.verify(druidServerHigh); - EasyMock.verify(druidServerLow); - EasyMock.verify(segment1); - EasyMock.verify(segment2); - EasyMock.verify(segment3); - EasyMock.verify(segment4); - EasyMock.verify(peon); - EasyMock.verify(dataSource); + EasyMock.verify(druidServer1); + EasyMock.verify(druidServer2); + EasyMock.verify(druidServer3); + EasyMock.verify(druidServer4); } @Test - public void testRun() + public void testRun1() { // Mock some servers of different usages - EasyMock.expect(druidServerHigh.getName()).andReturn("from").atLeastOnce(); - EasyMock.expect(druidServerHigh.getCurrSize()).andReturn(30L).atLeastOnce(); - EasyMock.expect(druidServerHigh.getMaxSize()).andReturn(100L).atLeastOnce(); - EasyMock.expect(druidServerHigh.getDataSources()).andReturn(Arrays.asList(dataSource)).atLeastOnce(); - EasyMock.replay(druidServerHigh); - EasyMock.expect(druidServerLow.getName()).andReturn("to").atLeastOnce(); - EasyMock.expect(druidServerLow.getTier()).andReturn("normal").atLeastOnce(); - EasyMock.expect(druidServerLow.getCurrSize()).andReturn(0L).atLeastOnce(); - EasyMock.expect(druidServerLow.getMaxSize()).andReturn(100L).atLeastOnce(); - EasyMock.expect(druidServerLow.getDataSources()).andReturn(Arrays.asList(dataSource)).anyTimes(); - EasyMock.expect(druidServerLow.getSegment("segment1")).andReturn(null).anyTimes(); - EasyMock.expect(druidServerLow.getSegment("segment2")).andReturn(null).anyTimes(); - EasyMock.expect(druidServerLow.getSegment("segment3")).andReturn(null).anyTimes(); - EasyMock.expect(druidServerLow.getSegment("segment4")).andReturn(null).anyTimes(); - EasyMock.replay(druidServerLow); + EasyMock.expect(druidServer1.getName()).andReturn("from").atLeastOnce(); + EasyMock.expect(druidServer1.getCurrSize()).andReturn(30L).atLeastOnce(); + EasyMock.expect(druidServer1.getMaxSize()).andReturn(100L).atLeastOnce(); + EasyMock.expect(druidServer1.getSegments()).andReturn(segments).anyTimes(); + EasyMock.expect(druidServer1.getSegment(EasyMock.anyObject())).andReturn(null).anyTimes(); + EasyMock.replay(druidServer1); - // Mock a datasource - EasyMock.expect(dataSource.getSegments()).andReturn( - Sets.newHashSet( - segment1, - segment2, - segment3, - segment4 - ) - ).atLeastOnce(); - EasyMock.replay(dataSource); + EasyMock.expect(druidServer2.getName()).andReturn("to").atLeastOnce(); + EasyMock.expect(druidServer2.getTier()).andReturn("normal").anyTimes(); + EasyMock.expect(druidServer2.getCurrSize()).andReturn(0L).atLeastOnce(); + EasyMock.expect(druidServer2.getMaxSize()).andReturn(100L).atLeastOnce(); + EasyMock.expect(druidServer2.getSegments()).andReturn(new HashMap()).anyTimes(); + EasyMock.expect(druidServer2.getSegment(EasyMock.anyObject())).andReturn(null).anyTimes(); + EasyMock.replay(druidServer2); - // Mock some segments of different sizes - EasyMock.expect(segment1.getSize()).andReturn(11L).anyTimes(); - EasyMock.expect(segment1.getIdentifier()).andReturn("segment1").anyTimes(); - EasyMock.replay(segment1); - EasyMock.expect(segment2.getSize()).andReturn(7L).anyTimes(); - EasyMock.expect(segment2.getIdentifier()).andReturn("segment2").anyTimes(); - EasyMock.replay(segment2); - EasyMock.expect(segment3.getSize()).andReturn(4L).anyTimes(); - 
-    EasyMock.expect(segment3.getIdentifier()).andReturn("segment3").anyTimes();
-    EasyMock.replay(segment3);
-    EasyMock.expect(segment4.getSize()).andReturn(8L).anyTimes();
-    EasyMock.expect(segment4.getIdentifier()).andReturn("segment4").anyTimes();
-    EasyMock.replay(segment4);
+    EasyMock.replay(druidServer3);
+    EasyMock.replay(druidServer4);
 
     // Mock stuff that the master needs
     master.moveSegment(
@@ -128,12 +164,11 @@ public class DruidMasterBalancerTest
         EasyMock.anyObject(),
         EasyMock.anyObject(),
         EasyMock.anyObject()
     );
-    EasyMock.expectLastCall().atLeastOnce();
+    EasyMock.expectLastCall().anyTimes();
     EasyMock.replay(master);
 
-    EasyMock.expect(peon.getLoadQueueSize()).andReturn(0L).atLeastOnce();
-    EasyMock.expect(peon.getSegmentsToLoad()).andReturn(Sets.newHashSet()).atLeastOnce();
-    EasyMock.replay(peon);
+    LoadQueuePeonTester fromPeon = new LoadQueuePeonTester(EasyMock.createMock(PhoneBook.class), "from", EasyMock.createMock(ScheduledExecutorService.class));
+    LoadQueuePeonTester toPeon = new LoadQueuePeonTester(EasyMock.createMock(PhoneBook.class), "to", EasyMock.createMock(ScheduledExecutorService.class));
 
     DruidMasterRuntimeParams params =
         DruidMasterRuntimeParams.newBuilder()
@@ -141,20 +176,101 @@ public class DruidMasterBalancerTest
                                 .withDruidCluster(
                                     new DruidCluster(
                                         ImmutableMap.<String, MinMaxPriorityQueue<ServerHolder>>of(
                                             "normal",
-                                            MinMaxPriorityQueue.orderedBy(DruidMasterBalancer.percentUsedComparator)
+                                            MinMaxPriorityQueue.orderedBy(DruidMasterBalancerTester.percentUsedComparator)
                                                                .create(
                                                                    Arrays.asList(
-                                                                       new ServerHolder(druidServerHigh, peon),
-                                                                       new ServerHolder(druidServerLow, peon)
+                                                                       new ServerHolder(druidServer1, fromPeon),
+                                                                       new ServerHolder(druidServer2, toPeon)
                                                                    )
                                                                )
                                         )
                                     )
                                 )
-                                .withLoadManagementPeons(ImmutableMap.of("from", peon, "to", peon))
+                                .withLoadManagementPeons(ImmutableMap.of("from", fromPeon, "to", toPeon))
+                                .withAvailableSegments(segments.values())
+                                .withMaxSegmentsToMove(MAX_SEGMENTS_TO_MOVE)
+                                .withBalancerReferenceTimestamp(new DateTime("2013-01-01"))
                                 .build();
 
-    params = new DruidMasterBalancer(master, new BalancerAnalyzer()).run(params);
-    Assert.assertTrue(params.getMasterStats().getPerTierStats().get("movedCount").get("normal").get() > 0);
+    params = new DruidMasterBalancerTester(master).run(params);
+    Assert.assertTrue(params.getMasterStats().getPerTierStats().get("movedCount").get("normal").get() == 3);
+  }
+
+  @Test
+  public void testRun2()
+  {
+    // Mock some servers of different usages
+    EasyMock.expect(druidServer1.getName()).andReturn("1").atLeastOnce();
+    EasyMock.expect(druidServer1.getCurrSize()).andReturn(30L).atLeastOnce();
+    EasyMock.expect(druidServer1.getMaxSize()).andReturn(100L).atLeastOnce();
+    EasyMock.expect(druidServer1.getSegments()).andReturn(segments).anyTimes();
+    EasyMock.expect(druidServer1.getSegment(EasyMock.anyObject())).andReturn(null).anyTimes();
+    EasyMock.replay(druidServer1);
+
+    EasyMock.expect(druidServer2.getName()).andReturn("2").atLeastOnce();
+    EasyMock.expect(druidServer2.getTier()).andReturn("normal").anyTimes();
+    EasyMock.expect(druidServer2.getCurrSize()).andReturn(0L).atLeastOnce();
+    EasyMock.expect(druidServer2.getMaxSize()).andReturn(100L).atLeastOnce();
+    EasyMock.expect(druidServer2.getSegments()).andReturn(new HashMap()).anyTimes();
+    EasyMock.expect(druidServer2.getSegment(EasyMock.anyObject())).andReturn(null).anyTimes();
+    EasyMock.replay(druidServer2);
+
+    EasyMock.expect(druidServer3.getName()).andReturn("3").atLeastOnce();
+    EasyMock.expect(druidServer3.getTier()).andReturn("normal").anyTimes();
+    EasyMock.expect(druidServer3.getCurrSize()).andReturn(0L).atLeastOnce();
+    EasyMock.expect(druidServer3.getMaxSize()).andReturn(100L).atLeastOnce();
+    EasyMock.expect(druidServer3.getSegments()).andReturn(new HashMap()).anyTimes();
+    EasyMock.expect(druidServer3.getSegment(EasyMock.anyObject())).andReturn(null).anyTimes();
+    EasyMock.replay(druidServer3);
+
+    EasyMock.expect(druidServer4.getName()).andReturn("4").atLeastOnce();
+    EasyMock.expect(druidServer4.getTier()).andReturn("normal").anyTimes();
+    EasyMock.expect(druidServer4.getCurrSize()).andReturn(0L).atLeastOnce();
+    EasyMock.expect(druidServer4.getMaxSize()).andReturn(100L).atLeastOnce();
+    EasyMock.expect(druidServer4.getSegments()).andReturn(new HashMap()).anyTimes();
+    EasyMock.expect(druidServer4.getSegment(EasyMock.anyObject())).andReturn(null).anyTimes();
+    EasyMock.replay(druidServer4);
+
+    // Mock stuff that the master needs
+    master.moveSegment(
+        EasyMock.anyObject(),
+        EasyMock.anyObject(),
+        EasyMock.anyObject(),
+        EasyMock.anyObject()
+    );
+    EasyMock.expectLastCall().anyTimes();
+    EasyMock.replay(master);
+
+    LoadQueuePeonTester peon1 = new LoadQueuePeonTester(EasyMock.createMock(PhoneBook.class), "1", EasyMock.createMock(ScheduledExecutorService.class));
+    LoadQueuePeonTester peon2 = new LoadQueuePeonTester(EasyMock.createMock(PhoneBook.class), "2", EasyMock.createMock(ScheduledExecutorService.class));
+    LoadQueuePeonTester peon3 = new LoadQueuePeonTester(EasyMock.createMock(PhoneBook.class), "3", EasyMock.createMock(ScheduledExecutorService.class));
+    LoadQueuePeonTester peon4 = new LoadQueuePeonTester(EasyMock.createMock(PhoneBook.class), "4", EasyMock.createMock(ScheduledExecutorService.class));
+
+    DruidMasterRuntimeParams params =
+        DruidMasterRuntimeParams.newBuilder()
+                                .withDruidCluster(
+                                    new DruidCluster(
+                                        ImmutableMap.<String, MinMaxPriorityQueue<ServerHolder>>of(
+                                            "normal",
+                                            MinMaxPriorityQueue.orderedBy(DruidMasterBalancerTester.percentUsedComparator)
+                                                               .create(
+                                                                   Arrays.asList(
+                                                                       new ServerHolder(druidServer1, peon1),
+                                                                       new ServerHolder(druidServer2, peon2),
+                                                                       new ServerHolder(druidServer3, peon3),
+                                                                       new ServerHolder(druidServer4, peon4)
+                                                                   )
+                                                               )
+                                        )
+                                    )
+                                )
+                                .withLoadManagementPeons(ImmutableMap.of("1", peon1, "2", peon2, "3", peon3, "4", peon4))
+                                .withAvailableSegments(segments.values())
+                                .withMaxSegmentsToMove(MAX_SEGMENTS_TO_MOVE)
+                                .withBalancerReferenceTimestamp(new DateTime("2013-01-01"))
+                                .build();
+
+    params = new DruidMasterBalancerTester(master).run(params);
+    Assert.assertTrue(params.getMasterStats().getPerTierStats().get("movedCount").get("normal").get() == 4);
   }
 }
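// Note (editor sketch, not part of the patch): the keys put into the segments map in setUp()
// follow the identifier layout implied by the constructor arguments: dataSource + "_" +
// interval start + "_" + interval end + "_" + version, with timestamps printed in ISO-8601 UTC.
// A quick self-contained check, assuming a UTC zone so the output is deterministic; the helper
// below is hypothetical and is not DataSegment.getIdentifier().
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class SegmentIdentifierSketch
{
  static String identifier(String dataSource, Interval interval, String version)
  {
    return dataSource + "_" + interval.getStart() + "_" + interval.getEnd() + "_" + version;
  }

  public static void main(String[] args)
  {
    DateTime start1 = new DateTime("2012-01-01", DateTimeZone.UTC);
    DateTime version = new DateTime("2012-03-01", DateTimeZone.UTC);

    // Prints datasource1_2012-01-01T00:00:00.000Z_2012-01-01T01:00:00.000Z_2012-03-01T00:00:00.000Z,
    // i.e. the first key used in the segments map of the test above.
    System.out.println(
        identifier("datasource1", new Interval(start1, start1.plusHours(1)), version.toString())
    );
  }
}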
diff --git a/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTester.java b/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTester.java
new file mode 100644
index 00000000000..3b5865fb361
--- /dev/null
+++ b/server/src/test/java/com/metamx/druid/master/DruidMasterBalancerTester.java
@@ -0,0 +1,57 @@
+package com.metamx.druid.master;
+
+import com.metamx.druid.client.DataSegment;
+import com.metamx.druid.client.DruidServer;
+
+public class DruidMasterBalancerTester extends DruidMasterBalancer
+{
+  public DruidMasterBalancerTester(DruidMaster master)
+  {
+    super(master);
+  }
+
+  @Override
+  protected void moveSegment(
+      final BalancerSegmentHolder segment,
+      final DruidServer toServer,
+      final DruidMasterRuntimeParams params
+  )
+  {
+    final String toServerName = toServer.getName();
+    final LoadQueuePeon toPeon = params.getLoadManagementPeons().get(toServerName);
+
+    final String fromServerName = segment.getFromServer().getName();
+    final DataSegment segmentToMove = segment.getSegment();
+    final String segmentName = segmentToMove.getIdentifier();
+
+    if (!toPeon.getSegmentsToLoad().contains(segmentToMove) &&
+        !currentlyMovingSegments.get("normal").containsKey(segmentName) &&
+        !toServer.getSegments().containsKey(segmentName) &&
+        new ServerHolder(toServer, toPeon).getAvailableSize() > segmentToMove.getSize()) {
+      log.info(
+          "Moving [%s] from [%s] to [%s]",
+          segmentName,
+          fromServerName,
+          toServerName
+      );
+      try {
+        final LoadQueuePeon loadPeon = params.getLoadManagementPeons().get(toServerName);
+
+        loadPeon.loadSegment(segment.getSegment(), new LoadPeonCallback()
+        {
+          @Override
+          protected void execute()
+          {
+          }
+        });
+
+        currentlyMovingSegments.get("normal").put(segmentName, segment);
+      }
+      catch (Exception e) {
+        log.info(e, String.format("[%s] : Moving exception", segmentName));
+      }
+    } else {
+      currentlyMovingSegments.get("normal").remove(segment);
+    }
+  }
+}
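// Note (editor sketch, not part of the patch): the moveSegment override above only queues a move
// when four checks all pass: the target peon is not already loading the segment, the segment is
// not already in flight, the target server does not already serve it, and the target has room.
// A standalone restatement of that guard using plain booleans instead of the Druid types,
// for illustration only.
public class MoveGuardSketch
{
  public static boolean canMove(
      boolean alreadyQueuedOnTarget,
      boolean alreadyMoving,
      boolean alreadyServedByTarget,
      long availableBytesOnTarget,
      long segmentBytes
  )
  {
    return !alreadyQueuedOnTarget
           && !alreadyMoving
           && !alreadyServedByTarget
           && availableBytesOnTarget > segmentBytes;
  }

  public static void main(String[] args)
  {
    // Mirrors the happy path in DruidMasterBalancerTester.moveSegment above.
    System.out.println(canMove(false, false, false, 100L, 11L)); // true
    // A segment already served by the target is skipped.
    System.out.println(canMove(false, false, true, 100L, 11L));  // false
  }
}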
DateTime("2013-01-01")) .build(); DruidMasterRuntimeParams afterParams = ruleRunner.run(params); @@ -752,6 +760,7 @@ public class DruidMasterRuleRunnerTest .withAvailableSegments(availableSegments) .withDatabaseRuleManager(databaseRuleManager) .withSegmentReplicantLookup(segmentReplicantLookup) + .withBalancerReferenceTimestamp(new DateTime("2013-01-01")) .build(); DruidMasterRuntimeParams afterParams = ruleRunner.run(params); @@ -840,6 +849,7 @@ public class DruidMasterRuleRunnerTest .withAvailableSegments(availableSegments) .withDatabaseRuleManager(databaseRuleManager) .withSegmentReplicantLookup(segmentReplicantLookup) + .withBalancerReferenceTimestamp(new DateTime("2013-01-01")) .build(); DruidMasterRuntimeParams afterParams = ruleRunner.run(params); @@ -1025,6 +1035,7 @@ public class DruidMasterRuleRunnerTest .withAvailableSegments(longerAvailableSegments) .withDatabaseRuleManager(databaseRuleManager) .withSegmentReplicantLookup(segmentReplicantLookup) + .withBalancerReferenceTimestamp(new DateTime("2013-01-01")) .build(); DruidMasterRuntimeParams afterParams = ruleRunner.run(params); diff --git a/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java b/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java index 8b41b332e78..895659586ec 100644 --- a/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java +++ b/server/src/test/java/com/metamx/druid/master/DruidMasterTest.java @@ -124,6 +124,13 @@ public class DruidMasterTest return ""; } + @Override + public int getMaxSegmentsToMove() + { + return 0; + } + + @Override public int getReplicantLifetime() { diff --git a/server/src/test/java/com/metamx/druid/master/LoadQueuePeonTester.java b/server/src/test/java/com/metamx/druid/master/LoadQueuePeonTester.java new file mode 100644 index 00000000000..9363bd2e95c --- /dev/null +++ b/server/src/test/java/com/metamx/druid/master/LoadQueuePeonTester.java @@ -0,0 +1,37 @@ +package com.metamx.druid.master; + +import com.metamx.druid.client.DataSegment; +import com.metamx.phonebook.PhoneBook; + +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ScheduledExecutorService; + +public class LoadQueuePeonTester extends LoadQueuePeon +{ + private ConcurrentSkipListSet segmentsToLoad; + + public LoadQueuePeonTester( + PhoneBook yp, + String basePath, + ScheduledExecutorService zkWritingExecutor + ) + { + super(yp, basePath, zkWritingExecutor); + } + + @Override + public void loadSegment( + DataSegment segment, + LoadPeonCallback callback + ) + { + if(segmentsToLoad == null) segmentsToLoad = new ConcurrentSkipListSet(); + segmentsToLoad.add(segment); + } + + public ConcurrentSkipListSet getSegmentsToLoad() + { + if(segmentsToLoad == null) segmentsToLoad = new ConcurrentSkipListSet(); + return segmentsToLoad; + } +}