mirror of https://github.com/apache/druid.git
Add compaction task (#4985)
* Add compaction task
* added doc
* use combining aggregators
* address comments
* add support for dimensionsSpec
* fix getUniqueDims and getUniqueMetrics
* find unique dimensionsSpec
* fix compilation
* add unit test
* fix test
* fix test
* test for different dimension orderings and types, and doc for type and ordering
* add control for custom ordering and type
* update doc
* fix compile
* fix compile
* add segments param
* fix serde error
* fix build
This commit is contained in:
parent 5eb08c27cb
commit 5f3c863d5e
@@ -29,6 +29,8 @@ import com.google.common.base.Preconditions;
import io.druid.guice.annotations.PublicApi;
import io.druid.java.util.common.StringUtils;

import java.util.Objects;

/**
 */
@PublicApi

@@ -116,7 +118,7 @@ public abstract class DimensionSchema
  protected DimensionSchema(String name, MultiValueHandling multiValueHandling)
  {
    this.name = Preconditions.checkNotNull(name, "Dimension name cannot be null.");
    this.multiValueHandling = multiValueHandling;
    this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
  }

  @JsonProperty

@@ -149,13 +151,30 @@ public abstract class DimensionSchema

    DimensionSchema that = (DimensionSchema) o;

    return name.equals(that.name);
    if (!name.equals(that.name)) {
      return false;
    }

    if (!getValueType().equals(that.getValueType())) {
      return false;
    }

    return Objects.equals(multiValueHandling, that.multiValueHandling);
  }

  @Override
  public int hashCode()
  {
    return name.hashCode();
    return Objects.hash(name, getValueType(), multiValueHandling);
  }

  @Override
  public String toString()
  {
    return "DimensionSchema{" +
           "name='" + name + "'" +
           ", valueType='" + getValueType() + "'" +
           ", multiValueHandling='" + getMultiValueHandling() + "'" +
           "}";
  }
}

@@ -43,6 +43,7 @@ public class NoopInputRowParser implements InputRowParser<InputRow>
    return input;
  }

  @JsonProperty
  @Override
  public ParseSpec getParseSpec()
  {

@@ -21,7 +21,7 @@ package io.druid.data.input.impl;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import junit.framework.Assert;
import org.junit.Assert;
import org.junit.Test;

/**

@@ -79,7 +79,7 @@ A sample ingest firehose spec is shown below -
|interval|A String representing ISO-8601 Interval. This defines the time range to fetch the data over.|yes|
|dimensions|The list of dimensions to select. If left empty, no dimensions are returned. If left null or not defined, all dimensions are returned.|no|
|metrics|The list of metrics to select. If left empty, no metrics are returned. If left null or not defined, all metrics are selected.|no|
|filter| See [Filters](../querying/filters.html)|yes|
|filter| See [Filters](../querying/filters.html)|no|
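For reference, a hedged sketch of an ingest firehose spec using the fields above; the `type` string `ingestSegment` comes from the Druid firehose documentation rather than this diff, and the dataSource, interval, dimension, and metric values are illustrative placeholders:

```json
{
  "type": "ingestSegment",
  "dataSource": "wikipedia",
  "interval": "2017-01-01/2017-02-01",
  "dimensions": ["page", "language"],
  "metrics": ["count", "added"]
}
```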
#### CombiningFirehose

@@ -104,7 +104,7 @@ Tasks can have different default priorities depending on their types. Here are a
|---------|----------------|
|Realtime index task|75|
|Batch index task|50|
|Merge/Append task|25|
|Merge/Append/Compaction task|25|
|Other tasks|0|

You can override the task priority by setting your priority in the task context like below.
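A minimal sketch of such an override, assuming the context key is `priority` (the key registered as `Tasks.PRIORITY_KEY` later in this commit); the value 100 is an arbitrary example:

```json
"context" : {
  "priority" : 100
}
```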
@@ -184,19 +184,6 @@ On the contrary, in the incremental publishing mode, segments are incrementally

To enable bulk publishing mode, `forceGuaranteedRollup` should be set in the TuningConfig. Note that this option cannot be used with either `forceExtendableShardSpecs` of TuningConfig or `appendToExisting` of IOConfig.

### Task Context

The task context is used for various task configuration parameters. The following parameters apply to all tasks.

|property|default|description|
|--------|-------|-----------|
|taskLockTimeout|300000|task lock timeout in millisecond. For more details, see [the below Locking section](#locking).|

<div class="note caution">
When a task acquires a lock, it sends a request via HTTP and awaits until it receives a response containing the lock acquisition result.
As a result, an HTTP timeout error can occur if `taskLockTimeout` is greater than `druid.server.http.maxIdleTime` of overlords.
</div>

Segment Merging Tasks
---------------------

@@ -210,7 +197,8 @@ Append tasks append a list of segments together into a single segment (one after
    "id": <task_id>,
    "dataSource": <task_datasource>,
    "segments": <JSON list of DataSegment objects to append>,
    "aggregations": <optional list of aggregators>
    "aggregations": <optional list of aggregators>,
    "context": <task context>
}
```

@@ -228,7 +216,8 @@ The grammar is:
    "dataSource": <task_datasource>,
    "aggregations": <list of aggregators>,
    "rollup": <whether or not to rollup data during a merge>,
    "segments": <JSON list of DataSegment objects to merge>
    "segments": <JSON list of DataSegment objects to merge>,
    "context": <task context>
}
```

@@ -245,10 +234,67 @@ The grammar is:
    "dataSource": <task_datasource>,
    "aggregations": <list of aggregators>,
    "rollup": <whether or not to rollup data during a merge>,
    "interval": <DataSegment objects in this interval are going to be merged>
    "interval": <DataSegment objects in this interval are going to be merged>,
    "context": <task context>
}
```

### Compaction Task

Compaction tasks merge all segments of the given interval. The syntax is:

```json
{
    "type": "compact",
    "id": <task_id>,
    "dataSource": <task_datasource>,
    "interval": <interval to specify segments to be merged>,
    "dimensions": <custom dimensionsSpec>,
    "tuningConfig": <index task tuningConfig>,
    "context": <task context>
}
```

|Field|Description|Required|
|-----|-----------|--------|
|`type`|Task type. Should be `compact`.|Yes|
|`id`|Task id|No|
|`dataSource`|dataSource name to be compacted|Yes|
|`interval`|Interval of segments to be compacted|Yes|
|`dimensions`|Custom dimensionsSpec. The compaction task will use this dimensionsSpec, if provided, instead of generating one. See below for more details.|No|
|`tuningConfig`|[Index task tuningConfig](#tuningconfig)|No|
|`context`|[Task context](#taskcontext)|No|

An example compaction task is:

```json
{
  "type" : "compact",
  "dataSource" : "wikipedia",
  "interval" : "2017-01-01/2018-01-01"
}
```

This compaction task merges _all segments_ of the interval `2017-01-01/2018-01-01` into a _single segment_.
To merge each day's worth of data into a separate segment, you can submit multiple `compact` tasks, one for each day. They will run in parallel.

A compaction task internally generates an `index` task spec for performing compaction work with some fixed parameters.
For example, its `firehose` is always the [ingestSegmentSpec](./firehose.html), and its `dimensionsSpec` and `metricsSpec`
include all dimensions and metrics of the input segments by default.
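To illustrate, here is a rough, hypothetical sketch of the shape of the spec the compaction task builds. The field layout follows the index task and `ingestSegment` firehose documentation rather than anything shown verbatim in this commit, and the `<...>` parts are filled in from the input segments as described below:

```json
{
  "type": "index",
  "spec": {
    "dataSchema": {
      "dataSource": "wikipedia",
      "parser": <generated from the dimensions of the input segments>,
      "metricsSpec": <combining aggregators merged from the input segments>,
      "granularitySpec": <arbitrary granularity spec covering the compaction interval>
    },
    "ioConfig": {
      "type": "index",
      "firehose": {
        "type": "ingestSegment",
        "dataSource": "wikipedia",
        "interval": "2017-01-01/2018-01-01"
      },
      "appendToExisting": false
    },
    "tuningConfig": <index task tuningConfig, if given>
  }
}
```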
The output segment can have different metadata from the input segments unless all input segments have the same metadata.

- Dimensions: since Druid supports schema change, the dimensions can be different across segments even if they are part of the same dataSource.
If the input segments have different dimensions, the output segment essentially includes all dimensions of the input segments.
However, even if the input segments have the same set of dimensions, the dimension order or the data type of dimensions can be different. For example, the data type of some dimensions can be
changed from `string` to primitive types, or the order of dimensions can be changed for better locality (see [Partitioning](batch-ingestion.html#partitioning-specification)).
In this case, the dimensions of recent segments precede those of old segments in terms of data types and ordering.
This is because more recent segments are more likely to have the new desired order and data types. If you want to use
your own ordering and types, you can specify a custom `dimensionsSpec` in the compaction task spec, as in the sketch after this list.
- Roll-up: the output segment is rolled up only when `rollup` is set for all input segments.
See [Roll-up](../design/index.html#roll-up) for more details.
You can check whether your segments are rolled up by using [Segment Metadata Queries](../querying/segmentmetadataquery.html#analysistypes).
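A hypothetical example of a compaction task that supplies its own `dimensionsSpec`; the dimension names are illustrative placeholders, not values taken from this commit:

```json
{
  "type" : "compact",
  "dataSource" : "wikipedia",
  "interval" : "2017-01-01/2018-01-01",
  "dimensions" : {
    "dimensions" : ["page", "language", "user"]
  }
}
```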
Segment Destroying Tasks
------------------------

@@ -261,7 +307,8 @@ Kill tasks delete all information about a segment and remove it from deep stora
    "type": "kill",
    "id": <task_id>,
    "dataSource": <task_datasource>,
    "interval" : <all_segments_in_this_interval_will_die!>
    "interval" : <all_segments_in_this_interval_will_die!>,
    "context": <task context>
}
```

@@ -342,6 +389,21 @@ These tasks start, sleep for a time and are used only for testing. The available
}
```

Task Context
------------

The task context is used for various task configuration parameters. The following parameters apply to all task types.

|property|default|description|
|--------|-------|-----------|
|taskLockTimeout|300000|Task lock timeout in milliseconds. For more details, see [the Locking section below](#locking).|
|priority|Differs based on the task type. See [Task Priority](#task-priority).|Task priority|

<div class="note caution">
When a task acquires a lock, it sends a request via HTTP and waits until it receives a response containing the lock acquisition result.
As a result, an HTTP timeout error can occur if `taskLockTimeout` is greater than `druid.server.http.maxIdleTime` of overlords.
</div>
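A minimal sketch of a task context that sets both parameters from the table above; the values are arbitrary examples, and `taskLockTimeout` should stay below the overlord's `druid.server.http.maxIdleTime` as the note cautions:

```json
"context" : {
  "taskLockTimeout" : 300000,
  "priority" : 100
}
```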
Locking
-------

@@ -190,7 +190,7 @@ public class VarianceAggregatorFactory extends AggregatorFactory
  @Override
  public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
  {
    if (Objects.equals(getName(), other.getName()) && this.getClass() == other.getClass()) {
    if (Objects.equals(getName(), other.getName()) && other instanceof VarianceAggregatorFactory) {
      return getCombiningFactory();
    } else {
      throw new AggregatorFactoryNotMergeableException(this, other);

@@ -33,7 +33,9 @@ import io.druid.query.Query;
import io.druid.query.QueryRunner;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

@@ -80,15 +82,27 @@ public abstract class AbstractTask implements Task
    this.context = context;
  }

  public static String makeId(String id, final String typeName, String dataSource, Interval interval)
  static String getOrMakeId(String id, final String typeName, String dataSource)
  {
    return id != null ? id : joinId(
        typeName,
        dataSource,
        interval.getStart(),
        interval.getEnd(),
        DateTimes.nowUtc().toString()
    );
    return getOrMakeId(id, typeName, dataSource, null);
  }

  static String getOrMakeId(String id, final String typeName, String dataSource, @Nullable Interval interval)
  {
    if (id != null) {
      return id;
    }

    final List<Object> objects = new ArrayList<>();
    objects.add(typeName);
    objects.add(dataSource);
    if (interval != null) {
      objects.add(interval.getStart());
      objects.add(interval.getEnd());
    }
    objects.add(DateTimes.nowUtc().toString());

    return joinId(objects);
  }

  @JsonProperty

@@ -167,7 +181,12 @@ public abstract class AbstractTask implements Task
   *
   * @return string of joined objects
   */
  public static String joinId(Object... objects)
  static String joinId(List<Object> objects)
  {
    return ID_JOINER.join(objects);
  }

  static String joinId(Object...objects)
  {
    return ID_JOINER.join(objects);
  }

@@ -202,7 +221,7 @@ public abstract class AbstractTask implements Task
    return id.hashCode();
  }

  protected List<TaskLock> getTaskLocks(TaskActionClient client) throws IOException
  static List<TaskLock> getTaskLocks(TaskActionClient client) throws IOException
  {
    return client.submit(new LockListAction());
  }

@@ -47,7 +47,7 @@ public class ArchiveTask extends AbstractFixedIntervalTask
  )
  {
    super(
        makeId(id, "archive", dataSource, interval),
        getOrMakeId(id, "archive", dataSource, interval),
        dataSource,
        interval,
        context

@ -0,0 +1,495 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.indexing.common.task;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JacksonInject;
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.BiMap;
|
||||
import com.google.common.collect.HashBiMap;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.inject.Injector;
|
||||
import io.druid.data.input.impl.DimensionSchema;
|
||||
import io.druid.data.input.impl.DimensionSchema.MultiValueHandling;
|
||||
import io.druid.data.input.impl.DimensionsSpec;
|
||||
import io.druid.data.input.impl.DoubleDimensionSchema;
|
||||
import io.druid.data.input.impl.FloatDimensionSchema;
|
||||
import io.druid.data.input.impl.InputRowParser;
|
||||
import io.druid.data.input.impl.LongDimensionSchema;
|
||||
import io.druid.data.input.impl.NoopInputRowParser;
|
||||
import io.druid.data.input.impl.StringDimensionSchema;
|
||||
import io.druid.data.input.impl.TimeAndDimsParseSpec;
|
||||
import io.druid.indexing.common.TaskStatus;
|
||||
import io.druid.indexing.common.TaskToolbox;
|
||||
import io.druid.indexing.common.actions.SegmentListUsedAction;
|
||||
import io.druid.indexing.common.actions.TaskActionClient;
|
||||
import io.druid.indexing.common.task.IndexTask.IndexIOConfig;
|
||||
import io.druid.indexing.common.task.IndexTask.IndexIngestionSpec;
|
||||
import io.druid.indexing.common.task.IndexTask.IndexTuningConfig;
|
||||
import io.druid.indexing.firehose.IngestSegmentFirehoseFactory;
|
||||
import io.druid.java.util.common.ISE;
|
||||
import io.druid.java.util.common.JodaUtils;
|
||||
import io.druid.java.util.common.Pair;
|
||||
import io.druid.java.util.common.granularity.NoneGranularity;
|
||||
import io.druid.java.util.common.guava.Comparators;
|
||||
import io.druid.java.util.common.jackson.JacksonUtils;
|
||||
import io.druid.java.util.common.logger.Logger;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.segment.DimensionHandler;
|
||||
import io.druid.segment.IndexIO;
|
||||
import io.druid.segment.QueryableIndex;
|
||||
import io.druid.segment.column.Column;
|
||||
import io.druid.segment.column.ValueType;
|
||||
import io.druid.segment.indexing.DataSchema;
|
||||
import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec;
|
||||
import io.druid.segment.indexing.granularity.GranularitySpec;
|
||||
import io.druid.segment.loading.SegmentLoadingException;
|
||||
import io.druid.timeline.DataSegment;
|
||||
import io.druid.timeline.TimelineObjectHolder;
|
||||
import io.druid.timeline.VersionedIntervalTimeline;
|
||||
import io.druid.timeline.partition.PartitionChunk;
|
||||
import io.druid.timeline.partition.PartitionHolder;
|
||||
import org.joda.time.Interval;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class CompactionTask extends AbstractTask
|
||||
{
|
||||
private static final Logger log = new Logger(CompactionTask.class);
|
||||
private static final String TYPE = "compact";
|
||||
|
||||
private final Interval interval;
|
||||
private final List<DataSegment> segments;
|
||||
private final DimensionsSpec dimensionsSpec;
|
||||
private final IndexTuningConfig tuningConfig;
|
||||
private final Injector injector;
|
||||
private final ObjectMapper jsonMapper;
|
||||
@JsonIgnore
|
||||
private final SegmentProvider segmentProvider;
|
||||
|
||||
@JsonIgnore
|
||||
private IndexTask indexTaskSpec;
|
||||
|
||||
@JsonCreator
|
||||
public CompactionTask(
|
||||
@JsonProperty("id") final String id,
|
||||
@JsonProperty("resource") final TaskResource taskResource,
|
||||
@JsonProperty("dataSource") final String dataSource,
|
||||
@Nullable @JsonProperty("interval") final Interval interval,
|
||||
@Nullable @JsonProperty("segments") final List<DataSegment> segments,
|
||||
@Nullable @JsonProperty("dimensions") final DimensionsSpec dimensionsSpec,
|
||||
@Nullable @JsonProperty("tuningConfig") final IndexTuningConfig tuningConfig,
|
||||
@Nullable @JsonProperty("context") final Map<String, Object> context,
|
||||
@JacksonInject Injector injector,
|
||||
@JacksonInject ObjectMapper jsonMapper
|
||||
)
|
||||
{
|
||||
super(getOrMakeId(id, TYPE, dataSource), null, taskResource, dataSource, context);
|
||||
Preconditions.checkArgument(interval != null || segments != null, "interval or segments should be specified");
|
||||
Preconditions.checkArgument(interval == null || segments == null, "one of interval and segments should be null");
|
||||
|
||||
this.interval = interval;
|
||||
this.segments = segments;
|
||||
this.dimensionsSpec = dimensionsSpec;
|
||||
this.tuningConfig = tuningConfig;
|
||||
this.injector = injector;
|
||||
this.jsonMapper = jsonMapper;
|
||||
this.segmentProvider = segments == null ? new SegmentProvider(dataSource, interval) : new SegmentProvider(segments);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public Interval getInterval()
|
||||
{
|
||||
return interval;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public List<DataSegment> getSegments()
|
||||
{
|
||||
return segments;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public DimensionsSpec getDimensionsSpec()
|
||||
{
|
||||
return dimensionsSpec;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public IndexTuningConfig getTuningConfig()
|
||||
{
|
||||
return tuningConfig;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getType()
|
||||
{
|
||||
return TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPriority()
|
||||
{
|
||||
return getContextValue(Tasks.PRIORITY_KEY, Tasks.DEFAULT_MERGE_TASK_PRIORITY);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SegmentProvider getSegmentProvider()
|
||||
{
|
||||
return segmentProvider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReady(TaskActionClient taskActionClient) throws Exception
|
||||
{
|
||||
final SortedSet<Interval> intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd());
|
||||
intervals.add(segmentProvider.interval);
|
||||
return IndexTask.isReady(taskActionClient, intervals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TaskStatus run(final TaskToolbox toolbox) throws Exception
|
||||
{
|
||||
if (indexTaskSpec == null) {
|
||||
final IndexIngestionSpec ingestionSpec = createIngestionSchema(
|
||||
toolbox,
|
||||
segmentProvider,
|
||||
dimensionsSpec,
|
||||
tuningConfig,
|
||||
injector,
|
||||
jsonMapper
|
||||
);
|
||||
|
||||
indexTaskSpec = new IndexTask(
|
||||
getId(),
|
||||
getGroupId(),
|
||||
getTaskResource(),
|
||||
getDataSource(),
|
||||
ingestionSpec,
|
||||
getContext()
|
||||
);
|
||||
}
|
||||
|
||||
if (indexTaskSpec.getIngestionSchema() == null) {
|
||||
log.info("Cannot find segments for interval");
|
||||
}
|
||||
|
||||
final String json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(indexTaskSpec);
|
||||
log.info("Generated compaction task details: " + json);
|
||||
|
||||
return indexTaskSpec.run(toolbox);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static IndexIngestionSpec createIngestionSchema(
|
||||
TaskToolbox toolbox,
|
||||
SegmentProvider segmentProvider,
|
||||
DimensionsSpec dimensionsSpec,
|
||||
IndexTuningConfig tuningConfig,
|
||||
Injector injector,
|
||||
ObjectMapper jsonMapper
|
||||
) throws IOException, SegmentLoadingException
|
||||
{
|
||||
Pair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> pair = prepareSegments(
|
||||
toolbox,
|
||||
segmentProvider
|
||||
);
|
||||
final Map<DataSegment, File> segmentFileMap = pair.lhs;
|
||||
final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = pair.rhs;
|
||||
|
||||
if (timelineSegments.size() == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final DataSchema dataSchema = createDataSchema(
|
||||
segmentProvider.dataSource,
|
||||
segmentProvider.interval,
|
||||
dimensionsSpec,
|
||||
toolbox.getIndexIO(),
|
||||
jsonMapper,
|
||||
timelineSegments,
|
||||
segmentFileMap
|
||||
);
|
||||
return new IndexIngestionSpec(
|
||||
dataSchema,
|
||||
new IndexIOConfig(
|
||||
new IngestSegmentFirehoseFactory(
|
||||
segmentProvider.dataSource,
|
||||
segmentProvider.interval,
|
||||
null, // no filter
|
||||
// set dimensions and metrics names to make sure that the generated dataSchema is used for the firehose
|
||||
dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
|
||||
Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()),
|
||||
injector,
|
||||
toolbox.getIndexIO()
|
||||
),
|
||||
false
|
||||
),
|
||||
tuningConfig
|
||||
);
|
||||
}
|
||||
|
||||
private static Pair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> prepareSegments(
|
||||
TaskToolbox toolbox,
|
||||
SegmentProvider segmentProvider
|
||||
) throws IOException, SegmentLoadingException
|
||||
{
|
||||
final List<DataSegment> usedSegments = segmentProvider.checkAndGetSegments(toolbox);
|
||||
final Map<DataSegment, File> segmentFileMap = toolbox.fetchSegments(usedSegments);
|
||||
final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = VersionedIntervalTimeline
|
||||
.forSegments(usedSegments)
|
||||
.lookup(segmentProvider.interval);
|
||||
return Pair.of(segmentFileMap, timelineSegments);
|
||||
}
|
||||
|
||||
private static DataSchema createDataSchema(
|
||||
String dataSource,
|
||||
Interval interval,
|
||||
DimensionsSpec dimensionsSpec,
|
||||
IndexIO indexIO,
|
||||
ObjectMapper jsonMapper,
|
||||
List<TimelineObjectHolder<String, DataSegment>> timelineSegments,
|
||||
Map<DataSegment, File> segmentFileMap
|
||||
)
|
||||
throws IOException, SegmentLoadingException
|
||||
{
|
||||
// find metadata for interval
|
||||
final List<QueryableIndex> queryableIndices = loadSegments(timelineSegments, segmentFileMap, indexIO);
|
||||
|
||||
// find merged aggregators
|
||||
final List<AggregatorFactory[]> aggregatorFactories = queryableIndices
|
||||
.stream()
|
||||
.map(index -> index.getMetadata().getAggregators())
|
||||
.collect(Collectors.toList());
|
||||
final AggregatorFactory[] mergedAggregators = AggregatorFactory.mergeAggregators(aggregatorFactories);
|
||||
|
||||
if (mergedAggregators == null) {
|
||||
throw new ISE("Failed to merge aggregators[%s]", aggregatorFactories);
|
||||
}
|
||||
|
||||
// find granularity spec
|
||||
// set rollup only if rollup is set for all segments
|
||||
final boolean rollup = queryableIndices.stream().allMatch(index -> index.getMetadata().isRollup());
|
||||
final GranularitySpec granularitySpec = new ArbitraryGranularitySpec(
|
||||
new NoneGranularity(),
|
||||
rollup,
|
||||
ImmutableList.of(interval)
|
||||
);
|
||||
|
||||
// find unique dimensions
|
||||
final DimensionsSpec finalDimensionsSpec = dimensionsSpec == null ?
|
||||
createDimensionsSpec(queryableIndices) :
|
||||
dimensionsSpec;
|
||||
final InputRowParser parser = new NoopInputRowParser(new TimeAndDimsParseSpec(null, finalDimensionsSpec));
|
||||
|
||||
return new DataSchema(
|
||||
dataSource,
|
||||
jsonMapper.convertValue(parser, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT),
|
||||
mergedAggregators,
|
||||
granularitySpec,
|
||||
null,
|
||||
jsonMapper
|
||||
);
|
||||
}
|
||||
|
||||
private static DimensionsSpec createDimensionsSpec(List<QueryableIndex> queryableIndices)
|
||||
{
|
||||
final BiMap<String, Integer> uniqueDims = HashBiMap.create();
|
||||
final Map<String, DimensionSchema> dimensionSchemaMap = new HashMap<>();
|
||||
|
||||
// Here, we try to retain the order of dimensions as they were specified since the order of dimensions may be
|
||||
// optimized for performance.
|
||||
// Dimensions are extracted from the most recent segments to the oldest because recent segments are likely to be queried more
|
||||
// frequently, and thus the performance should be optimized for recent ones rather than old ones.
|
||||
|
||||
// timelineSegments are sorted in order of interval
|
||||
int index = 0;
|
||||
for (QueryableIndex queryableIndex : Lists.reverse(queryableIndices)) {
|
||||
final Map<String, DimensionHandler> dimensionHandlerMap = queryableIndex.getDimensionHandlers();
|
||||
|
||||
for (String dimension : queryableIndex.getAvailableDimensions()) {
|
||||
final Column column = Preconditions.checkNotNull(
|
||||
queryableIndex.getColumn(dimension),
|
||||
"Cannot find column for dimension[%s]",
|
||||
dimension
|
||||
);
|
||||
|
||||
if (!uniqueDims.containsKey(dimension)) {
|
||||
final DimensionHandler dimensionHandler = Preconditions.checkNotNull(
|
||||
dimensionHandlerMap.get(dimension),
|
||||
"Cannot find dimensionHandler for dimension[%s]",
|
||||
dimension
|
||||
);
|
||||
|
||||
uniqueDims.put(dimension, index++);
|
||||
dimensionSchemaMap.put(
|
||||
dimension,
|
||||
createDimensionSchema(
|
||||
column.getCapabilities().getType(),
|
||||
dimension,
|
||||
dimensionHandler.getMultivalueHandling()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final BiMap<Integer, String> orderedDims = uniqueDims.inverse();
|
||||
final List<DimensionSchema> dimensionSchemas = IntStream.range(0, orderedDims.size())
|
||||
.mapToObj(i -> {
|
||||
final String dimName = orderedDims.get(i);
|
||||
return Preconditions.checkNotNull(
|
||||
dimensionSchemaMap.get(dimName),
|
||||
"Cannot find dimension[%s] from dimensionSchemaMap",
|
||||
dimName
|
||||
);
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return new DimensionsSpec(dimensionSchemas, null, null);
|
||||
}
|
||||
|
||||
private static List<QueryableIndex> loadSegments(
|
||||
List<TimelineObjectHolder<String, DataSegment>> timelineSegments,
|
||||
Map<DataSegment, File> segmentFileMap,
|
||||
IndexIO indexIO
|
||||
) throws IOException
|
||||
{
|
||||
final List<QueryableIndex> segments = new ArrayList<>();
|
||||
|
||||
for (TimelineObjectHolder<String, DataSegment> timelineSegment : timelineSegments) {
|
||||
final PartitionHolder<DataSegment> partitionHolder = timelineSegment.getObject();
|
||||
for (PartitionChunk<DataSegment> chunk : partitionHolder) {
|
||||
final DataSegment segment = chunk.getObject();
|
||||
segments.add(
|
||||
indexIO.loadIndex(
|
||||
Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier())
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return segments;
|
||||
}
|
||||
|
||||
private static DimensionSchema createDimensionSchema(
|
||||
ValueType type,
|
||||
String name,
|
||||
MultiValueHandling multiValueHandling
|
||||
)
|
||||
{
|
||||
switch (type) {
|
||||
case FLOAT:
|
||||
Preconditions.checkArgument(
|
||||
multiValueHandling == null,
|
||||
"multi-value dimension [%s] is not supported for float type yet",
|
||||
name
|
||||
);
|
||||
return new FloatDimensionSchema(name);
|
||||
case LONG:
|
||||
Preconditions.checkArgument(
|
||||
multiValueHandling == null,
|
||||
"multi-value dimension [%s] is not supported for long type yet",
|
||||
name
|
||||
);
|
||||
return new LongDimensionSchema(name);
|
||||
case DOUBLE:
|
||||
Preconditions.checkArgument(
|
||||
multiValueHandling == null,
|
||||
"multi-value dimension [%s] is not supported for double type yet",
|
||||
name
|
||||
);
|
||||
return new DoubleDimensionSchema(name);
|
||||
case STRING:
|
||||
return new StringDimensionSchema(name, multiValueHandling);
|
||||
default:
|
||||
throw new ISE("Unsupported value type[%s] for dimension[%s]", type, name);
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static class SegmentProvider
|
||||
{
|
||||
private final String dataSource;
|
||||
private final Interval interval;
|
||||
private final List<DataSegment> segments;
|
||||
|
||||
SegmentProvider(String dataSource, Interval interval)
|
||||
{
|
||||
this.dataSource = Preconditions.checkNotNull(dataSource);
|
||||
this.interval = Preconditions.checkNotNull(interval);
|
||||
this.segments = null;
|
||||
}
|
||||
|
||||
SegmentProvider(List<DataSegment> segments)
|
||||
{
|
||||
Preconditions.checkArgument(segments != null && !segments.isEmpty());
|
||||
final String dataSource = segments.get(0).getDataSource();
|
||||
Preconditions.checkArgument(
|
||||
segments.stream().allMatch(segment -> segment.getDataSource().equals(dataSource)),
|
||||
"segments should have the same dataSource"
|
||||
);
|
||||
this.segments = segments;
|
||||
this.dataSource = dataSource;
|
||||
this.interval = JodaUtils.umbrellaInterval(
|
||||
segments.stream().map(DataSegment::getInterval).collect(Collectors.toList())
|
||||
);
|
||||
}
|
||||
|
||||
List<DataSegment> getSegments()
|
||||
{
|
||||
return segments;
|
||||
}
|
||||
|
||||
List<DataSegment> checkAndGetSegments(TaskToolbox toolbox) throws IOException
|
||||
{
|
||||
final List<DataSegment> usedSegments = toolbox.getTaskActionClient()
|
||||
.submit(new SegmentListUsedAction(dataSource, interval, null));
|
||||
if (segments != null) {
|
||||
Collections.sort(usedSegments);
|
||||
Collections.sort(segments);
|
||||
Preconditions.checkState(
|
||||
usedSegments.equals(segments),
|
||||
"Specified segments[%s] are different from the currently used segments[%s]",
|
||||
segments,
|
||||
usedSegments
|
||||
);
|
||||
}
|
||||
return usedSegments;
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -69,7 +69,7 @@ public class HadoopConverterTask extends ConvertSegmentTask
  )
  {
    super(
        makeId(
        getOrMakeId(
            id,
            TYPE,
            Preconditions.checkNotNull(dataSource, "dataSource"),

@ -36,8 +36,6 @@ import com.google.common.collect.Iterables;
|
|||
import com.google.common.hash.HashFunction;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.util.concurrent.ListenableFuture;
|
||||
import io.druid.java.util.common.DateTimes;
|
||||
import io.druid.java.util.common.JodaUtils;
|
||||
import io.druid.data.input.Committer;
|
||||
import io.druid.data.input.Firehose;
|
||||
import io.druid.data.input.FirehoseFactory;
|
||||
|
@ -53,6 +51,7 @@ import io.druid.indexing.common.actions.SegmentTransactionalInsertAction;
|
|||
import io.druid.indexing.common.actions.TaskActionClient;
|
||||
import io.druid.indexing.firehose.IngestSegmentFirehoseFactory;
|
||||
import io.druid.java.util.common.ISE;
|
||||
import io.druid.java.util.common.JodaUtils;
|
||||
import io.druid.java.util.common.StringUtils;
|
||||
import io.druid.java.util.common.granularity.Granularity;
|
||||
import io.druid.java.util.common.guava.Comparators;
|
||||
|
@ -97,6 +96,7 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeMap;
|
||||
|
@ -115,31 +115,6 @@ public class IndexTask extends AbstractTask
|
|||
private static final HashFunction hashFunction = Hashing.murmur3_128();
|
||||
private static final String TYPE = "index";
|
||||
|
||||
private static String makeId(String id, IndexIngestionSpec ingestionSchema)
|
||||
{
|
||||
if (id != null) {
|
||||
return id;
|
||||
} else {
|
||||
return StringUtils.format("index_%s_%s", makeDataSource(ingestionSchema), DateTimes.nowUtc());
|
||||
}
|
||||
}
|
||||
|
||||
private static String makeGroupId(IndexIngestionSpec ingestionSchema)
|
||||
{
|
||||
if (ingestionSchema.getIOConfig().isAppendToExisting()) {
|
||||
// Shared locking group for all tasks that append, since they are OK to run concurrently.
|
||||
return StringUtils.format("%s_append_%s", TYPE, ingestionSchema.getDataSchema().getDataSource());
|
||||
} else {
|
||||
// Return null, one locking group per task.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String makeDataSource(IndexIngestionSpec ingestionSchema)
|
||||
{
|
||||
return ingestionSchema.getDataSchema().getDataSource();
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
private final IndexIngestionSpec ingestionSchema;
|
||||
|
||||
|
@ -151,17 +126,52 @@ public class IndexTask extends AbstractTask
|
|||
@JsonProperty("context") final Map<String, Object> context
|
||||
)
|
||||
{
|
||||
super(
|
||||
makeId(id, ingestionSchema),
|
||||
this(
|
||||
id,
|
||||
makeGroupId(ingestionSchema),
|
||||
taskResource,
|
||||
makeDataSource(ingestionSchema),
|
||||
ingestionSchema.dataSchema.getDataSource(),
|
||||
ingestionSchema,
|
||||
context
|
||||
);
|
||||
}
|
||||
|
||||
IndexTask(
|
||||
String id,
|
||||
String groupId,
|
||||
TaskResource resource,
|
||||
String dataSource,
|
||||
IndexIngestionSpec ingestionSchema,
|
||||
Map<String, Object> context
|
||||
)
|
||||
{
|
||||
super(
|
||||
getOrMakeId(id, TYPE, dataSource),
|
||||
groupId,
|
||||
resource,
|
||||
dataSource,
|
||||
context
|
||||
);
|
||||
|
||||
this.ingestionSchema = ingestionSchema;
|
||||
}
|
||||
|
||||
private static String makeGroupId(IndexIngestionSpec ingestionSchema)
|
||||
{
|
||||
return makeGroupId(ingestionSchema.ioConfig.appendToExisting, ingestionSchema.dataSchema.getDataSource());
|
||||
}
|
||||
|
||||
private static String makeGroupId(boolean isAppendToExisting, String dataSource)
|
||||
{
|
||||
if (isAppendToExisting) {
|
||||
// Shared locking group for all tasks that append, since they are OK to run concurrently.
|
||||
return StringUtils.format("%s_append_%s", TYPE, dataSource);
|
||||
} else {
|
||||
// Return null, one locking group per task.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPriority()
|
||||
{
|
||||
|
@ -182,21 +192,26 @@ public class IndexTask extends AbstractTask
|
|||
.bucketIntervals();
|
||||
|
||||
if (intervals.isPresent()) {
|
||||
final List<TaskLock> locks = getTaskLocks(taskActionClient);
|
||||
if (locks.size() == 0) {
|
||||
try {
|
||||
Tasks.tryAcquireExclusiveLocks(taskActionClient, intervals.get());
|
||||
}
|
||||
catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return isReady(taskActionClient, intervals.get());
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean isReady(TaskActionClient actionClient, SortedSet<Interval> intervals) throws IOException
|
||||
{
|
||||
final List<TaskLock> locks = getTaskLocks(actionClient);
|
||||
if (locks.size() == 0) {
|
||||
try {
|
||||
Tasks.tryAcquireExclusiveLocks(actionClient, intervals);
|
||||
}
|
||||
catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@JsonProperty("spec")
|
||||
public IndexIngestionSpec getIngestionSchema()
|
||||
{
|
||||
|
@ -1136,5 +1151,83 @@ public class IndexTask extends AbstractTask
|
|||
{
|
||||
return new Period(Integer.MAX_VALUE); // intermediate persist doesn't make much sense for batch jobs
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final IndexTuningConfig that = (IndexTuningConfig) o;
|
||||
|
||||
if (!Objects.equals(targetPartitionSize, that.targetPartitionSize)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (maxRowsInMemory != that.maxRowsInMemory) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (maxTotalRows != that.maxTotalRows) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Objects.equals(numShards, that.numShards)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Objects.equals(indexSpec, that.indexSpec)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Objects.equals(basePersistDirectory, that.basePersistDirectory)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (maxPendingPersists != that.maxPendingPersists) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (forceExtendableShardSpecs != that.forceExtendableShardSpecs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (forceGuaranteedRollup != that.forceGuaranteedRollup) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (reportParseExceptions != that.reportParseExceptions) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (publishTimeout != that.publishTimeout) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
return Objects.hash(
|
||||
targetPartitionSize,
|
||||
maxRowsInMemory,
|
||||
maxTotalRows,
|
||||
numShards,
|
||||
indexSpec,
|
||||
basePersistDirectory,
|
||||
maxPendingPersists,
|
||||
forceExtendableShardSpecs,
|
||||
forceGuaranteedRollup,
|
||||
reportParseExceptions,
|
||||
publishTimeout
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -52,7 +52,7 @@ public class KillTask extends AbstractFixedIntervalTask
  )
  {
    super(
        makeId(id, "kill", dataSource, interval),
        getOrMakeId(id, "kill", dataSource, interval),
        dataSource,
        interval,
        context

@@ -55,7 +55,7 @@ public class MoveTask extends AbstractFixedIntervalTask
  )
  {
    super(
        makeId(id, "move", dataSource, interval),
        getOrMakeId(id, "move", dataSource, interval),
        dataSource,
        interval,
        context

@@ -48,7 +48,7 @@ public class RestoreTask extends AbstractFixedIntervalTask
  )
  {
    super(
        makeId(id, "restore", dataSource, interval),
        getOrMakeId(id, "restore", dataSource, interval),
        dataSource,
        interval,
        context

@@ -60,7 +60,8 @@ import java.util.Map;
    @JsonSubTypes.Type(name = "version_converter_sub", value = ConvertSegmentBackwardsCompatibleTask.SubTask.class), // backwards compat - Deprecated
    @JsonSubTypes.Type(name = "convert_segment", value = ConvertSegmentTask.class),
    @JsonSubTypes.Type(name = "convert_segment_sub", value = ConvertSegmentTask.SubTask.class),
    @JsonSubTypes.Type(name = "same_interval_merge", value = SameIntervalMergeTask.class)
    @JsonSubTypes.Type(name = "same_interval_merge", value = SameIntervalMergeTask.class),
    @JsonSubTypes.Type(name = "compact", value = CompactionTask.class)
})
public interface Task
{

@@ -24,8 +24,6 @@ import io.druid.indexing.common.TaskLock;
import io.druid.indexing.common.TaskLockType;
import io.druid.indexing.common.actions.LockTryAcquireAction;
import io.druid.indexing.common.actions.TaskActionClient;
import io.druid.indexing.overlord.LockResult;
import io.druid.java.util.common.ISE;
import io.druid.java.util.common.JodaUtils;
import io.druid.java.util.common.guava.Comparators;
import org.joda.time.Interval;

@@ -49,13 +47,6 @@ public class Tasks
  public static final String PRIORITY_KEY = "priority";
  public static final String LOCK_TIMEOUT_KEY = "taskLockTimeout";

  public static void checkLockResult(LockResult result, Interval interval)
  {
    if (!result.isOk()) {
      throw new ISE("Failed to lock for interval[%s]", interval);
    }
  }

  public static Map<Interval, TaskLock> tryAcquireExclusiveLocks(TaskActionClient client, SortedSet<Interval> intervals)
      throws IOException
  {

@ -22,12 +22,14 @@ package io.druid.indexing.firehose;
|
|||
import com.fasterxml.jackson.annotation.JacksonInject;
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.BiMap;
|
||||
import com.google.common.collect.HashBiMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.inject.Injector;
|
||||
import com.metamx.emitter.EmittingLogger;
|
||||
import io.druid.data.input.Firehose;
|
||||
|
@ -37,7 +39,6 @@ import io.druid.indexing.common.TaskToolbox;
|
|||
import io.druid.indexing.common.TaskToolboxFactory;
|
||||
import io.druid.indexing.common.actions.SegmentListUsedAction;
|
||||
import io.druid.indexing.common.task.NoopTask;
|
||||
import io.druid.java.util.common.guava.Comparators;
|
||||
import io.druid.java.util.common.parsers.ParseException;
|
||||
import io.druid.query.filter.DimFilter;
|
||||
import io.druid.segment.IndexIO;
|
||||
|
@ -52,11 +53,14 @@ import io.druid.timeline.VersionedIntervalTimeline;
|
|||
import io.druid.timeline.partition.PartitionChunk;
|
||||
import org.joda.time.Interval;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowParser>
|
||||
{
|
||||
|
@ -144,16 +148,9 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowPar
|
|||
.getTaskActionClient()
|
||||
.submit(new SegmentListUsedAction(dataSource, interval, null));
|
||||
final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
|
||||
VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
|
||||
Comparators.naturalNullsFirst()
|
||||
);
|
||||
|
||||
for (DataSegment segment : usedSegments) {
|
||||
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
|
||||
}
|
||||
final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(
|
||||
interval
|
||||
);
|
||||
final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = VersionedIntervalTimeline
|
||||
.forSegments(usedSegments)
|
||||
.lookup(interval);
|
||||
|
||||
final List<String> dims;
|
||||
if (dimensions != null) {
|
||||
|
@ -161,83 +158,13 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowPar
|
|||
} else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
|
||||
dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
|
||||
} else {
|
||||
Set<String> dimSet = Sets.newHashSet(
|
||||
Iterables.concat(
|
||||
Iterables.transform(
|
||||
timeLineSegments,
|
||||
new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
|
||||
{
|
||||
@Override
|
||||
public Iterable<String> apply(
|
||||
TimelineObjectHolder<String, DataSegment> timelineObjectHolder
|
||||
)
|
||||
{
|
||||
return Iterables.concat(
|
||||
Iterables.transform(
|
||||
timelineObjectHolder.getObject(),
|
||||
new Function<PartitionChunk<DataSegment>, Iterable<String>>()
|
||||
{
|
||||
@Override
|
||||
public Iterable<String> apply(PartitionChunk<DataSegment> input)
|
||||
{
|
||||
return input.getObject().getDimensions();
|
||||
}
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
)
|
||||
)
|
||||
);
|
||||
dims = Lists.newArrayList(
|
||||
Sets.difference(
|
||||
dimSet,
|
||||
inputRowParser
|
||||
.getParseSpec()
|
||||
.getDimensionsSpec()
|
||||
.getDimensionExclusions()
|
||||
)
|
||||
dims = getUniqueDimensions(
|
||||
timeLineSegments,
|
||||
inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()
|
||||
);
|
||||
}
|
||||
|
||||
final List<String> metricsList;
|
||||
if (metrics != null) {
|
||||
metricsList = metrics;
|
||||
} else {
|
||||
Set<String> metricsSet = Sets.newHashSet(
|
||||
Iterables.concat(
|
||||
Iterables.transform(
|
||||
timeLineSegments,
|
||||
new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
|
||||
{
|
||||
@Override
|
||||
public Iterable<String> apply(
|
||||
TimelineObjectHolder<String, DataSegment> input
|
||||
)
|
||||
{
|
||||
return Iterables.concat(
|
||||
Iterables.transform(
|
||||
input.getObject(),
|
||||
new Function<PartitionChunk<DataSegment>, Iterable<String>>()
|
||||
{
|
||||
@Override
|
||||
public Iterable<String> apply(PartitionChunk<DataSegment> input)
|
||||
{
|
||||
return input.getObject().getMetrics();
|
||||
}
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
)
|
||||
)
|
||||
);
|
||||
metricsList = Lists.newArrayList(metricsSet);
|
||||
}
|
||||
|
||||
final List<String> metricsList = metrics == null ? getUniqueMetrics(timeLineSegments) : metrics;
|
||||
|
||||
final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
|
||||
Iterables.concat(
|
||||
|
@ -288,6 +215,63 @@ public class IngestSegmentFirehoseFactory implements FirehoseFactory<InputRowPar
|
|||
catch (IOException | SegmentLoadingException e) {
|
||||
throw Throwables.propagate(e);
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static List<String> getUniqueDimensions(
|
||||
List<TimelineObjectHolder<String, DataSegment>> timelineSegments,
|
||||
@Nullable Set<String> excludeDimensions
|
||||
)
|
||||
{
|
||||
final BiMap<String, Integer> uniqueDims = HashBiMap.create();
|
||||
|
||||
// Here, we try to retain the order of dimensions as they were specified since the order of dimensions may be
|
||||
// optimized for performance.
|
||||
// Dimensions are extracted from the most recent segments to the oldest because recent segments are likely to be queried more
|
||||
// frequently, and thus the performance should be optimized for recent ones rather than old ones.
|
||||
|
||||
// timelineSegments are sorted in order of interval
|
||||
int index = 0;
|
||||
for (TimelineObjectHolder<String, DataSegment> timelineHolder : Lists.reverse(timelineSegments)) {
|
||||
for (PartitionChunk<DataSegment> chunk : timelineHolder.getObject()) {
|
||||
for (String dimension : chunk.getObject().getDimensions()) {
|
||||
if (!uniqueDims.containsKey(dimension) &&
|
||||
(excludeDimensions == null || !excludeDimensions.contains(dimension))) {
|
||||
uniqueDims.put(dimension, index++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final BiMap<Integer, String> orderedDims = uniqueDims.inverse();
|
||||
return IntStream.range(0, orderedDims.size())
|
||||
.mapToObj(orderedDims::get)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static List<String> getUniqueMetrics(List<TimelineObjectHolder<String, DataSegment>> timelineSegments)
|
||||
{
|
||||
final BiMap<String, Integer> uniqueMetrics = HashBiMap.create();
|
||||
|
||||
// Here, we try to retain the order of metrics as they were specified. Metrics are extracted from the recent
|
||||
// segments to olders.
|
||||
|
||||
// timelineSegments are sorted in order of interval
|
||||
int index = 0;
|
||||
for (TimelineObjectHolder<String, DataSegment> timelineHolder : Lists.reverse(timelineSegments)) {
|
||||
for (PartitionChunk<DataSegment> chunk : timelineHolder.getObject()) {
|
||||
for (String metric : chunk.getObject().getMetrics()) {
|
||||
if (!uniqueMetrics.containsKey(metric)) {
|
||||
uniqueMetrics.put(metric, index++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final BiMap<Integer, String> orderedMetrics = uniqueMetrics.inverse();
|
||||
return IntStream.range(0, orderedMetrics.size())
|
||||
.mapToObj(orderedMetrics::get)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -53,7 +53,7 @@ public class TestUtils

  public TestUtils()
  {
    jsonMapper = new DefaultObjectMapper();
    this.jsonMapper = new DefaultObjectMapper();
    indexIO = new IndexIO(
        jsonMapper,
        new ColumnConfig()

@ -0,0 +1,624 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.indexing.common.task;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair;
|
||||
import com.fasterxml.jackson.databind.jsontype.NamedType;
|
||||
import com.fasterxml.jackson.databind.module.SimpleModule;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.inject.Injector;
|
||||
import io.druid.data.input.FirehoseFactory;
|
||||
import io.druid.data.input.impl.DimensionSchema;
|
||||
import io.druid.data.input.impl.DimensionsSpec;
|
||||
import io.druid.data.input.impl.DoubleDimensionSchema;
|
||||
import io.druid.data.input.impl.FloatDimensionSchema;
|
||||
import io.druid.data.input.impl.InputRowParser;
|
||||
import io.druid.data.input.impl.LongDimensionSchema;
|
||||
import io.druid.data.input.impl.NoopInputRowParser;
|
||||
import io.druid.data.input.impl.StringDimensionSchema;
|
||||
import io.druid.data.input.impl.TimeAndDimsParseSpec;
|
||||
import io.druid.guice.GuiceAnnotationIntrospector;
|
||||
import io.druid.guice.GuiceInjectableValues;
|
||||
import io.druid.guice.GuiceInjectors;
|
||||
import io.druid.indexing.common.TaskToolbox;
|
||||
import io.druid.indexing.common.actions.SegmentListUsedAction;
|
||||
import io.druid.indexing.common.actions.TaskAction;
|
||||
import io.druid.indexing.common.actions.TaskActionClient;
|
||||
import io.druid.indexing.common.task.CompactionTask.SegmentProvider;
|
||||
import io.druid.indexing.common.task.IndexTask.IndexIOConfig;
|
||||
import io.druid.indexing.common.task.IndexTask.IndexIngestionSpec;
|
||||
import io.druid.indexing.common.task.IndexTask.IndexTuningConfig;
|
||||
import io.druid.indexing.firehose.IngestSegmentFirehoseFactory;
|
||||
import io.druid.jackson.DefaultObjectMapper;
|
||||
import io.druid.java.util.common.ISE;
|
||||
import io.druid.java.util.common.Intervals;
|
||||
import io.druid.java.util.common.StringUtils;
|
||||
import io.druid.java.util.common.granularity.Granularities;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.CountAggregatorFactory;
|
||||
import io.druid.query.aggregation.LongMaxAggregatorFactory;
|
||||
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
||||
import io.druid.query.aggregation.first.FloatFirstAggregatorFactory;
|
||||
import io.druid.query.aggregation.last.DoubleLastAggregatorFactory;
|
||||
import io.druid.segment.IndexIO;
|
||||
import io.druid.segment.IndexMergerV9;
|
||||
import io.druid.segment.IndexSpec;
|
||||
import io.druid.segment.Metadata;
|
||||
import io.druid.segment.QueryableIndex;
|
||||
import io.druid.segment.SimpleQueryableIndex;
|
||||
import io.druid.segment.column.Column;
|
||||
import io.druid.segment.column.ColumnBuilder;
|
||||
import io.druid.segment.column.ValueType;
|
||||
import io.druid.segment.data.CompressedObjectStrategy.CompressionStrategy;
|
||||
import io.druid.segment.data.CompressionFactory.LongEncodingStrategy;
|
||||
import io.druid.segment.data.ListIndexed;
|
||||
import io.druid.segment.data.RoaringBitmapSerdeFactory;
|
||||
import io.druid.segment.incremental.IncrementalIndex;
|
||||
import io.druid.segment.indexing.DataSchema;
|
||||
import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec;
|
||||
import io.druid.segment.loading.SegmentLoadingException;
|
||||
import io.druid.segment.transform.TransformingInputRowParser;
|
||||
import io.druid.timeline.DataSegment;
|
||||
import io.druid.timeline.partition.NumberedShardSpec;
|
||||
import org.hamcrest.CoreMatchers;
|
||||
import org.joda.time.Interval;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.ExpectedException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class CompactionTaskTest
|
||||
{
|
||||
private static final String DATA_SOURCE = "dataSource";
|
||||
private static final String TIMESTAMP_COLUMN = "timestamp";
|
||||
private static final String MIXED_TYPE_COLUMN = "string_to_double";
|
||||
private static final Interval COMPACTION_INTERVAL = Intervals.of("2017-01-01/2017-06-01");
|
||||
private static final Map<Interval, DimensionSchema> MIXED_TYPE_COLUMN_MAP = ImmutableMap.of(
|
||||
Intervals.of("2017-01-01/2017-02-01"),
|
||||
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
|
||||
Intervals.of("2017-02-01/2017-03-01"),
|
||||
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
|
||||
Intervals.of("2017-03-01/2017-04-01"),
|
||||
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
|
||||
Intervals.of("2017-04-01/2017-05-01"),
|
||||
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
|
||||
Intervals.of("2017-05-01/2017-06-01"),
|
||||
new DoubleDimensionSchema(MIXED_TYPE_COLUMN)
|
||||
);
|
||||
private static final IndexTuningConfig TUNING_CONFIG = createTuningConfig();
|
||||
private static final Injector INJECTOR = GuiceInjectors.makeStartupInjector();
|
||||
|
||||
private static Map<String, DimensionSchema> DIMENSIONS;
|
||||
private static Map<String, AggregatorFactory> AGGREGATORS;
|
||||
private static List<DataSegment> SEGMENTS;
|
||||
private static ObjectMapper objectMapper = setupInjectablesInObjectMapper(new DefaultObjectMapper());
|
||||
private static TaskToolbox toolbox;
|
||||
|
||||
@BeforeClass
|
||||
public static void setup()
|
||||
{
|
||||
DIMENSIONS = new HashMap<>();
|
||||
AGGREGATORS = new HashMap<>();
|
||||
|
||||
DIMENSIONS.put(Column.TIME_COLUMN_NAME, new LongDimensionSchema(Column.TIME_COLUMN_NAME));
|
||||
DIMENSIONS.put(TIMESTAMP_COLUMN, new LongDimensionSchema(TIMESTAMP_COLUMN));
|
||||
for (int i = 0; i < 5; i++) {
|
||||
final StringDimensionSchema schema = new StringDimensionSchema(
|
||||
"string_dim_" + i,
|
||||
null
|
||||
);
|
||||
DIMENSIONS.put(schema.getName(), schema);
|
||||
}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
final LongDimensionSchema schema = new LongDimensionSchema("long_dim_" + i);
|
||||
DIMENSIONS.put(schema.getName(), schema);
|
||||
}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
final FloatDimensionSchema schema = new FloatDimensionSchema("float_dim_" + i);
|
||||
DIMENSIONS.put(schema.getName(), schema);
|
||||
}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
final DoubleDimensionSchema schema = new DoubleDimensionSchema("double_dim_" + i);
|
||||
DIMENSIONS.put(schema.getName(), schema);
|
||||
}
|
||||
|
||||
AGGREGATORS.put("agg_0", new CountAggregatorFactory("agg_0"));
|
||||
AGGREGATORS.put("agg_1", new LongSumAggregatorFactory("agg_1", "long_dim_1"));
|
||||
AGGREGATORS.put("agg_2", new LongMaxAggregatorFactory("agg_2", "long_dim_2"));
|
||||
AGGREGATORS.put("agg_3", new FloatFirstAggregatorFactory("agg_3", "float_dim_3"));
|
||||
AGGREGATORS.put("agg_4", new DoubleLastAggregatorFactory("agg_4", "double_dim_4"));
|
||||
|
||||
final Map<DataSegment, File> segmentMap = new HashMap<>(5);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
final Interval segmentInterval = Intervals.of(StringUtils.format("2017-0%d-01/2017-0%d-01", (i + 1), (i + 2)));
|
||||
segmentMap.put(
|
||||
new DataSegment(
|
||||
DATA_SOURCE,
|
||||
segmentInterval,
|
||||
"version",
|
||||
ImmutableMap.of(),
|
||||
findDimensions(i, segmentInterval),
|
||||
new ArrayList<>(AGGREGATORS.keySet()),
|
||||
new NumberedShardSpec(0, 1),
|
||||
0,
|
||||
1
|
||||
),
|
||||
new File("file_" + i)
|
||||
);
|
||||
}
|
||||
SEGMENTS = new ArrayList<>(segmentMap.keySet());
|
||||
|
||||
toolbox = new TestTaskToolbox(
|
||||
new TestTaskActionClient(new ArrayList<>(segmentMap.keySet())),
|
||||
new TestIndexIO(objectMapper, segmentMap),
|
||||
segmentMap
|
||||
);
|
||||
}
|
||||
|
||||
private static ObjectMapper setupInjectablesInObjectMapper(ObjectMapper objectMapper)
|
||||
{
|
||||
final GuiceAnnotationIntrospector guiceIntrospector = new GuiceAnnotationIntrospector();
|
||||
objectMapper.setAnnotationIntrospectors(
|
||||
new AnnotationIntrospectorPair(
|
||||
guiceIntrospector, objectMapper.getSerializationConfig().getAnnotationIntrospector()
|
||||
),
|
||||
new AnnotationIntrospectorPair(
|
||||
guiceIntrospector, objectMapper.getDeserializationConfig().getAnnotationIntrospector()
|
||||
)
|
||||
);
|
||||
objectMapper.setInjectableValues(new GuiceInjectableValues(INJECTOR));
|
||||
objectMapper.registerModule(
|
||||
new SimpleModule().registerSubtypes(new NamedType(NumberedShardSpec.class, "NumberedShardSpec"))
|
||||
);
|
||||
return objectMapper;
|
||||
}
|
||||
|
||||
private static List<String> findDimensions(int startIndex, Interval segmentInterval)
|
||||
{
|
||||
final List<String> dimensions = new ArrayList<>();
|
||||
dimensions.add(TIMESTAMP_COLUMN);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
int postfix = i + startIndex;
|
||||
postfix = postfix >= 5 ? postfix - 5 : postfix;
|
||||
dimensions.add("string_dim_" + postfix);
|
||||
dimensions.add("long_dim_" + postfix);
|
||||
dimensions.add("float_dim_" + postfix);
|
||||
dimensions.add("double_dim_" + postfix);
|
||||
}
|
||||
dimensions.add(MIXED_TYPE_COLUMN_MAP.get(segmentInterval).getName());
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
private static IndexTuningConfig createTuningConfig()
|
||||
{
|
||||
return new IndexTuningConfig(
|
||||
5000000,
|
||||
500000,
|
||||
1000000,
|
||||
null,
|
||||
null,
|
||||
new IndexSpec(
|
||||
new RoaringBitmapSerdeFactory(true),
|
||||
CompressionStrategy.LZ4,
|
||||
CompressionStrategy.LZF,
|
||||
LongEncodingStrategy.LONGS
|
||||
),
|
||||
5000,
|
||||
true,
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
100L
|
||||
);
|
||||
}
|
||||
|
||||
@Rule
|
||||
public ExpectedException expectedException = ExpectedException.none();
|
||||
|
||||
@Test
|
||||
public void testSerdeWithInterval() throws IOException
|
||||
{
|
||||
final CompactionTask task = new CompactionTask(
|
||||
null,
|
||||
null,
|
||||
DATA_SOURCE,
|
||||
COMPACTION_INTERVAL,
|
||||
null,
|
||||
null,
|
||||
createTuningConfig(),
|
||||
ImmutableMap.of("testKey", "testContext"),
|
||||
INJECTOR,
|
||||
objectMapper
|
||||
);
|
||||
final byte[] bytes = objectMapper.writeValueAsBytes(task);
|
||||
final CompactionTask fromJson = objectMapper.readValue(bytes, CompactionTask.class);
|
||||
Assert.assertEquals(task.getType(), fromJson.getType());
|
||||
Assert.assertEquals(task.getDataSource(), fromJson.getDataSource());
|
||||
Assert.assertEquals(task.getInterval(), fromJson.getInterval());
|
||||
Assert.assertEquals(task.getSegments(), fromJson.getSegments());
|
||||
Assert.assertEquals(task.getDimensionsSpec(), fromJson.getDimensionsSpec());
|
||||
Assert.assertEquals(task.getTuningConfig(), fromJson.getTuningConfig());
|
||||
Assert.assertEquals(task.getContext(), fromJson.getContext());
|
||||
Assert.assertNull(fromJson.getSegmentProvider().getSegments());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerdeWithSegments() throws IOException
|
||||
{
|
||||
final CompactionTask task = new CompactionTask(
|
||||
null,
|
||||
null,
|
||||
DATA_SOURCE,
|
||||
null,
|
||||
SEGMENTS,
|
||||
null,
|
||||
createTuningConfig(),
|
||||
ImmutableMap.of("testKey", "testContext"),
|
||||
INJECTOR,
|
||||
objectMapper
|
||||
);
|
||||
final byte[] bytes = objectMapper.writeValueAsBytes(task);
|
||||
final CompactionTask fromJson = objectMapper.readValue(bytes, CompactionTask.class);
|
||||
Assert.assertEquals(task.getType(), fromJson.getType());
|
||||
Assert.assertEquals(task.getDataSource(), fromJson.getDataSource());
|
||||
Assert.assertEquals(task.getInterval(), fromJson.getInterval());
|
||||
Assert.assertEquals(task.getSegments(), fromJson.getSegments());
|
||||
Assert.assertEquals(task.getDimensionsSpec(), fromJson.getDimensionsSpec());
|
||||
Assert.assertEquals(task.getTuningConfig(), fromJson.getTuningConfig());
|
||||
Assert.assertEquals(task.getContext(), fromJson.getContext());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateIngestionSchema() throws IOException, SegmentLoadingException
|
||||
{
|
||||
final IndexIngestionSpec ingestionSchema = CompactionTask.createIngestionSchema(
|
||||
toolbox,
|
||||
new SegmentProvider(DATA_SOURCE, COMPACTION_INTERVAL),
|
||||
null,
|
||||
TUNING_CONFIG,
|
||||
INJECTOR,
|
||||
objectMapper
|
||||
);
|
||||
final DimensionsSpec expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
|
||||
|
||||
assertIngestionSchema(ingestionSchema, expectedDimensionsSpec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateIngestionSchemaWithCustomDimensionsSpec() throws IOException, SegmentLoadingException
|
||||
{
|
||||
final DimensionsSpec customSpec = new DimensionsSpec(
|
||||
Lists.newArrayList(
|
||||
new LongDimensionSchema("timestamp"),
|
||||
new StringDimensionSchema("string_dim_0"),
|
||||
new StringDimensionSchema("string_dim_1"),
|
||||
new StringDimensionSchema("string_dim_2"),
|
||||
new StringDimensionSchema("string_dim_3"),
|
||||
new StringDimensionSchema("string_dim_4"),
|
||||
new LongDimensionSchema("long_dim_0"),
|
||||
new LongDimensionSchema("long_dim_1"),
|
||||
new LongDimensionSchema("long_dim_2"),
|
||||
new LongDimensionSchema("long_dim_3"),
|
||||
new LongDimensionSchema("long_dim_4"),
|
||||
new FloatDimensionSchema("float_dim_0"),
|
||||
new FloatDimensionSchema("float_dim_1"),
|
||||
new FloatDimensionSchema("float_dim_2"),
|
||||
new FloatDimensionSchema("float_dim_3"),
|
||||
new FloatDimensionSchema("float_dim_4"),
|
||||
new DoubleDimensionSchema("double_dim_0"),
|
||||
new DoubleDimensionSchema("double_dim_1"),
|
||||
new DoubleDimensionSchema("double_dim_2"),
|
||||
new DoubleDimensionSchema("double_dim_3"),
|
||||
new DoubleDimensionSchema("double_dim_4"),
|
||||
new StringDimensionSchema(MIXED_TYPE_COLUMN)
|
||||
),
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
final IndexIngestionSpec ingestionSchema = CompactionTask.createIngestionSchema(
|
||||
toolbox,
|
||||
new SegmentProvider(DATA_SOURCE, COMPACTION_INTERVAL),
|
||||
customSpec,
|
||||
TUNING_CONFIG,
|
||||
INJECTOR,
|
||||
objectMapper
|
||||
);
|
||||
|
||||
assertIngestionSchema(ingestionSchema, customSpec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateIngestionSchemaWithCustomSegments() throws IOException, SegmentLoadingException
|
||||
{
|
||||
final IndexIngestionSpec ingestionSchema = CompactionTask.createIngestionSchema(
|
||||
toolbox,
|
||||
new SegmentProvider(SEGMENTS),
|
||||
null,
|
||||
TUNING_CONFIG,
|
||||
INJECTOR,
|
||||
objectMapper
|
||||
);
|
||||
final DimensionsSpec expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
|
||||
|
||||
assertIngestionSchema(ingestionSchema, expectedDimensionsSpec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateIngestionSchemaWithDifferentSegmentSet() throws IOException, SegmentLoadingException
|
||||
{
|
||||
expectedException.expect(CoreMatchers.instanceOf(IllegalStateException.class));
|
||||
expectedException.expectMessage(CoreMatchers.containsString("are different from the currently used segments"));
|
||||
|
||||
final List<DataSegment> segments = new ArrayList<>(SEGMENTS);
|
||||
segments.remove(0);
|
||||
CompactionTask.createIngestionSchema(
|
||||
toolbox,
|
||||
new SegmentProvider(segments),
|
||||
null,
|
||||
TUNING_CONFIG,
|
||||
INJECTOR,
|
||||
objectMapper
|
||||
);
|
||||
}
|
||||
|
||||
private static DimensionsSpec getExpectedDimensionsSpecForAutoGeneration()
|
||||
{
|
||||
return new DimensionsSpec(
|
||||
Lists.newArrayList(
|
||||
new LongDimensionSchema("timestamp"),
|
||||
new StringDimensionSchema("string_dim_4"),
|
||||
new LongDimensionSchema("long_dim_4"),
|
||||
new FloatDimensionSchema("float_dim_4"),
|
||||
new DoubleDimensionSchema("double_dim_4"),
|
||||
new StringDimensionSchema("string_dim_0"),
|
||||
new LongDimensionSchema("long_dim_0"),
|
||||
new FloatDimensionSchema("float_dim_0"),
|
||||
new DoubleDimensionSchema("double_dim_0"),
|
||||
new StringDimensionSchema("string_dim_1"),
|
||||
new LongDimensionSchema("long_dim_1"),
|
||||
new FloatDimensionSchema("float_dim_1"),
|
||||
new DoubleDimensionSchema("double_dim_1"),
|
||||
new StringDimensionSchema("string_dim_2"),
|
||||
new LongDimensionSchema("long_dim_2"),
|
||||
new FloatDimensionSchema("float_dim_2"),
|
||||
new DoubleDimensionSchema("double_dim_2"),
|
||||
new StringDimensionSchema("string_dim_3"),
|
||||
new LongDimensionSchema("long_dim_3"),
|
||||
new FloatDimensionSchema("float_dim_3"),
|
||||
new DoubleDimensionSchema("double_dim_3"),
|
||||
new DoubleDimensionSchema("string_to_double")
|
||||
),
|
||||
null,
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
private static void assertIngestionSchema(
|
||||
IndexIngestionSpec ingestionSchema,
|
||||
DimensionsSpec expectedDimensionsSpec
|
||||
)
|
||||
{
|
||||
// assert dataSchema
|
||||
final DataSchema dataSchema = ingestionSchema.getDataSchema();
|
||||
Assert.assertEquals(DATA_SOURCE, dataSchema.getDataSource());
|
||||
|
||||
final InputRowParser parser = objectMapper.convertValue(dataSchema.getParser(), InputRowParser.class);
|
||||
Assert.assertTrue(parser instanceof TransformingInputRowParser);
|
||||
Assert.assertTrue(((TransformingInputRowParser) parser).getParser() instanceof NoopInputRowParser);
|
||||
Assert.assertTrue(parser.getParseSpec() instanceof TimeAndDimsParseSpec);
|
||||
Assert.assertEquals(
|
||||
new HashSet<>(expectedDimensionsSpec.getDimensions()),
|
||||
new HashSet<>(parser.getParseSpec().getDimensionsSpec().getDimensions())
|
||||
);
|
||||
final Set<AggregatorFactory> expectedAggregators = AGGREGATORS.values()
|
||||
.stream()
|
||||
.map(AggregatorFactory::getCombiningFactory)
|
||||
.collect(Collectors.toSet());
|
||||
Assert.assertEquals(expectedAggregators, new HashSet<>(Arrays.asList(dataSchema.getAggregators())));
|
||||
Assert.assertEquals(
|
||||
new ArbitraryGranularitySpec(Granularities.NONE, false, ImmutableList.of(COMPACTION_INTERVAL)),
|
||||
dataSchema.getGranularitySpec()
|
||||
);
|
||||
|
||||
// assert ioConfig
|
||||
final IndexIOConfig ioConfig = ingestionSchema.getIOConfig();
|
||||
Assert.assertFalse(ioConfig.isAppendToExisting());
|
||||
final FirehoseFactory firehoseFactory = ioConfig.getFirehoseFactory();
|
||||
Assert.assertTrue(firehoseFactory instanceof IngestSegmentFirehoseFactory);
|
||||
final IngestSegmentFirehoseFactory ingestSegmentFirehoseFactory = (IngestSegmentFirehoseFactory) firehoseFactory;
|
||||
Assert.assertEquals(DATA_SOURCE, ingestSegmentFirehoseFactory.getDataSource());
|
||||
Assert.assertEquals(COMPACTION_INTERVAL, ingestSegmentFirehoseFactory.getInterval());
|
||||
Assert.assertNull(ingestSegmentFirehoseFactory.getDimensionsFilter());
|
||||
|
||||
// check the order of dimensions
|
||||
Assert.assertEquals(expectedDimensionsSpec.getDimensionNames(), ingestSegmentFirehoseFactory.getDimensions());
|
||||
// check the order of metrics
|
||||
Assert.assertEquals(
|
||||
Lists.newArrayList("agg_4", "agg_3", "agg_2", "agg_1", "agg_0"),
|
||||
ingestSegmentFirehoseFactory.getMetrics()
|
||||
);
|
||||
|
||||
// assert tuningConfig
|
||||
Assert.assertEquals(createTuningConfig(), ingestionSchema.getTuningConfig());
|
||||
}
|
||||
|
||||
private static class TestTaskToolbox extends TaskToolbox
|
||||
{
|
||||
private final Map<DataSegment, File> segmentFileMap;
|
||||
|
||||
TestTaskToolbox(
|
||||
TaskActionClient taskActionClient,
|
||||
IndexIO indexIO,
|
||||
Map<DataSegment, File> segmentFileMap
|
||||
)
|
||||
{
|
||||
super(
|
||||
null,
|
||||
taskActionClient,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
indexIO,
|
||||
null,
|
||||
null,
|
||||
new IndexMergerV9(objectMapper, indexIO),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
this.segmentFileMap = segmentFileMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<DataSegment, File> fetchSegments(List<DataSegment> segments)
|
||||
throws SegmentLoadingException
|
||||
{
|
||||
final Map<DataSegment, File> submap = new HashMap<>(segments.size());
|
||||
for (DataSegment segment : segments) {
|
||||
final File file = Preconditions.checkNotNull(segmentFileMap.get(segment));
|
||||
submap.put(segment, file);
|
||||
}
|
||||
return submap;
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestTaskActionClient implements TaskActionClient
|
||||
{
|
||||
private final List<DataSegment> segments;
|
||||
|
||||
TestTaskActionClient(List<DataSegment> segments)
|
||||
{
|
||||
this.segments = segments;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException
|
||||
{
|
||||
if (!(taskAction instanceof SegmentListUsedAction)) {
|
||||
throw new ISE("action[%s] is not supported", taskAction);
|
||||
}
|
||||
return (RetType) segments;
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestIndexIO extends IndexIO
|
||||
{
|
||||
private final Map<File, QueryableIndex> queryableIndexMap;
|
||||
|
||||
TestIndexIO(
|
||||
ObjectMapper mapper,
|
||||
Map<DataSegment, File> segmentFileMap
|
||||
)
|
||||
{
|
||||
super(mapper, () -> 0);
|
||||
|
||||
queryableIndexMap = new HashMap<>(segmentFileMap.size());
|
||||
for (Entry<DataSegment, File> entry : segmentFileMap.entrySet()) {
|
||||
final DataSegment segment = entry.getKey();
|
||||
final List<String> columnNames = new ArrayList<>(segment.getDimensions().size() + segment.getMetrics().size());
|
||||
columnNames.add(Column.TIME_COLUMN_NAME);
|
||||
columnNames.addAll(segment.getDimensions());
|
||||
columnNames.addAll(segment.getMetrics());
|
||||
final Map<String, Column> columnMap = new HashMap<>(columnNames.size());
|
||||
final List<AggregatorFactory> aggregatorFactories = new ArrayList<>(segment.getMetrics().size());
|
||||
|
||||
for (String columnName : columnNames) {
|
||||
if (columnName.equals(MIXED_TYPE_COLUMN)) {
|
||||
columnMap.put(columnName, createColumn(MIXED_TYPE_COLUMN_MAP.get(segment.getInterval())));
|
||||
} else if (DIMENSIONS.containsKey(columnName)) {
|
||||
columnMap.put(columnName, createColumn(DIMENSIONS.get(columnName)));
|
||||
} else if (AGGREGATORS.containsKey(columnName)) {
|
||||
columnMap.put(columnName, createColumn(AGGREGATORS.get(columnName)));
|
||||
aggregatorFactories.add(AGGREGATORS.get(columnName));
|
||||
}
|
||||
}
|
||||
|
||||
final Metadata metadata = new Metadata();
|
||||
metadata.setAggregators(aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]));
|
||||
metadata.setRollup(false);
|
||||
|
||||
queryableIndexMap.put(
|
||||
entry.getValue(),
|
||||
new SimpleQueryableIndex(
|
||||
segment.getInterval(),
|
||||
new ListIndexed<>(columnNames, String.class),
|
||||
new ListIndexed<>(segment.getDimensions(), String.class),
|
||||
null,
|
||||
columnMap,
|
||||
null,
|
||||
metadata
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryableIndex loadIndex(File file) throws IOException
|
||||
{
|
||||
return queryableIndexMap.get(file);
|
||||
}
|
||||
}
|
||||
|
||||
private static Column createColumn(DimensionSchema dimensionSchema)
|
||||
{
|
||||
return new ColumnBuilder()
|
||||
.setType(IncrementalIndex.TYPE_MAP.get(dimensionSchema.getValueType()))
|
||||
.setDictionaryEncodedColumn(() -> null)
|
||||
.setBitmapIndex(() -> null)
|
||||
.build();
|
||||
}
|
||||
|
||||
private static Column createColumn(AggregatorFactory aggregatorFactory)
|
||||
{
|
||||
return new ColumnBuilder()
|
||||
.setType(ValueType.fromString(aggregatorFactory.getTypeName()))
|
||||
.build();
|
||||
}
|
||||
}
|
|
@ -87,7 +87,11 @@ import io.druid.segment.realtime.firehose.IngestSegmentFirehose;
|
|||
import io.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory;
|
||||
import io.druid.server.metrics.NoopServiceEmitter;
|
||||
import io.druid.timeline.DataSegment;
|
||||
import io.druid.timeline.TimelineObjectHolder;
|
||||
import io.druid.timeline.partition.NumberedPartitionChunk;
|
||||
import io.druid.timeline.partition.NumberedShardSpec;
|
||||
import io.druid.timeline.partition.PartitionChunk;
|
||||
import io.druid.timeline.partition.PartitionHolder;
|
||||
import org.easymock.EasyMock;
|
||||
import org.joda.time.Interval;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -108,6 +112,8 @@ import java.util.LinkedList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -575,6 +581,96 @@ public class IngestSegmentFirehoseFactoryTest
|
|||
Assert.assertEquals((int) MAX_ROWS, (int) rowcount);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetUniqueDimensionsAndMetrics()
|
||||
{
|
||||
final int numSegmentsPerPartitionChunk = 5;
|
||||
final int numPartitionChunksPerTimelineObject = 10;
|
||||
final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
|
||||
final List<DataSegment> segments = new ArrayList<>(numSegments);
|
||||
final Interval interval = Intervals.of("2017-01-01/2017-01-02");
|
||||
final String version = "1";
|
||||
|
||||
final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
|
||||
for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
|
||||
final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
|
||||
for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
|
||||
final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk)
|
||||
.mapToObj(suffix -> "dim" + suffix)
|
||||
.collect(Collectors.toList());
|
||||
final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk)
|
||||
.mapToObj(suffix -> "met" + suffix)
|
||||
.collect(Collectors.toList());
|
||||
final DataSegment segment = new DataSegment(
|
||||
"ds",
|
||||
interval,
|
||||
version,
|
||||
ImmutableMap.of(),
|
||||
dims,
|
||||
metrics,
|
||||
new NumberedShardSpec(numPartitionChunksPerTimelineObject, i),
|
||||
1,
|
||||
1
|
||||
);
|
||||
segments.add(segment);
|
||||
|
||||
final PartitionChunk<DataSegment> partitionChunk = new NumberedPartitionChunk<>(
|
||||
i,
|
||||
numPartitionChunksPerTimelineObject,
|
||||
segment
|
||||
);
|
||||
chunks.add(partitionChunk);
|
||||
}
|
||||
final TimelineObjectHolder<String, DataSegment> timelineHolder = new TimelineObjectHolder<>(
|
||||
interval,
|
||||
version,
|
||||
new PartitionHolder<>(chunks)
|
||||
);
|
||||
timelineSegments.add(timelineHolder);
|
||||
}
|
||||
|
||||
final String[] expectedDims = new String[]{
|
||||
"dim9",
|
||||
"dim10",
|
||||
"dim11",
|
||||
"dim12",
|
||||
"dim13",
|
||||
"dim8",
|
||||
"dim7",
|
||||
"dim6",
|
||||
"dim5",
|
||||
"dim4",
|
||||
"dim3",
|
||||
"dim2",
|
||||
"dim1",
|
||||
"dim0"
|
||||
};
|
||||
final String[] expectedMetrics = new String[]{
|
||||
"met9",
|
||||
"met10",
|
||||
"met11",
|
||||
"met12",
|
||||
"met13",
|
||||
"met8",
|
||||
"met7",
|
||||
"met6",
|
||||
"met5",
|
||||
"met4",
|
||||
"met3",
|
||||
"met2",
|
||||
"met1",
|
||||
"met0"
|
||||
};
|
||||
Assert.assertEquals(
|
||||
Arrays.asList(expectedDims),
|
||||
IngestSegmentFirehoseFactory.getUniqueDimensions(timelineSegments, null)
|
||||
);
|
||||
Assert.assertEquals(
|
||||
Arrays.asList(expectedMetrics),
|
||||
IngestSegmentFirehoseFactory.getUniqueMetrics(timelineSegments)
|
||||
);
|
||||
}
|
||||
|
||||
private static ServiceEmitter newMockEmitter()
|
||||
{
|
||||
return new NoopServiceEmitter();
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.joda.time.Interval;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class CoordinatorResourceTestClient

@ -80,7 +81,7 @@ public class CoordinatorResourceTestClient
}

// return a list of the segment dates for the specified datasource
public ArrayList<String> getSegmentIntervals(final String dataSource) throws Exception
public List<String> getSegmentIntervals(final String dataSource) throws Exception
{
ArrayList<String> segments = null;
try {
@ -33,8 +33,8 @@ import org.joda.time.Interval;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;

public abstract class AbstractIndexerTest

@ -55,7 +55,7 @@ public abstract class AbstractIndexerTest

protected void unloadAndKillData(final String dataSource) throws Exception
{
ArrayList<String> intervals = coordinator.getSegmentIntervals(dataSource);
List<String> intervals = coordinator.getSegmentIntervals(dataSource);

// each element in intervals has this form:
// 2015-12-01T23:15:00.000Z/2015-12-01T23:16:00.000Z
@ -0,0 +1,87 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.tests.indexer;

import io.druid.java.util.common.ISE;
import io.druid.java.util.common.logger.Logger;
import io.druid.testing.guice.DruidTestModuleFactory;
import io.druid.testing.utils.RetryUtil;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

import java.util.List;

@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITCompactionTaskTest extends AbstractIndexerTest
{
private static final Logger LOG = new Logger(ITCompactionTaskTest.class);
private static String INDEX_TASK = "/indexer/wikipedia_index_task.json";
private static String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static String INDEX_DATASOURCE = "wikipedia_index_test";
private static String COMPACTION_TASK = "/indexer/wikipedia_compaction_task.json";
private static String COMPACTED_INTERVAL = "2013-08-31T00:00:00.000Z/2013-09-02T00:00:00.000Z";

@Test
public void testCompaction() throws Exception
{
loadData();
final List<String> intervalsBeforeCompaction = coordinator.getSegmentIntervals(INDEX_DATASOURCE);
if (intervalsBeforeCompaction.contains(COMPACTED_INTERVAL)) {
throw new ISE("Containing a segment for the compacted interval[%s] before compaction", COMPACTED_INTERVAL);
}
try {
queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);
compactData();
queryHelper.testQueriesFromFile(INDEX_QUERIES_RESOURCE, 2);

final List<String> intervalsAfterCompaction = coordinator.getSegmentIntervals(INDEX_DATASOURCE);
if (!intervalsAfterCompaction.contains(COMPACTED_INTERVAL)) {
throw new ISE("Compacted segment for interval[%s] does not exist", COMPACTED_INTERVAL);
}
}
finally {
unloadAndKillData(INDEX_DATASOURCE);
}
}

private void loadData() throws Exception
{
final String taskID = indexer.submitTask(getTaskAsString(INDEX_TASK));
LOG.info("TaskID for loading index task %s", taskID);
indexer.waitUntilTaskCompletes(taskID);

RetryUtil.retryUntilTrue(
() -> coordinator.areSegmentsLoaded(INDEX_DATASOURCE),
"Segment Load"
);
}

private void compactData() throws Exception
{
final String taskID = indexer.submitTask(getTaskAsString(COMPACTION_TASK));
LOG.info("TaskID for compaction task %s", taskID);
indexer.waitUntilTaskCompletes(taskID);

RetryUtil.retryUntilTrue(
() -> coordinator.areSegmentsLoaded(INDEX_DATASOURCE),
"Segment Compaction"
);
}
}
@ -558,7 +558,7 @@
"timestamp": "2013-08-31T01:02:33.000Z",
"result": {
"minTime": "2013-08-31T01:02:33.000Z",
"maxTime": "2013-08-31T12:41:27.000Z"
"maxTime": "2013-09-01T12:41:27.000Z"
}
}
]
@ -0,0 +1,5 @@
{
"type" : "compact",
"dataSource" : "wikipedia_index_test",
"interval" : "2013-08-31/2013-09-02"
}
@ -3,3 +3,8 @@
|
|||
{"timestamp": "2013-08-31T07:11:21Z", "page": "Cherno Alpha", "language" : "ru", "user" : "masterYi", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"article", "continent":"Asia", "country":"Russia", "region":"Oblast", "city":"Moscow", "added": 123, "deleted": 12, "delta": 111}
|
||||
{"timestamp": "2013-08-31T11:58:39Z", "page": "Crimson Typhoon", "language" : "zh", "user" : "triplets", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"China", "region":"Shanxi", "city":"Taiyuan", "added": 905, "deleted": 5, "delta": 900}
|
||||
{"timestamp": "2013-08-31T12:41:27Z", "page": "Coyote Tango", "language" : "ja", "user" : "stringer", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"Japan", "region":"Kanto", "city":"Tokyo", "added": 1, "deleted": 10, "delta": -9}
|
||||
{"timestamp": "2013-09-01T01:02:33Z", "page": "Gypsy Danger", "language" : "en", "user" : "nuclear", "unpatrolled" : "true", "newPage" : "true", "robot": "false", "anonymous": "false", "namespace":"article", "continent":"North America", "country":"United States", "region":"Bay Area", "city":"San Francisco", "added": 57, "deleted": 200, "delta": -143}
|
||||
{"timestamp": "2013-09-01T03:32:45Z", "page": "Striker Eureka", "language" : "en", "user" : "speed", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Australia", "country":"Australia", "region":"Cantebury", "city":"Syndey", "added": 459, "deleted": 129, "delta": 330}
|
||||
{"timestamp": "2013-09-01T07:11:21Z", "page": "Cherno Alpha", "language" : "ru", "user" : "masterYi", "unpatrolled" : "false", "newPage" : "true", "robot": "true", "anonymous": "false", "namespace":"article", "continent":"Asia", "country":"Russia", "region":"Oblast", "city":"Moscow", "added": 123, "deleted": 12, "delta": 111}
|
||||
{"timestamp": "2013-09-01T11:58:39Z", "page": "Crimson Typhoon", "language" : "zh", "user" : "triplets", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"China", "region":"Shanxi", "city":"Taiyuan", "added": 905, "deleted": 5, "delta": 900}
|
||||
{"timestamp": "2013-09-01T12:41:27Z", "page": "Coyote Tango", "language" : "ja", "user" : "stringer", "unpatrolled" : "true", "newPage" : "false", "robot": "true", "anonymous": "false", "namespace":"wikipedia", "continent":"Asia", "country":"Japan", "region":"Kanto", "city":"Tokyo", "added": 1, "deleted": 10, "delta": -9}
|
|
@ -5,13 +5,15 @@
"queryType" : "timeBoundary",
"dataSource": "wikipedia_index_test"
},
"expectedResults":[ {
"timestamp" : "2013-08-31T01:02:33.000Z",
"result" : {
"minTime" : "2013-08-31T01:02:33.000Z",
"maxTime" : "2013-08-31T12:41:27.000Z"
}
} ]
"expectedResults":[
{
"timestamp" : "2013-08-31T01:02:33.000Z",
"result" : {
"minTime" : "2013-08-31T01:02:33.000Z",
"maxTime" : "2013-09-01T12:41:27.000Z"
}
}
]
},

{
@ -27,7 +27,7 @@
"granularitySpec": {
"segmentGranularity": "DAY",
"queryGranularity": "second",
"intervals" : [ "2013-08-31/2013-09-01" ]
"intervals" : [ "2013-08-31/2013-09-02" ]
},
"parser": {
"parseSpec": {
@ -28,7 +28,7 @@ import org.joda.time.Interval;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.SortedSet;

/**
 */

@ -42,9 +42,15 @@ public class JodaUtils
{
ArrayList<Interval> retVal = Lists.newArrayList();

TreeSet<Interval> sortedIntervals = Sets.newTreeSet(Comparators.intervalsByStartThenEnd());
for (Interval interval : intervals) {
sortedIntervals.add(interval);
final SortedSet<Interval> sortedIntervals;

if (intervals instanceof SortedSet) {
sortedIntervals = (SortedSet<Interval>) intervals;
} else {
sortedIntervals = Sets.newTreeSet(Comparators.intervalsByStartThenEnd());
for (Interval interval : intervals) {
sortedIntervals.add(interval);
}
}

if (sortedIntervals.isEmpty()) {
@ -26,6 +26,7 @@ import io.druid.java.util.common.logger.Logger;
import io.druid.segment.ColumnSelectorFactory;

import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;

@ -92,7 +93,12 @@ public abstract class AggregatorFactory implements Cacheable
*/
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
{
throw new UOE("[%s] does not implement getMergingFactory(..)", this.getClass().getName());
final AggregatorFactory combiningFactory = this.getCombiningFactory();
if (other.getName().equals(this.getName()) && combiningFactory.equals(other.getCombiningFactory())) {
return combiningFactory;
} else {
throw new AggregatorFactoryNotMergeableException(this, other);
}
}

/**

@ -153,6 +159,17 @@ public abstract class AggregatorFactory implements Cacheable
return null;
}

if (aggregatorsList.size() == 1) {
final AggregatorFactory[] aggregatorFactories = aggregatorsList.get(0);
if (aggregatorFactories != null) {
final AggregatorFactory[] combiningFactories = new AggregatorFactory[aggregatorFactories.length];
Arrays.setAll(combiningFactories, i -> aggregatorFactories[i].getCombiningFactory());
return combiningFactories;
} else {
return null;
}
}

Map<String, AggregatorFactory> mergedAggregators = new LinkedHashMap<>();

for (AggregatorFactory[] aggregators : aggregatorsList) {

@ -163,7 +180,9 @@ public abstract class AggregatorFactory implements Cacheable
if (mergedAggregators.containsKey(name)) {
AggregatorFactory other = mergedAggregators.get(name);
try {
mergedAggregators.put(name, other.getMergingFactory(aggregator));
// the order of aggregator matters when calling getMergingFactory()
// because it returns a combiningAggregator which can be different from the original aggregator.
mergedAggregators.put(name, aggregator.getMergingFactory(other));
}
catch (AggregatorFactoryNotMergeableException ex) {
log.warn(ex, "failed to merge aggregator factories");
@ -29,7 +29,6 @@ import io.druid.segment.ColumnSelectorFactory;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -118,29 +117,4 @@ public class DoubleMaxAggregatorFactory extends SimpleDoubleAggregatorFactory
|
|||
", name='" + name + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DoubleMaxAggregatorFactory that = (DoubleMaxAggregatorFactory) o;
|
||||
|
||||
if (!Objects.equals(fieldName, that.fieldName)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(expression, that.expression)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(name, that.name)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import io.druid.segment.ColumnSelectorFactory;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -116,29 +115,4 @@ public class DoubleMinAggregatorFactory extends SimpleDoubleAggregatorFactory
|
|||
", name='" + name + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DoubleMinAggregatorFactory that = (DoubleMinAggregatorFactory) o;
|
||||
|
||||
if (!Objects.equals(fieldName, that.fieldName)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(expression, that.expression)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(name, that.name)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import io.druid.segment.ColumnSelectorFactory;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -111,30 +110,4 @@ public class DoubleSumAggregatorFactory extends SimpleDoubleAggregatorFactory
|
|||
", name='" + name + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DoubleSumAggregatorFactory that = (DoubleSumAggregatorFactory) o;
|
||||
|
||||
if (!Objects.equals(fieldName, that.fieldName)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(expression, that.expression)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(name, that.name)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -102,6 +102,31 @@ public abstract class SimpleDoubleAggregatorFactory extends AggregatorFactory
|
|||
return Objects.hash(fieldName, expression, name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SimpleDoubleAggregatorFactory that = (SimpleDoubleAggregatorFactory) o;
|
||||
|
||||
if (!Objects.equals(fieldName, that.fieldName)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(expression, that.expression)) {
|
||||
return false;
|
||||
}
|
||||
if (!Objects.equals(name, that.name)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator getComparator()
|
||||
{
|
||||
|
|
|
@ -31,7 +31,6 @@ import io.druid.query.ColumnSelectorPlus;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.aggregation.NoopAggregator;
|
||||
|
@ -200,12 +199,6 @@ public class CardinalityAggregatorFactory extends AggregatorFactory
|
|||
return new HyperUniquesAggregatorFactory(name, name, false, round);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
throw new UnsupportedOperationException("can't merge CardinalityAggregatorFactory");
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -30,7 +30,6 @@ import io.druid.java.util.common.UOE;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
|
@ -162,16 +161,6 @@ public class DoubleFirstAggregatorFactory extends AggregatorFactory
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
|
||||
return getCombiningFactory();
|
||||
} else {
|
||||
throw new AggregatorFactoryNotMergeableException(this, other);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -30,7 +30,6 @@ import io.druid.java.util.common.UOE;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
|
@ -160,16 +159,6 @@ public class FloatFirstAggregatorFactory extends AggregatorFactory
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
|
||||
return getCombiningFactory();
|
||||
} else {
|
||||
throw new AggregatorFactoryNotMergeableException(this, other);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -29,7 +29,6 @@ import io.druid.java.util.common.UOE;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
|
@ -153,16 +152,6 @@ public class LongFirstAggregatorFactory extends AggregatorFactory
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
|
||||
return getCombiningFactory();
|
||||
} else {
|
||||
throw new AggregatorFactoryNotMergeableException(this, other);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -29,7 +29,6 @@ import io.druid.java.util.common.UOE;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.aggregation.first.DoubleFirstAggregatorFactory;
|
||||
|
@ -153,16 +152,6 @@ public class DoubleLastAggregatorFactory extends AggregatorFactory
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
|
||||
return getCombiningFactory();
|
||||
} else {
|
||||
throw new AggregatorFactoryNotMergeableException(this, other);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -29,7 +29,6 @@ import io.druid.java.util.common.UOE;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.aggregation.first.FloatFirstAggregatorFactory;
|
||||
|
@ -151,16 +150,6 @@ public class FloatLastAggregatorFactory extends AggregatorFactory
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
|
||||
return getCombiningFactory();
|
||||
} else {
|
||||
throw new AggregatorFactoryNotMergeableException(this, other);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -28,7 +28,6 @@ import io.druid.java.util.common.UOE;
|
|||
import io.druid.query.aggregation.AggregateCombiner;
|
||||
import io.druid.query.aggregation.Aggregator;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
|
||||
import io.druid.query.aggregation.AggregatorUtil;
|
||||
import io.druid.query.aggregation.BufferAggregator;
|
||||
import io.druid.query.aggregation.first.DoubleFirstAggregatorFactory;
|
||||
|
@ -149,16 +148,6 @@ public class LongLastAggregatorFactory extends AggregatorFactory
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
|
||||
{
|
||||
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
|
||||
return getCombiningFactory();
|
||||
} else {
|
||||
throw new AggregatorFactoryNotMergeableException(this, other);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AggregatorFactory> getRequiredColumns()
|
||||
{
|
||||
|
|
|
@ -19,6 +19,7 @@

package io.druid.segment;

import io.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.data.IOPeon;

@ -69,6 +70,14 @@ public interface DimensionHandler
*/
String getDimensionName();

/**
* Get {@link MultiValueHandling} for the column associated with this handler.
* Only string columns can have {@link MultiValueHandling} currently.
*/
default MultiValueHandling getMultivalueHandling()
{
return null;
}

/**
* Creates a new DimensionIndexer, a per-dimension object responsible for processing ingested rows in-memory, used
@ -50,6 +50,12 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
return dimensionName;
}

@Override
public MultiValueHandling getMultivalueHandling()
{
return multiValueHandling;
}

@Override
public int getLengthOfEncodedKeyComponent(int[] dimVals)
{
@ -100,7 +100,7 @@ public abstract class IncrementalIndex<AggregatorType> implements Iterable<Row>,

// Used to discover ValueType based on the class of values in a row
// Also used to convert between the duplicate ValueType enums in DimensionSchema (druid-api) and main druid.
private static final Map<Object, ValueType> TYPE_MAP = ImmutableMap.<Object, ValueType>builder()
public static final Map<Object, ValueType> TYPE_MAP = ImmutableMap.<Object, ValueType>builder()
.put(Long.class, ValueType.LONG)
.put(Double.class, ValueType.DOUBLE)
.put(Float.class, ValueType.FLOAT)
@ -36,6 +36,11 @@ public class TransformingInputRowParser<T> implements InputRowParser<T>
this.transformer = transformSpec.toTransformer();
}

public InputRowParser<T> getParser()
{
return parser;
}

@Override
public InputRow parse(final T row)
{