mirror of https://github.com/apache/druid.git
give user the option to specify the segments for dataSource inputSpec
parent 8ee81947cd
commit 09ffcae4ae
@@ -51,6 +51,7 @@ Here is what goes inside `ingestionSpec`:

|Field|Type|Description|Required|
|-----|----|-----------|--------|
|dataSource|String|Druid dataSource name from which you are loading the data.|yes|
|intervals|List|A list of strings representing ISO-8601 intervals.|yes|
|segments|List|A list of segments from which to read the data. If not specified, the list is determined automatically and may change across invocations; users can supply it explicitly to make the ingestion task idempotent.|no|
|granularity|String|Defines the granularity of the query while loading data. The default value is "none". See [Granularities](../querying/granularities.html).|no|
|filter|JSON|See [Filters](../querying/filters.html).|no|
|dimensions|Array of String|Names of the dimension columns to load. By default, the list is constructed from the parseSpec. If the parseSpec does not have an explicit list of dimensions, all dimension columns present in the stored data are read.|no|
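For illustration, a minimal `ingestionSpec` that pins the segment list might look like the sketch below. The dataSource name is hypothetical, and the segment descriptor is abbreviated; real descriptors carry the full metadata (loadSpec, dimensions, metrics, identifier) shown in the test JSON later in this commit:

```json
{
  "dataSource": "wikipedia",
  "intervals": ["2014-01-01/2015-01-01"],
  "segments": [
    {
      "dataSource": "wikipedia",
      "interval": "2014-01-01T00:00:00.000Z/2015-01-01T00:00:00.000Z",
      "version": "v0",
      "shardSpec": {"type": "none"},
      "binaryVersion": 9,
      "size": 128
    }
  ],
  "granularity": "day"
}
```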
@@ -37,8 +37,10 @@ import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 */
@@ -168,10 +170,32 @@ public class HadoopIngestionSpec extends IngestionSpec<HadoopIOConfig, HadoopTun
          ingestionSpecMap,
          DatasourceIngestionSpec.class
      );

      List<DataSegment> segmentsList = segmentLister.getUsedSegmentsForIntervals(
          ingestionSpecObj.getDataSource(),
          ingestionSpecObj.getIntervals()
      );

      if (ingestionSpecObj.getSegments() != null) {
        // Ensure that the user-supplied segment list matches the segment list obtained from the db.
        // This safety check lets users do test-and-set style batch delta ingestion, where the delta
        // ingestion task only runs if the current state of the system is the same as when they
        // submitted the task.
        List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();

        if (segmentsList.size() == userSuppliedSegmentsList.size()) {
          Set<DataSegment> segmentsSet = new HashSet<>(segmentsList);

          for (DataSegment userSegment : userSuppliedSegmentsList) {
            if (!segmentsSet.contains(userSegment)) {
              throw new IOException("user supplied segments list did not match with segments list obtained from db");
            }
          }
        } else {
          throw new IOException("user supplied segments list did not match with segments list obtained from db");
        }
      }

      VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
      for (DataSegment segment : segmentsList) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
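A side note on the check above: the size test plus HashSet membership makes the comparison order-insensitive (though, like any size-plus-membership check, it does not distinguish duplicate entries, which segment lists should not contain anyway). A minimal standalone sketch of the same logic, with String identifiers standing in for DataSegment so it runs without Druid on the classpath:

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class SegmentListCheck
{
  // Same shape as the check in HadoopIngestionSpec: equal sizes, then
  // every user-supplied entry must be present in the db-derived set.
  static boolean matches(List<String> fromDb, List<String> userSupplied)
  {
    if (fromDb.size() != userSupplied.size()) {
      return false;
    }
    Set<String> dbSet = new HashSet<>(fromDb);
    for (String userSegment : userSupplied) {
      if (!dbSet.contains(userSegment)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args)
  {
    System.out.println(matches(Arrays.asList("s1", "s2"), Arrays.asList("s2", "s1"))); // true (order-insensitive)
    System.out.println(matches(Arrays.asList("s1", "s2"), Arrays.asList("s1", "s3"))); // false
  }
}
```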
@@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableList;
import io.druid.common.utils.JodaUtils;
import io.druid.granularity.QueryGranularity;
import io.druid.query.filter.DimFilter;
import io.druid.timeline.DataSegment;
import org.joda.time.Interval;

import java.util.List;
@@ -34,6 +35,7 @@ public class DatasourceIngestionSpec
{
  private final String dataSource;
  private final List<Interval> intervals;
  private final List<DataSegment> segments;
  private final DimFilter filter;
  private final QueryGranularity granularity;
  private final List<String> dimensions;
@@ -45,6 +47,7 @@ public class DatasourceIngestionSpec
      @JsonProperty("dataSource") String dataSource,
      @Deprecated @JsonProperty("interval") Interval interval,
      @JsonProperty("intervals") List<Interval> intervals,
      @JsonProperty("segments") List<DataSegment> segments,
      @JsonProperty("filter") DimFilter filter,
      @JsonProperty("granularity") QueryGranularity granularity,
      @JsonProperty("dimensions") List<String> dimensions,
@@ -67,6 +70,11 @@ public class DatasourceIngestionSpec
    }
    this.intervals = Preconditions.checkNotNull(theIntervals, "no intervals found");

    // Note that it is important to have intervals even if the user explicitly specifies the list of
    // segments, because the segment list's min/max boundaries might not align with the intended
    // interval to read in all cases.
    this.segments = segments;

    this.filter = filter;
    this.granularity = granularity == null ? QueryGranularity.NONE : granularity;
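To make that comment concrete, here is a small Joda-Time sketch (dates hypothetical) of how a read interval clips a wider pinned segment; this is why `intervals` stays required even when `segments` is supplied:

```java
import org.joda.time.Interval;

public class IntervalClipExample
{
  public static void main(String[] args)
  {
    // A pinned segment may cover more time than the user wants to read ...
    Interval segmentInterval = Interval.parse("2014-01-01/2017-01-01");
    Interval readInterval = Interval.parse("2014-01-01/2015-01-01");
    // ... so the spec's intervals still bound what is actually ingested.
    // Prints the 2014-only overlap (exact formatting depends on the default time zone).
    System.out.println(segmentInterval.overlap(readInterval));
  }
}
```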
@@ -88,6 +96,12 @@ public class DatasourceIngestionSpec
    return intervals;
  }

  @JsonProperty
  public List<DataSegment> getSegments()
  {
    return segments;
  }

  @JsonProperty
  public DimFilter getFilter()
  {
@@ -124,6 +138,7 @@ public class DatasourceIngestionSpec
        dataSource,
        null,
        intervals,
        segments,
        filter,
        granularity,
        dimensions,

@@ -138,6 +153,7 @@ public class DatasourceIngestionSpec
        dataSource,
        null,
        intervals,
        segments,
        filter,
        granularity,
        dimensions,

@@ -152,6 +168,7 @@ public class DatasourceIngestionSpec
        dataSource,
        null,
        intervals,
        segments,
        filter,
        granularity,
        dimensions,

@@ -166,6 +183,7 @@ public class DatasourceIngestionSpec
        dataSource,
        null,
        intervals,
        segments,
        filter,
        granularity,
        dimensions,
@@ -195,6 +213,9 @@ public class DatasourceIngestionSpec
    if (!intervals.equals(that.intervals)) {
      return false;
    }
    if (segments != null ? !segments.equals(that.segments) : that.segments != null) {
      return false;
    }
    if (filter != null ? !filter.equals(that.filter) : that.filter != null) {
      return false;
    }
@@ -213,6 +234,7 @@ public class DatasourceIngestionSpec
  {
    int result = dataSource.hashCode();
    result = 31 * result + intervals.hashCode();
    result = 31 * result + (segments != null ? segments.hashCode() : 0);
    result = 31 * result + (filter != null ? filter.hashCode() : 0);
    result = 31 * result + granularity.hashCode();
    result = 31 * result + (dimensions != null ? dimensions.hashCode() : 0);
@@ -227,6 +249,7 @@ public class DatasourceIngestionSpec
    return "DatasourceIngestionSpec{" +
           "dataSource='" + dataSource + '\'' +
           ", intervals=" + intervals +
           ", segments=" + segments +
           ", filter=" + filter +
           ", granularity=" + granularity +
           ", dimensions=" + dimensions +
@@ -43,6 +43,7 @@ import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Test;

import java.io.IOException;
import java.util.Map;

/**
@@ -91,7 +92,7 @@ public class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest
    PathSpec pathSpec = new DatasourcePathSpec(
        jsonMapper,
        null,
-        new DatasourceIngestionSpec(testDatasource, testDatasourceInterval, null, null, null, null, null, false),
+        new DatasourceIngestionSpec(testDatasource, testDatasourceInterval, null, null, null, null, null, null, false),
        null
    );
    HadoopDruidIndexerConfig config = testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(
@@ -104,6 +105,60 @@ public class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest
    );
  }

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecWithMatchingUserSegments() throws Exception
  {
    PathSpec pathSpec = new DatasourcePathSpec(
        jsonMapper,
        null,
        new DatasourceIngestionSpec(
            testDatasource,
            testDatasourceInterval,
            null,
            ImmutableList.<DataSegment>of(SEGMENT),
            null,
            null,
            null,
            null,
            false
        ),
        null
    );
    HadoopDruidIndexerConfig config = testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(
        pathSpec,
        testDatasourceInterval
    );
    Assert.assertEquals(
        ImmutableList.of(WindowedDataSegment.of(SEGMENT)),
        ((DatasourcePathSpec) config.getPathSpec()).getSegments()
    );
  }

  @Test(expected = IOException.class)
  public void testupdateSegmentListThrowsExceptionWithUserSegmentsMismatch() throws Exception
  {
    PathSpec pathSpec = new DatasourcePathSpec(
        jsonMapper,
        null,
        new DatasourceIngestionSpec(
            testDatasource,
            testDatasourceInterval,
            null,
            ImmutableList.<DataSegment>of(SEGMENT.withVersion("v2")),
            null,
            null,
            null,
            null,
            false
        ),
        null
    );
    testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(
        pathSpec,
        testDatasourceInterval
    );
  }

  @Test
  public void testupdateSegmentListIfDatasourcePathSpecIsUsedWithJustDatasourcePathSpecAndPartialInterval()
      throws Exception
@@ -111,7 +166,17 @@ public class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest
    PathSpec pathSpec = new DatasourcePathSpec(
        jsonMapper,
        null,
-        new DatasourceIngestionSpec(testDatasource, testDatasourceIntervalPartial, null, null, null, null, null, false),
+        new DatasourceIngestionSpec(
+            testDatasource,
+            testDatasourceIntervalPartial,
+            null,
+            null,
+            null,
+            null,
+            null,
+            null,
+            false
+        ),
        null
    );
    HadoopDruidIndexerConfig config = testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(
@@ -133,7 +198,17 @@ public class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest
        new DatasourcePathSpec(
            jsonMapper,
            null,
-            new DatasourceIngestionSpec(testDatasource, testDatasourceInterval, null, null, null, null, null, false),
+            new DatasourceIngestionSpec(
+                testDatasource,
+                testDatasourceInterval,
+                null,
+                null,
+                null,
+                null,
+                null,
+                null,
+                false
+            ),
            null
        )
    )
@@ -25,6 +25,7 @@ import com.google.common.collect.Lists;
import io.druid.granularity.QueryGranularity;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.segment.TestHelper;
import io.druid.timeline.DataSegment;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Test;
@@ -46,6 +47,7 @@ public class DatasourceIngestionSpecTest
        "test",
        interval,
        null,
        null,
        new SelectorDimFilter("dim", "value"),
        QueryGranularity.DAY,
        Lists.newArrayList("d1", "d2"),
@@ -84,6 +86,7 @@ public class DatasourceIngestionSpecTest
        null,
        null,
        null,
        null,
        false
    );

@@ -93,6 +96,18 @@ public class DatasourceIngestionSpecTest
    jsonStr = "{\n"
              + "  \"dataSource\": \"test\",\n"
              + "  \"intervals\": [\"2014/2015\", \"2016/2017\"],\n"
              + "  \"segments\": [{\n"
              + "    \"dataSource\":\"test\",\n"
              + "    \"interval\":\"2014-01-01T00:00:00.000Z/2017-01-01T00:00:00.000Z\",\n"
              + "    \"version\":\"v0\",\n"
              + "    \"loadSpec\":null,\n"
              + "    \"dimensions\":\"\",\n"
              + "    \"metrics\":\"\",\n"
              + "    \"shardSpec\":{\"type\":\"none\"},\n"
              + "    \"binaryVersion\":9,\n"
              + "    \"size\":128,\n"
              + "    \"identifier\":\"test_2014-01-01T00:00:00.000Z_2017-01-01T00:00:00.000Z_v0\"\n"
              + "  }],\n"
              + "  \"filter\": { \"type\": \"selector\", \"dimension\": \"dim\", \"value\": \"value\"},\n"
              + "  \"granularity\": \"day\",\n"
              + "  \"dimensions\": [\"d1\", \"d2\"],\n"
@@ -104,6 +119,19 @@ public class DatasourceIngestionSpecTest
        "test",
        null,
        intervals,
        ImmutableList.of(
            new DataSegment(
                "test",
                Interval.parse("2014/2017"),
                "v0",
                null,
                null,
                null,
                null,
                9,
                128
            )
        ),
        new SelectorDimFilter("dim", "value"),
        QueryGranularity.DAY,
        Lists.newArrayList("d1", "d2"),
@@ -128,7 +156,7 @@ public class DatasourceIngestionSpecTest
    DatasourceIngestionSpec actual = MAPPER.readValue(jsonStr, DatasourceIngestionSpec.class);

    Assert.assertEquals(
-        new DatasourceIngestionSpec("test", Interval.parse("2014/2015"), null, null, null, null, null, false),
+        new DatasourceIngestionSpec("test", Interval.parse("2014/2015"), null, null, null, null, null, null, false),
        actual
    );
  }
@@ -67,6 +67,7 @@ public class DatasourceRecordReaderTest
        null,
        null,
        null,
        null,
        segment.getDimensions(),
        segment.getMetrics(),
        false
@@ -80,6 +80,7 @@ public class DatasourcePathSpecTest
        null,
        null,
        null,
        null,
        false
    );
