mirror of https://github.com/apache/druid.git
add configurable spatial dimensions to hadoop indexer
This commit is contained in:
parent
5af188f18d
commit
803e8ff69e
|
@ -22,8 +22,10 @@ package com.metamx.druid.indexer.data;
|
|||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.metamx.common.parsers.CSVParser;
|
||||
import com.metamx.common.parsers.Parser;
|
||||
import com.metamx.druid.index.v1.SpatialDimensionSchema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -33,11 +35,13 @@ public class CSVDataSpec implements DataSpec
|
|||
{
|
||||
private final List<String> columns;
|
||||
private final List<String> dimensions;
|
||||
private final List<SpatialDimensionSchema> spatialDimensions;
|
||||
|
||||
@JsonCreator
|
||||
public CSVDataSpec(
|
||||
@JsonProperty("columns") List<String> columns,
|
||||
@JsonProperty("dimensions") List<String> dimensions
|
||||
@JsonProperty("dimensions") List<String> dimensions,
|
||||
@JsonProperty("spatialDimensions") List<SpatialDimensionSchema> spatialDimensions
|
||||
)
|
||||
{
|
||||
Preconditions.checkNotNull(columns, "columns");
|
||||
|
@ -47,6 +51,9 @@ public class CSVDataSpec implements DataSpec
|
|||
|
||||
this.columns = columns;
|
||||
this.dimensions = dimensions;
|
||||
this.spatialDimensions = (spatialDimensions == null)
|
||||
? Lists.<SpatialDimensionSchema>newArrayList()
|
||||
: spatialDimensions;
|
||||
}
|
||||
|
||||
@JsonProperty("columns")
|
||||
|
@ -62,6 +69,13 @@ public class CSVDataSpec implements DataSpec
|
|||
return dimensions;
|
||||
}
|
||||
|
||||
/**
 * Returns the spatial dimension schemas held by this CSV data spec; the value is
 * exposed under the JSON property "spatialDimensions".
 *
 * The constructor stores an empty list when it is handed null, so the field
 * returned here is not null. The internal list is returned directly (no copy).
 *
 * @return the list assigned to the spatialDimensions field at construction
 */
@JsonProperty("spatialDimensions")
|
||||
@Override
|
||||
public List<SpatialDimensionSchema> getSpatialDimensions()
|
||||
{
|
||||
return spatialDimensions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void verify(List<String> usedCols)
|
||||
{
|
||||
|
|
|
@ -22,6 +22,7 @@ package com.metamx.druid.indexer.data;
|
|||
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||
import com.metamx.common.parsers.Parser;
|
||||
import com.metamx.druid.index.v1.SpatialDimensionSchema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -41,5 +42,7 @@ public interface DataSpec
|
|||
|
||||
public List<String> getDimensions();
|
||||
|
||||
/**
 * Returns the spatial dimension schemas associated with this data spec.
 *
 * NOTE(review): the implementations visible in this change store an empty list
 * when constructed with null (or delegate to another spec); whether every
 * implementation must return non-null is not stated here -- confirm before relying on it.
 */
public List<SpatialDimensionSchema> getSpatialDimensions();
|
||||
|
||||
public Parser<String, Object> getParser();
|
||||
}
|
||||
|
|
|
@ -22,8 +22,10 @@ package com.metamx.druid.indexer.data;
|
|||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.metamx.common.parsers.DelimitedParser;
|
||||
import com.metamx.common.parsers.Parser;
|
||||
import com.metamx.druid.index.v1.SpatialDimensionSchema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -34,12 +36,14 @@ public class DelimitedDataSpec implements DataSpec
|
|||
private final String delimiter;
|
||||
private final List<String> columns;
|
||||
private final List<String> dimensions;
|
||||
private final List<SpatialDimensionSchema> spatialDimensions;
|
||||
|
||||
@JsonCreator
|
||||
public DelimitedDataSpec(
|
||||
@JsonProperty("delimiter") String delimiter,
|
||||
@JsonProperty("columns") List<String> columns,
|
||||
@JsonProperty("dimensions") List<String> dimensions
|
||||
@JsonProperty("dimensions") List<String> dimensions,
|
||||
@JsonProperty("spatialDimensions") List<SpatialDimensionSchema> spatialDimensions
|
||||
)
|
||||
{
|
||||
Preconditions.checkNotNull(columns);
|
||||
|
@ -50,6 +54,9 @@ public class DelimitedDataSpec implements DataSpec
|
|||
this.delimiter = (delimiter == null) ? DelimitedParser.DEFAULT_DELIMITER : delimiter;
|
||||
this.columns = columns;
|
||||
this.dimensions = dimensions;
|
||||
this.spatialDimensions = (spatialDimensions == null)
|
||||
? Lists.<SpatialDimensionSchema>newArrayList()
|
||||
: spatialDimensions;
|
||||
}
|
||||
|
||||
@JsonProperty("delimiter")
|
||||
|
@ -71,6 +78,13 @@ public class DelimitedDataSpec implements DataSpec
|
|||
return dimensions;
|
||||
}
|
||||
|
||||
/**
 * Returns the spatial dimension schemas held by this delimited data spec; the value
 * is exposed under the JSON property "spatialDimensions".
 *
 * The constructor stores an empty list when it is handed null, so the field
 * returned here is not null. The internal list is returned directly (no copy).
 *
 * @return the list assigned to the spatialDimensions field at construction
 */
@JsonProperty("spatialDimensions")
|
||||
@Override
|
||||
public List<SpatialDimensionSchema> getSpatialDimensions()
|
||||
{
|
||||
return spatialDimensions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void verify(List<String> usedCols)
|
||||
{
|
||||
|
|
|
@ -20,8 +20,10 @@
|
|||
package com.metamx.druid.indexer.data;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.metamx.common.parsers.JSONParser;
|
||||
import com.metamx.common.parsers.Parser;
|
||||
import com.metamx.druid.index.v1.SpatialDimensionSchema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -30,12 +32,17 @@ import java.util.List;
|
|||
public class JSONDataSpec implements DataSpec
|
||||
{
|
||||
private final List<String> dimensions;
|
||||
private final List<SpatialDimensionSchema> spatialDimensions;
|
||||
|
||||
public JSONDataSpec(
|
||||
@JsonProperty("dimensions") List<String> dimensions
|
||||
@JsonProperty("dimensions") List<String> dimensions,
|
||||
@JsonProperty("spatialDimensions") List<SpatialDimensionSchema> spatialDimensions
|
||||
)
|
||||
{
|
||||
this.dimensions = dimensions;
|
||||
this.spatialDimensions = (spatialDimensions == null)
|
||||
? Lists.<SpatialDimensionSchema>newArrayList()
|
||||
: spatialDimensions;
|
||||
}
|
||||
|
||||
@JsonProperty("dimensions")
|
||||
|
@ -45,6 +52,13 @@ public class JSONDataSpec implements DataSpec
|
|||
return dimensions;
|
||||
}
|
||||
|
||||
/**
 * Returns the spatial dimension schemas held by this JSON data spec; the value is
 * exposed under the JSON property "spatialDimensions".
 *
 * The constructor stores an empty list when it is handed null, so the field
 * returned here is not null. The internal list is returned directly (no copy).
 *
 * @return the list assigned to the spatialDimensions field at construction
 */
@JsonProperty("spatialDimensions")
|
||||
@Override
|
||||
public List<SpatialDimensionSchema> getSpatialDimensions()
|
||||
{
|
||||
return spatialDimensions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void verify(List<String> usedCols)
|
||||
{
|
||||
|
|
|
@ -22,6 +22,7 @@ package com.metamx.druid.indexer.data;
|
|||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
import com.metamx.common.parsers.Parser;
|
||||
import com.metamx.common.parsers.ToLowerCaseParser;
|
||||
import com.metamx.druid.index.v1.SpatialDimensionSchema;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -56,6 +57,12 @@ public class ToLowercaseDataSpec implements DataSpec
|
|||
return delegate.getDimensions();
|
||||
}
|
||||
|
||||
/**
 * Returns the spatial dimension schemas by forwarding the call to the delegate;
 * this class adds no spatial dimensions of its own.
 *
 * @return whatever delegate.getSpatialDimensions() returns
 */
@Override
|
||||
public List<SpatialDimensionSchema> getSpatialDimensions()
|
||||
{
|
||||
return delegate.getSpatialDimensions();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Parser<String, Object> getParser()
|
||||
{
|
||||
|
|
|
@ -35,6 +35,7 @@ import com.metamx.druid.aggregation.AggregatorFactory;
|
|||
import com.metamx.druid.client.DataSegment;
|
||||
import com.metamx.druid.index.QueryableIndex;
|
||||
import com.metamx.druid.index.v1.IncrementalIndex;
|
||||
import com.metamx.druid.index.v1.IncrementalIndexSchema;
|
||||
import com.metamx.druid.index.v1.IndexIO;
|
||||
import com.metamx.druid.index.v1.IndexMerger;
|
||||
import com.metamx.druid.indexer.data.StringInputRowParser;
|
||||
|
@ -152,7 +153,8 @@ public class IndexGeneratorJob implements Jobby
|
|||
}
|
||||
}
|
||||
|
||||
public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig config) {
|
||||
public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig config)
|
||||
{
|
||||
|
||||
final Configuration conf = new Configuration();
|
||||
final ObjectMapper jsonMapper = HadoopDruidIndexerConfig.jsonMapper;
|
||||
|
@ -182,7 +184,7 @@ public class IndexGeneratorJob implements Jobby
|
|||
List<DataSegment> publishedSegments = publishedSegmentsBuilder.build();
|
||||
|
||||
return publishedSegments;
|
||||
}
|
||||
}
|
||||
|
||||
public static class IndexGeneratorMapper extends HadoopDruidIndexerMapper<BytesWritable, Text>
|
||||
|
||||
|
@ -197,7 +199,7 @@ public class IndexGeneratorJob implements Jobby
|
|||
// Group by bucket, sort by timestamp
|
||||
final Optional<Bucket> bucket = getConfig().getBucket(inputRow);
|
||||
|
||||
if(!bucket.isPresent()) {
|
||||
if (!bucket.isPresent()) {
|
||||
throw new ISE("WTF?! No bucket found for row: %s", inputRow);
|
||||
}
|
||||
|
||||
|
@ -590,9 +592,12 @@ public class IndexGeneratorJob implements Jobby
|
|||
/**
 * Creates the IncrementalIndex for the given bucket and aggregators.
 *
 * NOTE(review): this span is a diff rendering without +/- markers, so two argument
 * lists appear back to back. Going by the hunk header (9 old lines, 12 new lines),
 * the three positional arguments (bucket time in millis, rollup granularity, aggs)
 * look like the removed side, and the IncrementalIndexSchema.Builder expression
 * looks like the added side -- confirm against the real file.
 *
 * The builder expression passes the same three values (min timestamp, query
 * granularity, metrics) and additionally the spatial dimensions obtained from
 * config.getDataSpec().getSpatialDimensions().
 *
 * @param theBucket bucket whose time field supplies the minimum timestamp (millis)
 * @param aggs      aggregator factories passed through as the index metrics
 * @return a newly constructed IncrementalIndex
 */
private IncrementalIndex makeIncrementalIndex(Bucket theBucket, AggregatorFactory[] aggs)
|
||||
{
|
||||
return new IncrementalIndex(
|
||||
theBucket.time.getMillis(),
|
||||
config.getRollupSpec().getRollupGranularity(),
|
||||
aggs
|
||||
new IncrementalIndexSchema.Builder()
|
||||
.withMinTimestamp(theBucket.time.getMillis())
|
||||
.withSpatialDimensions(config.getDataSpec().getSpatialDimensions())
|
||||
.withQueryGranularity(config.getRollupSpec().getRollupGranularity())
|
||||
.withMetrics(aggs)
|
||||
.build()
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -317,7 +317,7 @@ public class TaskSerdeTest
|
|||
"foo",
|
||||
"timestamp",
|
||||
"auto",
|
||||
new JSONDataSpec(ImmutableList.of("foo")),
|
||||
new JSONDataSpec(ImmutableList.of("foo"), null),
|
||||
null,
|
||||
new UniformGranularitySpec(Granularity.DAY, ImmutableList.of(new Interval("2010-01-01/P1D"))),
|
||||
new StaticPathSpec("bar"),
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -75,7 +75,7 @@
|
|||
<dependency>
|
||||
<groupId>com.metamx</groupId>
|
||||
<artifactId>bytebuffer-collections</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<version>0.0.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.metamx</groupId>
|
||||
|
|
|
@ -162,7 +162,7 @@ public class TestIndex
|
|||
{
|
||||
StringInputRowParser parser = new StringInputRowParser(
|
||||
new TimestampSpec("ts", "iso"),
|
||||
new DelimitedDataSpec("\t", Arrays.asList(COLUMNS), Arrays.asList(DIMENSIONS)),
|
||||
new DelimitedDataSpec("\t", Arrays.asList(COLUMNS), Arrays.asList(DIMENSIONS), null),
|
||||
Arrays.<String>asList()
|
||||
);
|
||||
boolean runOnce = false;
|
||||
|
|
Loading…
Reference in New Issue