Mirror of https://github.com/apache/druid.git, synced 2025-02-06 10:08:26 +00:00
ff501e8f13
* Add Date support to the parquet reader Add support for the Date logical type. Currently this is not supported. Since the parquet date (the number of days since epoch) gets interpreted as seconds since epoch, indexing the data will fail because it will not map to the appropriate bucket. * Cleaned up code and tests Got rid of unused json files in the examples, cleaned up the tests by using try-with-resources. Now get the filenames from the json file instead of hard coding them and integrated general improvements from the feedback provided by leventov. * Got rid of the caching Remove the caching of the logical type of the time dimension column and cleaned up the code a bit.
62 lines
1.7 KiB
JSON
Executable File
{
  "type": "index_hadoop",
  "spec": {
    "ioConfig": {
      "type": "hadoop",
      "inputSpec": {
        "type": "static",
        "inputFormat": "io.druid.data.input.parquet.DruidParquetInputFormat",
        "paths": "example/test_date_data.snappy.parquet"
      },
      "metadataUpdateSpec": {
        "type": "postgresql",
        "connectURI": "jdbc:postgresql://localhost/druid",
        "user": "druid",
        "password": "asdf",
        "segmentTable": "druid_segments"
      },
      "segmentOutputPath": "/tmp/segments"
    },
    "dataSchema": {
      "dataSource": "date_dataset_date",
      "parser": {
        "type": "parquet",
        "parseSpec": {
          "format": "timeAndDims",
          "timestampSpec": {
            "column": "date_as_date"
          },
          "dimensionsSpec": {
            "dimensions": [
              "idx"
            ]
          }
        }
      },
      "metricsSpec": [{
        "type": "count",
        "name": "count"
      }],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "DAY",
        "queryGranularity": "NONE",
        "intervals": ["2017-06-17/2017-09-24"]
      }
    },
    "tuningConfig": {
      "type": "hadoop",
      "workingPath": "tmp/working_path",
      "partitionsSpec": {
        "targetPartitionSize": 5000000
      },
      "jobProperties": {
        "mapreduce.map.java.opts": "-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps",
        "mapreduce.reduce.java.opts": "-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps",
        "mapred.child.java.opts": "-server -XX:+PrintGCDetails -XX:+PrintGCTimeStamps"
      },
      "leaveIntermediate": true
    }
  }
}