druid/examples/quickstart/tutorial/transform-index.json
Suneet Saldanha 85a3d416b0 Tutorials use new ingestion spec where possible (#9155)
* Tutorials use new ingestion spec where possible

There are 2 main changes
  * Use task type index_parallel instead of index
  * Remove the use of parser + firehose in favor of inputFormat + inputSource

index_parallel is the preferred method starting in 0.17. Setting the job to
index_parallel with the default maxNumConcurrentSubTasks(1) is the equivalent
of an index task

Instead of using a parserSpec, dimensionSpec and timestampSpec have been
promoted to the dataSchema. The format is described in the ioConfig as the
inputFormat.

There are a few cases where the new format is not supported
 * Hadoop must use firehoses instead of the inputSource and inputFormat
 * There is no equivalent of a combining firehose as an inputSource
 * A Combining firehose does not support index_parallel

* fix typo
2020-01-15 14:08:29 -08:00

70 lines
1.9 KiB
JSON

{
"type" : "index_parallel",
"spec" : {
"dataSchema" : {
"dataSource" : "transform-tutorial",
"timestampSpec": {
"column": "timestamp",
"format": "iso"
},
"dimensionsSpec" : {
"dimensions" : [
"animal",
{ "name": "location", "type": "long" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "number", "fieldName" : "number" },
{ "type" : "longSum", "name" : "triple-number", "fieldName" : "triple-number" }
],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "week",
"queryGranularity" : "minute",
"intervals" : ["2018-01-01/2018-01-03"],
"rollup" : true
},
"transformSpec": {
"transforms": [
{
"type": "expression",
"name": "animal",
"expression": "concat('super-', animal)"
},
{
"type": "expression",
"name": "triple-number",
"expression": "number * 3"
}
],
"filter": {
"type":"or",
"fields": [
{ "type": "selector", "dimension": "animal", "value": "super-mongoose" },
{ "type": "selector", "dimension": "triple-number", "value": "300" },
{ "type": "selector", "dimension": "location", "value": "3" }
]
}
}
},
"ioConfig" : {
"type" : "index_parallel",
"inputSource" : {
"type" : "local",
"baseDir" : "quickstart/tutorial",
"filter" : "transform-data.json"
},
"inputFormat" : {
"type" : "json"
},
"appendToExisting" : false
},
"tuningConfig" : {
"type" : "index_parallel",
"maxRowsPerSegment" : 5000000,
"maxRowsInMemory" : 25000
}
}
}