{
  "type" : "index_hadoop",
  "spec" : {
    "dataSchema" : {
      "dataSource" : "wikipedia",
      "parser" : {
        "type" : "hadoopyString",
        "parseSpec" : {
          "format" : "json",
          "dimensionsSpec" : {
            "dimensions" : [
              "channel",
              "cityName",
              "comment",
              "countryIsoCode",
              "countryName",
              "isAnonymous",
              "isMinor",
              "isNew",
              "isRobot",
              "isUnpatrolled",
              "metroCode",
              "namespace",
              "page",
              "regionIsoCode",
              "regionName",
              "user",
              { "name": "added", "type": "long" },
              { "name": "deleted", "type": "long" },
              { "name": "delta", "type": "long" }
            ]
          },
          "timestampSpec" : {
            "format" : "auto",
            "column" : "time"
          }
        }
      },
      "metricsSpec" : [],
      "granularitySpec" : {
        "type" : "uniform",
        "segmentGranularity" : "day",
        "queryGranularity" : "none",
        "intervals" : ["2015-09-12/2015-09-13"],
        "rollup" : false
      }
    },
    "ioConfig" : {
      "type" : "hadoop",
      "inputSpec" : {
        "type" : "static",
        "paths" : "/quickstart/wikiticker-2015-09-12-sampled.json.gz"
      }
    },
    "tuningConfig" : {
      "type" : "hadoop",
      "partitionsSpec" : {
        "type" : "hashed",
        "targetPartitionSize" : 5000000
      },
      "forceExtendableShardSpecs" : true,
      "jobProperties" : {
        "fs.default.name" : "hdfs://druid-hadoop-demo:9000",
        "fs.defaultFS" : "hdfs://druid-hadoop-demo:9000",
        "dfs.datanode.address" : "druid-hadoop-demo",
        "dfs.client.use.datanode.hostname" : "true",
        "dfs.datanode.use.datanode.hostname" : "true",
        "yarn.resourcemanager.hostname" : "druid-hadoop-demo",
        "yarn.nodemanager.vmem-check-enabled" : "false",
        "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
        "mapreduce.job.user.classpath.first" : "true",
        "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
        "mapreduce.map.memory.mb" : 1024,
        "mapreduce.reduce.memory.mb" : 1024
      }
    }
  },
  "hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.8.5"]
}