mirror of
https://github.com/apache/druid.git
synced 2025-02-11 20:45:01 +00:00
2) Fix bug with IndexMerger and null columns 3) Add QueryableIndexIndexableAdapter so that QueryableIndexes can be merged 4) Adjust twitter example to have multiple values for each hash tag 5) Adjusted GroupByQueryEngine to just drop dimensions that don't exist instead of throwing an NPE
45 lines
1.7 KiB
Python
45 lines
1.7 KiB
Python
[{
|
|
"schema": {
|
|
"dataSource": "twitterstream",
|
|
"aggregators": [
|
|
{"type": "count", "name": "tweets"},
|
|
{"type": "doubleSum", "fieldName": "follower_count", "name": "total_follower_count"},
|
|
{"type": "doubleSum", "fieldName": "retweet_count", "name": "total_retweet_count"},
|
|
{"type": "doubleSum", "fieldName": "friends_count", "name": "total_friends_count"},
|
|
{"type": "doubleSum", "fieldName": "statuses_count", "name": "total_statuses_count"},
|
|
|
|
{"type": "min", "fieldName": "follower_count", "name": "min_follower_count"},
|
|
{"type": "max", "fieldName": "follower_count", "name": "max_follower_count"},
|
|
|
|
{"type": "min", "fieldName": "friends_count", "name": "min_friends_count"},
|
|
{"type": "max", "fieldName": "friends_count", "name": "max_friends_count"},
|
|
|
|
{"type": "min", "fieldName": "statuses_count", "name": "min_statuses_count"},
|
|
{"type": "max", "fieldName": "statuses_count", "name": "max_statuses_count"},
|
|
|
|
{"type": "min", "fieldName": "retweet_count", "name": "min_retweet_count"},
|
|
{"type": "max", "fieldName": "retweet_count", "name": "max_retweet_count"}
|
|
],
|
|
"indexGranularity": "minute",
|
|
"shardSpec": {"type": "none"}
|
|
},
|
|
|
|
"config": {
|
|
"maxRowsInMemory": 50000,
|
|
"intermediatePersistPeriod": "PT2m"
|
|
},
|
|
|
|
"firehose": {
|
|
"type": "twitzer",
|
|
"maxEventCount": 500000,
|
|
"maxRunMinutes": 120
|
|
},
|
|
|
|
"plumber": {
|
|
"type": "realtime",
|
|
"windowPeriod": "PT3m",
|
|
"segmentGranularity": "hour",
|
|
"basePersistDirectory": "/tmp/twitter_realtime/basePersist"
|
|
}
|
|
}]
|