mirror of https://github.com/apache/druid.git
Tutorials use new ingestion spec where possible (#9155)
* Tutorials use new ingestion spec where possible There are 2 main changes * Use task type index_parallel instead of index * Remove the use of parser + firehose in favor of inputFormat + inputSource index_parallel is the preferred method starting in 0.17. Setting the job to index_parallel with the default maxNumConcurrentSubTasks(1) is the equivalent of an index task Instead of using a parserSpec, dimensionSpec and timestampSpec have been promoted to the dataSchema. The format is described in the ioConfig as the inputFormat. There are a few cases where the new format is not supported * Hadoop must use firehoses instead of the inputSource and inputFormat * There is no equivalent of a combining firehose as an inputSource * A Combining firehose does not support index_parallel * fix typo
This commit is contained in:
parent
4716e0b585
commit
85a3d416b0
|
@ -112,7 +112,7 @@ The MySQL extension provides an implementation of an [SqlFirehose](../../ingesti
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type": "index",
|
"type": "index_parallel",
|
||||||
"spec": {
|
"spec": {
|
||||||
"dataSchema": {
|
"dataSchema": {
|
||||||
"dataSource": "some_datasource",
|
"dataSource": "some_datasource",
|
||||||
|
@ -149,7 +149,7 @@ The MySQL extension provides an implementation of an [SqlFirehose](../../ingesti
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig": {
|
"ioConfig": {
|
||||||
"type": "index",
|
"type": "index_parallel",
|
||||||
"firehose": {
|
"firehose": {
|
||||||
"type": "sql",
|
"type": "sql",
|
||||||
"database": {
|
"database": {
|
||||||
|
@ -166,7 +166,7 @@ The MySQL extension provides an implementation of an [SqlFirehose](../../ingesti
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tuningconfig": {
|
"tuningconfig": {
|
||||||
"type": "index"
|
"type": "index_parallel"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -91,7 +91,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type": "index",
|
"type": "index_parallel",
|
||||||
"spec": {
|
"spec": {
|
||||||
"dataSchema": {
|
"dataSchema": {
|
||||||
"dataSource": "some_datasource",
|
"dataSource": "some_datasource",
|
||||||
|
@ -128,7 +128,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig": {
|
"ioConfig": {
|
||||||
"type": "index",
|
"type": "index_parallel",
|
||||||
"firehose": {
|
"firehose": {
|
||||||
"type": "sql",
|
"type": "sql",
|
||||||
"database": {
|
"database": {
|
||||||
|
@ -145,7 +145,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tuningconfig": {
|
"tuningconfig": {
|
||||||
"type": "index"
|
"type": "index_parallel"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,42 +134,36 @@ which has been configured to read the `quickstart/tutorial/wikiticker-2015-09-12
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "wikipedia",
|
"dataSource" : "wikipedia",
|
||||||
"parser" : {
|
"dimensionsSpec" : {
|
||||||
"type" : "string",
|
"dimensions" : [
|
||||||
"parseSpec" : {
|
"channel",
|
||||||
"format" : "json",
|
"cityName",
|
||||||
"dimensionsSpec" : {
|
"comment",
|
||||||
"dimensions" : [
|
"countryIsoCode",
|
||||||
"channel",
|
"countryName",
|
||||||
"cityName",
|
"isAnonymous",
|
||||||
"comment",
|
"isMinor",
|
||||||
"countryIsoCode",
|
"isNew",
|
||||||
"countryName",
|
"isRobot",
|
||||||
"isAnonymous",
|
"isUnpatrolled",
|
||||||
"isMinor",
|
"metroCode",
|
||||||
"isNew",
|
"namespace",
|
||||||
"isRobot",
|
"page",
|
||||||
"isUnpatrolled",
|
"regionIsoCode",
|
||||||
"metroCode",
|
"regionName",
|
||||||
"namespace",
|
"user",
|
||||||
"page",
|
{ "name": "added", "type": "long" },
|
||||||
"regionIsoCode",
|
{ "name": "deleted", "type": "long" },
|
||||||
"regionName",
|
{ "name": "delta", "type": "long" }
|
||||||
"user",
|
]
|
||||||
{ "name": "added", "type": "long" },
|
},
|
||||||
{ "name": "deleted", "type": "long" },
|
"timestampSpec": {
|
||||||
{ "name": "delta", "type": "long" }
|
"column": "time",
|
||||||
]
|
"format": "iso"
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "time",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [],
|
"metricsSpec" : [],
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
|
@ -181,16 +175,19 @@ which has been configured to read the `quickstart/tutorial/wikiticker-2015-09-12
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial/",
|
"baseDir" : "quickstart/tutorial/",
|
||||||
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type": "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,7 +81,7 @@ We have included a compaction task spec for this tutorial datasource at `quickst
|
||||||
"dataSource": "compaction-tutorial",
|
"dataSource": "compaction-tutorial",
|
||||||
"interval": "2015-09-12/2015-09-13",
|
"interval": "2015-09-12/2015-09-13",
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
@ -143,7 +143,7 @@ We have included a compaction task spec that will create DAY granularity segment
|
||||||
"interval": "2015-09-12/2015-09-13",
|
"interval": "2015-09-12/2015-09-13",
|
||||||
"segmentGranularity": "DAY",
|
"segmentGranularity": "DAY",
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000,
|
"maxRowsInMemory" : 25000,
|
||||||
"forceExtendableShardSpecs" : true
|
"forceExtendableShardSpecs" : true
|
||||||
|
|
|
@ -88,42 +88,18 @@ The datasource name is specified by the `dataSource` parameter in the `dataSchem
|
||||||
|
|
||||||
Let's call the tutorial datasource `ingestion-tutorial`.
|
Let's call the tutorial datasource `ingestion-tutorial`.
|
||||||
|
|
||||||
### Choose a parser
|
|
||||||
|
|
||||||
A `dataSchema` has a `parser` field, which defines the parser that Druid will use to interpret the input data.
|
|
||||||
|
|
||||||
Since our input data is represented as JSON strings, we'll use a `string` parser with `json` format:
|
|
||||||
|
|
||||||
```json
|
|
||||||
"dataSchema" : {
|
|
||||||
"dataSource" : "ingestion-tutorial",
|
|
||||||
"parser" : {
|
|
||||||
"type" : "string",
|
|
||||||
"parseSpec" : {
|
|
||||||
"format" : "json"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Time column
|
### Time column
|
||||||
|
|
||||||
The `parser` needs to know how to extract the main timestamp field from the input data. When using a `json` type `parseSpec`, the timestamp is defined in a `timestampSpec`.
|
The `dataSchema` needs to know how to extract the main timestamp field from the input data.
|
||||||
|
|
||||||
The timestamp column in our input data is named "ts", containing ISO 8601 timestamps, so let's add a `timestampSpec` with that information to the `parseSpec`:
|
The timestamp column in our input data is named "ts", containing ISO 8601 timestamps, so let's add a `timestampSpec` with that information to the `dataSchema`:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
|
||||||
"timestampSpec" : {
|
|
||||||
"format" : "iso",
|
|
||||||
"column" : "ts"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
@ -146,26 +122,17 @@ When ingesting data, we must consider whether we wish to use rollup or not.
|
||||||
|
|
||||||
For this tutorial, let's enable rollup. This is specified with a `granularitySpec` on the `dataSchema`.
|
For this tutorial, let's enable rollup. This is specified with a `granularitySpec` on the `dataSchema`.
|
||||||
|
|
||||||
Note that the `granularitySpec` lies outside of the `parser`. We will revisit the `parser` soon when we define our dimensions and metrics.
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
|
||||||
"timestampSpec" : {
|
|
||||||
"format" : "iso",
|
|
||||||
"column" : "ts"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
"rollup" : true
|
"rollup" : true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Choosing dimensions and metrics
|
#### Choosing dimensions and metrics
|
||||||
|
@ -181,29 +148,23 @@ Let's look at how to define these dimensions and metrics within the ingestion sp
|
||||||
|
|
||||||
#### Dimensions
|
#### Dimensions
|
||||||
|
|
||||||
Dimensions are specified with a `dimensionsSpec` inside the `parseSpec`.
|
Dimensions are specified with a `dimensionsSpec` inside the `dataSchema`.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
"rollup" : true
|
"rollup" : true
|
||||||
|
@ -232,24 +193,18 @@ Metrics are specified with a `metricsSpec` inside the `dataSchema`:
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -307,24 +262,18 @@ Segment granularity is configured by the `segmentGranularity` property in the `g
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -349,24 +298,18 @@ The query granularity is configured by the `queryGranularity` property in the `g
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -377,7 +320,7 @@ The query granularity is configured by the `queryGranularity` property in the `g
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
"type" : "uniform",
|
"type" : "uniform",
|
||||||
"segmentGranularity" : "HOUR",
|
"segmentGranularity" : "HOUR",
|
||||||
"queryGranularity" : "MINUTE"
|
"queryGranularity" : "MINUTE",
|
||||||
"rollup" : true
|
"rollup" : true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -404,24 +347,18 @@ The interval is also specified in the `granularitySpec`:
|
||||||
```json
|
```json
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -447,28 +384,22 @@ The `dataSchema` is shared across all task types, but each task type has its own
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -490,13 +421,13 @@ The `dataSchema` is shared across all task types, but each task type has its own
|
||||||
|
|
||||||
## Define the input source
|
## Define the input source
|
||||||
|
|
||||||
Now let's define our input source, which is specified in an `ioConfig` object. Each task type has its own type of `ioConfig`. The native batch task uses "firehoses" to read input data, so let's configure a "local" firehose to read the example netflow data we saved earlier:
|
Now let's define our input source, which is specified in an `ioConfig` object. Each task type has its own type of `ioConfig`. To read input data, we need to specify an `inputSource`. The example netflow data we saved earlier needs to be read from a local file, which is configured below:
|
||||||
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/",
|
"baseDir" : "quickstart/",
|
||||||
"filter" : "ingestion-tutorial-data.json"
|
"filter" : "ingestion-tutorial-data.json"
|
||||||
|
@ -504,30 +435,43 @@ Now let's define our input source, which is specified in an `ioConfig` object. E
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Define the format of the data
|
||||||
|
|
||||||
|
Since our input data is represented as JSON strings, we'll use an `inputFormat` of type `json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"ioConfig" : {
|
||||||
|
"type" : "index_parallel",
|
||||||
|
"inputSource" : {
|
||||||
|
"type" : "local",
|
||||||
|
"baseDir" : "quickstart/",
|
||||||
|
"filter" : "ingestion-tutorial-data.json"
|
||||||
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -544,11 +488,14 @@ Now let's define our input source, which is specified in an `ioConfig` object. E
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/",
|
"baseDir" : "quickstart/",
|
||||||
"filter" : "ingestion-tutorial-data.json"
|
"filter" : "ingestion-tutorial-data.json"
|
||||||
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -563,7 +510,7 @@ As an example, let's add a `tuningConfig` that sets a target segment size for th
|
||||||
|
|
||||||
```json
|
```json
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000
|
"maxRowsPerSegment" : 5000000
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
@ -576,28 +523,22 @@ We've finished defining the ingestion spec, it should now look like the followin
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "ingestion-tutorial",
|
"dataSource" : "ingestion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec" : {
|
||||||
"type" : "string",
|
"format" : "iso",
|
||||||
"parseSpec" : {
|
"column" : "ts"
|
||||||
"format" : "json",
|
},
|
||||||
"timestampSpec" : {
|
"dimensionsSpec" : {
|
||||||
"format" : "iso",
|
"dimensions": [
|
||||||
"column" : "ts"
|
"srcIP",
|
||||||
},
|
{ "name" : "srcPort", "type" : "long" },
|
||||||
"dimensionsSpec" : {
|
{ "name" : "dstIP", "type" : "string" },
|
||||||
"dimensions": [
|
{ "name" : "dstPort", "type" : "long" },
|
||||||
"srcIP",
|
{ "name" : "protocol", "type" : "string" }
|
||||||
{ "name" : "srcPort", "type" : "long" },
|
]
|
||||||
{ "name" : "dstIP", "type" : "string" },
|
|
||||||
{ "name" : "dstPort", "type" : "long" },
|
|
||||||
{ "name" : "protocol", "type" : "string" }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -614,15 +555,18 @@ We've finished defining the ingestion spec, it should now look like the followin
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/",
|
"baseDir" : "quickstart/",
|
||||||
"filter" : "ingestion-tutorial-data.json"
|
"filter" : "ingestion-tutorial-data.json"
|
||||||
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000
|
"maxRowsPerSegment" : 5000000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,25 +55,19 @@ We'll ingest this data using the following ingestion task spec, located at `quic
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "rollup-tutorial",
|
"dataSource" : "rollup-tutorial",
|
||||||
"parser" : {
|
"dimensionsSpec" : {
|
||||||
"type" : "string",
|
"dimensions" : [
|
||||||
"parseSpec" : {
|
"srcIP",
|
||||||
"format" : "json",
|
"dstIP"
|
||||||
"dimensionsSpec" : {
|
]
|
||||||
"dimensions" : [
|
},
|
||||||
"srcIP",
|
"timestampSpec": {
|
||||||
"dstIP"
|
"column": "timestamp",
|
||||||
]
|
"format": "iso"
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -89,16 +83,19 @@ We'll ingest this data using the following ingestion task spec, located at `quic
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "rollup-data.json"
|
"filter" : "rollup-data.json"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,25 +48,19 @@ We will ingest the sample data using the following spec, which demonstrates the
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "transform-tutorial",
|
"dataSource" : "transform-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "timestamp",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"animal",
|
"animal",
|
||||||
{ "name": "location", "type": "long" }
|
{ "name": "location", "type": "long" }
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -104,16 +98,19 @@ We will ingest the sample data using the following spec, which demonstrates the
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "transform-data.json"
|
"filter" : "transform-data.json"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" :"json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
"interval": "2015-09-12/2015-09-13",
|
"interval": "2015-09-12/2015-09-13",
|
||||||
"segmentGranularity": "DAY",
|
"segmentGranularity": "DAY",
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000,
|
"maxRowsInMemory" : 25000,
|
||||||
"forceExtendableShardSpecs" : true
|
"forceExtendableShardSpecs" : true
|
||||||
|
|
|
@ -1,40 +1,34 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "compaction-tutorial",
|
"dataSource" : "compaction-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "time",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"channel",
|
"channel",
|
||||||
"cityName",
|
"cityName",
|
||||||
"comment",
|
"comment",
|
||||||
"countryIsoCode",
|
"countryIsoCode",
|
||||||
"countryName",
|
"countryName",
|
||||||
"isAnonymous",
|
"isAnonymous",
|
||||||
"isMinor",
|
"isMinor",
|
||||||
"isNew",
|
"isNew",
|
||||||
"isRobot",
|
"isRobot",
|
||||||
"isUnpatrolled",
|
"isUnpatrolled",
|
||||||
"metroCode",
|
"metroCode",
|
||||||
"namespace",
|
"namespace",
|
||||||
"page",
|
"page",
|
||||||
"regionIsoCode",
|
"regionIsoCode",
|
||||||
"regionName",
|
"regionName",
|
||||||
"user",
|
"user",
|
||||||
{ "name": "added", "type": "long" },
|
{ "name": "added", "type": "long" },
|
||||||
{ "name": "deleted", "type": "long" },
|
{ "name": "deleted", "type": "long" },
|
||||||
{ "name": "delta", "type": "long" }
|
{ "name": "delta", "type": "long" }
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "time",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [],
|
"metricsSpec" : [],
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
|
@ -46,16 +40,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial/",
|
"baseDir" : "quickstart/tutorial/",
|
||||||
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 1000
|
"maxRowsPerSegment" : 1000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
"dataSource": "compaction-tutorial",
|
"dataSource": "compaction-tutorial",
|
||||||
"interval": "2015-09-12/2015-09-13",
|
"interval": "2015-09-12/2015-09-13",
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,40 +1,34 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "deletion-tutorial",
|
"dataSource" : "deletion-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "time",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"channel",
|
"channel",
|
||||||
"cityName",
|
"cityName",
|
||||||
"comment",
|
"comment",
|
||||||
"countryIsoCode",
|
"countryIsoCode",
|
||||||
"countryName",
|
"countryName",
|
||||||
"isAnonymous",
|
"isAnonymous",
|
||||||
"isMinor",
|
"isMinor",
|
||||||
"isNew",
|
"isNew",
|
||||||
"isRobot",
|
"isRobot",
|
||||||
"isUnpatrolled",
|
"isUnpatrolled",
|
||||||
"metroCode",
|
"metroCode",
|
||||||
"namespace",
|
"namespace",
|
||||||
"page",
|
"page",
|
||||||
"regionIsoCode",
|
"regionIsoCode",
|
||||||
"regionName",
|
"regionName",
|
||||||
"user",
|
"user",
|
||||||
{ "name": "added", "type": "long" },
|
{ "name": "added", "type": "long" },
|
||||||
{ "name": "deleted", "type": "long" },
|
{ "name": "deleted", "type": "long" },
|
||||||
{ "name": "delta", "type": "long" }
|
{ "name": "delta", "type": "long" }
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "time",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [],
|
"metricsSpec" : [],
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
|
@ -46,16 +40,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial/",
|
"baseDir" : "quickstart/tutorial/",
|
||||||
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,40 +1,34 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "retention-tutorial",
|
"dataSource" : "retention-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "time",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"channel",
|
"channel",
|
||||||
"cityName",
|
"cityName",
|
||||||
"comment",
|
"comment",
|
||||||
"countryIsoCode",
|
"countryIsoCode",
|
||||||
"countryName",
|
"countryName",
|
||||||
"isAnonymous",
|
"isAnonymous",
|
||||||
"isMinor",
|
"isMinor",
|
||||||
"isNew",
|
"isNew",
|
||||||
"isRobot",
|
"isRobot",
|
||||||
"isUnpatrolled",
|
"isUnpatrolled",
|
||||||
"metroCode",
|
"metroCode",
|
||||||
"namespace",
|
"namespace",
|
||||||
"page",
|
"page",
|
||||||
"regionIsoCode",
|
"regionIsoCode",
|
||||||
"regionName",
|
"regionName",
|
||||||
"user",
|
"user",
|
||||||
{ "name": "added", "type": "long" },
|
{ "name": "added", "type": "long" },
|
||||||
{ "name": "deleted", "type": "long" },
|
{ "name": "deleted", "type": "long" },
|
||||||
{ "name": "delta", "type": "long" }
|
{ "name": "delta", "type": "long" }
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "time",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [],
|
"metricsSpec" : [],
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
|
@ -46,16 +40,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial/",
|
"baseDir" : "quickstart/tutorial/",
|
||||||
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,23 +1,17 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "rollup-tutorial",
|
"dataSource" : "rollup-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "timestamp",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"srcIP",
|
"srcIP",
|
||||||
"dstIP"
|
"dstIP"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -33,16 +27,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "rollup-data.json"
|
"filter" : "rollup-data.json"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,23 +1,17 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "transform-tutorial",
|
"dataSource" : "transform-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "timestamp",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"animal",
|
"animal",
|
||||||
{ "name": "location", "type": "long" }
|
{ "name": "location", "type": "long" }
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -55,16 +49,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "transform-data.json"
|
"filter" : "transform-data.json"
|
||||||
},
|
},
|
||||||
|
"inpuFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +1,16 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "updates-tutorial",
|
"dataSource" : "updates-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "timestamp",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"animal"
|
"animal"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -31,16 +25,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "updates-data4.json"
|
"filter" : "updates-data4.json"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : true
|
"appendToExisting" : true
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +1,16 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "updates-tutorial",
|
"dataSource" : "updates-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "timestamp",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"animal"
|
"animal"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -31,16 +25,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "updates-data.json"
|
"filter" : "updates-data.json"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +1,16 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "updates-tutorial",
|
"dataSource" : "updates-tutorial",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "timestamp",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"animal"
|
"animal"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "timestamp",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [
|
"metricsSpec" : [
|
||||||
{ "type" : "count", "name" : "count" },
|
{ "type" : "count", "name" : "count" },
|
||||||
|
@ -31,16 +25,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial",
|
"baseDir" : "quickstart/tutorial",
|
||||||
"filter" : "updates-data2.json"
|
"filter" : "updates-data2.json"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,40 +1,34 @@
|
||||||
{
|
{
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"spec" : {
|
"spec" : {
|
||||||
"dataSchema" : {
|
"dataSchema" : {
|
||||||
"dataSource" : "wikipedia",
|
"dataSource" : "wikipedia",
|
||||||
"parser" : {
|
"timestampSpec": {
|
||||||
"type" : "string",
|
"column": "time",
|
||||||
"parseSpec" : {
|
"format": "iso"
|
||||||
"format" : "json",
|
},
|
||||||
"dimensionsSpec" : {
|
"dimensionsSpec" : {
|
||||||
"dimensions" : [
|
"dimensions" : [
|
||||||
"channel",
|
"channel",
|
||||||
"cityName",
|
"cityName",
|
||||||
"comment",
|
"comment",
|
||||||
"countryIsoCode",
|
"countryIsoCode",
|
||||||
"countryName",
|
"countryName",
|
||||||
"isAnonymous",
|
"isAnonymous",
|
||||||
"isMinor",
|
"isMinor",
|
||||||
"isNew",
|
"isNew",
|
||||||
"isRobot",
|
"isRobot",
|
||||||
"isUnpatrolled",
|
"isUnpatrolled",
|
||||||
"metroCode",
|
"metroCode",
|
||||||
"namespace",
|
"namespace",
|
||||||
"page",
|
"page",
|
||||||
"regionIsoCode",
|
"regionIsoCode",
|
||||||
"regionName",
|
"regionName",
|
||||||
"user",
|
"user",
|
||||||
{ "name": "added", "type": "long" },
|
{ "name": "added", "type": "long" },
|
||||||
{ "name": "deleted", "type": "long" },
|
{ "name": "deleted", "type": "long" },
|
||||||
{ "name": "delta", "type": "long" }
|
{ "name": "delta", "type": "long" }
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"timestampSpec": {
|
|
||||||
"column": "time",
|
|
||||||
"format": "iso"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"metricsSpec" : [],
|
"metricsSpec" : [],
|
||||||
"granularitySpec" : {
|
"granularitySpec" : {
|
||||||
|
@ -46,16 +40,19 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ioConfig" : {
|
"ioConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"firehose" : {
|
"inputSource" : {
|
||||||
"type" : "local",
|
"type" : "local",
|
||||||
"baseDir" : "quickstart/tutorial/",
|
"baseDir" : "quickstart/tutorial/",
|
||||||
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
|
||||||
},
|
},
|
||||||
|
"inputFormat" : {
|
||||||
|
"type" : "json"
|
||||||
|
},
|
||||||
"appendToExisting" : false
|
"appendToExisting" : false
|
||||||
},
|
},
|
||||||
"tuningConfig" : {
|
"tuningConfig" : {
|
||||||
"type" : "index",
|
"type" : "index_parallel",
|
||||||
"maxRowsPerSegment" : 5000000,
|
"maxRowsPerSegment" : 5000000,
|
||||||
"maxRowsInMemory" : 25000
|
"maxRowsInMemory" : 25000
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue