diff --git a/docs/development/extensions-core/mysql.md b/docs/development/extensions-core/mysql.md index 5445e1b4758..2ce85ddc5fc 100644 --- a/docs/development/extensions-core/mysql.md +++ b/docs/development/extensions-core/mysql.md @@ -112,7 +112,7 @@ The MySQL extension provides an implementation of an [SqlFirehose](../../ingesti ```json { - "type": "index", + "type": "index_parallel", "spec": { "dataSchema": { "dataSource": "some_datasource", @@ -149,7 +149,7 @@ The MySQL extension provides an implementation of an [SqlFirehose](../../ingesti } }, "ioConfig": { - "type": "index", + "type": "index_parallel", "firehose": { "type": "sql", "database": { @@ -166,7 +166,7 @@ The MySQL extension provides an implementation of an [SqlFirehose](../../ingesti } }, "tuningconfig": { - "type": "index" + "type": "index_parallel" } } } diff --git a/docs/development/extensions-core/postgresql.md b/docs/development/extensions-core/postgresql.md index a51112b0cbc..4be3a7678df 100644 --- a/docs/development/extensions-core/postgresql.md +++ b/docs/development/extensions-core/postgresql.md @@ -91,7 +91,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in ```json { - "type": "index", + "type": "index_parallel", "spec": { "dataSchema": { "dataSource": "some_datasource", @@ -128,7 +128,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in } }, "ioConfig": { - "type": "index", + "type": "index_parallel", "firehose": { "type": "sql", "database": { @@ -145,7 +145,7 @@ The PostgreSQL extension provides an implementation of an [SqlFirehose](../../in } }, "tuningconfig": { - "type": "index" + "type": "index_parallel" } } } diff --git a/docs/tutorials/tutorial-batch.md b/docs/tutorials/tutorial-batch.md index e175d4a272c..75fab9f7c2e 100644 --- a/docs/tutorials/tutorial-batch.md +++ b/docs/tutorials/tutorial-batch.md @@ -134,42 +134,36 @@ which has been configured to read the `quickstart/tutorial/wikiticker-2015-09-12 ```json { - 
"type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "wikipedia", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "channel", - "cityName", - "comment", - "countryIsoCode", - "countryName", - "isAnonymous", - "isMinor", - "isNew", - "isRobot", - "isUnpatrolled", - "metroCode", - "namespace", - "page", - "regionIsoCode", - "regionName", - "user", - { "name": "added", "type": "long" }, - { "name": "deleted", "type": "long" }, - { "name": "delta", "type": "long" } - ] - }, - "timestampSpec": { - "column": "time", - "format": "iso" - } - } + "dimensionsSpec" : { + "dimensions" : [ + "channel", + "cityName", + "comment", + "countryIsoCode", + "countryName", + "isAnonymous", + "isMinor", + "isNew", + "isRobot", + "isUnpatrolled", + "metroCode", + "namespace", + "page", + "regionIsoCode", + "regionName", + "user", + { "name": "added", "type": "long" }, + { "name": "deleted", "type": "long" }, + { "name": "delta", "type": "long" } + ] + }, + "timestampSpec": { + "column": "time", + "format": "iso" }, "metricsSpec" : [], "granularitySpec" : { @@ -181,16 +175,19 @@ which has been configured to read the `quickstart/tutorial/wikiticker-2015-09-12 } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial/", "filter" : "wikiticker-2015-09-12-sampled.json.gz" }, + "inputFormat" : { + "type": "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/docs/tutorials/tutorial-compaction.md b/docs/tutorials/tutorial-compaction.md index 8da333b0d73..98052170cf7 100644 --- a/docs/tutorials/tutorial-compaction.md +++ b/docs/tutorials/tutorial-compaction.md @@ -81,7 +81,7 @@ We have included a compaction task spec for this tutorial datasource at `quickst 
"dataSource": "compaction-tutorial", "interval": "2015-09-12/2015-09-13", "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } @@ -143,7 +143,7 @@ We have included a compaction task spec that will create DAY granularity segment "interval": "2015-09-12/2015-09-13", "segmentGranularity": "DAY", "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000, "forceExtendableShardSpecs" : true diff --git a/docs/tutorials/tutorial-ingestion-spec.md b/docs/tutorials/tutorial-ingestion-spec.md index b722ed73675..773b920b34b 100644 --- a/docs/tutorials/tutorial-ingestion-spec.md +++ b/docs/tutorials/tutorial-ingestion-spec.md @@ -88,42 +88,18 @@ The datasource name is specified by the `dataSource` parameter in the `dataSchem Let's call the tutorial datasource `ingestion-tutorial`. -### Choose a parser - -A `dataSchema` has a `parser` field, which defines the parser that Druid will use to interpret the input data. - -Since our input data is represented as JSON strings, we'll use a `string` parser with `json` format: - -```json -"dataSchema" : { - "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json" - } - } -} -``` - ### Time column -The `parser` needs to know how to extract the main timestamp field from the input data. When using a `json` type `parseSpec`, the timestamp is defined in a `timestampSpec`. +The `dataSchema` needs to know how to extract the main timestamp field from the input data. 
-The timestamp column in our input data is named "ts", containing ISO 8601 timestamps, so let's add a `timestampSpec` with that information to the `parseSpec`: +The timestamp column in our input data is named "ts", containing ISO 8601 timestamps, so let's add a `timestampSpec` with that information to the `dataSchema`: ```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" } } ``` @@ -146,26 +122,17 @@ When ingesting data, we must consider whether we wish to use rollup or not. For this tutorial, let's enable rollup. This is specified with a `granularitySpec` on the `dataSchema`. -Note that the `granularitySpec` lies outside of the `parser`. We will revisit the `parser` soon when we define our dimensions and metrics. - ```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" }, "granularitySpec" : { "rollup" : true } } - ``` #### Choosing dimensions and metrics @@ -181,29 +148,23 @@ Let's look at how to define these dimensions and metrics within the ingestion sp #### Dimensions -Dimensions are specified with a `dimensionsSpec` inside the `parseSpec`. +Dimensions are specified with a `dimensionsSpec` inside the `dataSchema`. 
```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "granularitySpec" : { "rollup" : true @@ -232,24 +193,18 @@ Metrics are specified with a `metricsSpec` inside the `dataSchema`: ```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -307,24 +262,18 @@ Segment granularity is configured by the `segmentGranularity` property in the `g ```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - 
"dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -349,24 +298,18 @@ The query granularity is configured by the `queryGranularity` property in the `g ```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -377,7 +320,7 @@ The query granularity is configured by the `queryGranularity` property in the `g "granularitySpec" : { "type" : "uniform", "segmentGranularity" : "HOUR", - "queryGranularity" : "MINUTE" + "queryGranularity" : "MINUTE", "rollup" : true } } @@ -404,24 +347,18 @@ The interval is also specified in the `granularitySpec`: ```json "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : 
"ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -447,28 +384,22 @@ The `dataSchema` is shared across all task types, but each task type has its own ```json { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -490,13 +421,13 @@ The `dataSchema` is shared across all task types, but each task type has its own ## Define the input source -Now let's define our input source, which is specified in an `ioConfig` object. Each task type has its own type of `ioConfig`. 
The native batch task uses "firehoses" to read input data, so let's configure a "local" firehose to read the example netflow data we saved earlier: +Now let's define our input source, which is specified in an `ioConfig` object. Each task type has its own type of `ioConfig`. To read input data, we need to specify an `inputSource`. The example netflow data we saved earlier needs to be read from a local file, which is configured below: ```json "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/", "filter" : "ingestion-tutorial-data.json" @@ -504,30 +435,43 @@ Now let's define our input source, which is specified in an `ioConfig` object. E } ``` + +### Define the format of the data + +Since our input data is represented as JSON strings, we'll use an `inputFormat` of type `json`: + +```json + "ioConfig" : { + "type" : "index_parallel", + "inputSource" : { + "type" : "local", + "baseDir" : "quickstart/", + "filter" : "ingestion-tutorial-data.json" + }, + "inputFormat" : { + "type" : "json" + } + } +``` + ```json { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -544,11 +488,14 @@ Now let's
define our input source, which is specified in an `ioConfig` object. E } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/", "filter" : "ingestion-tutorial-data.json" + }, + "inputFormat" : { + "type" : "json" } } } @@ -563,7 +510,7 @@ As an example, let's add a `tuningConfig` that sets a target segment size for th ```json "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000 } ``` @@ -576,28 +523,22 @@ We've finished defining the ingestion spec, it should now look like the followin ```json { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "ingestion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "format" : "iso", - "column" : "ts" - }, - "dimensionsSpec" : { - "dimensions": [ - "srcIP", - { "name" : "srcPort", "type" : "long" }, - { "name" : "dstIP", "type" : "string" }, - { "name" : "dstPort", "type" : "long" }, - { "name" : "protocol", "type" : "string" } - ] - } - } + "timestampSpec" : { + "format" : "iso", + "column" : "ts" + }, + "dimensionsSpec" : { + "dimensions": [ + "srcIP", + { "name" : "srcPort", "type" : "long" }, + { "name" : "dstIP", "type" : "string" }, + { "name" : "dstPort", "type" : "long" }, + { "name" : "protocol", "type" : "string" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -614,15 +555,18 @@ We've finished defining the ingestion spec, it should now look like the followin } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/", "filter" : "ingestion-tutorial-data.json" + }, + "inputFormat" : { + "type" : "json" } }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000 } } diff --git a/docs/tutorials/tutorial-rollup.md 
b/docs/tutorials/tutorial-rollup.md index 8b4f1ad1d6f..79276176925 100644 --- a/docs/tutorials/tutorial-rollup.md +++ b/docs/tutorials/tutorial-rollup.md @@ -55,25 +55,19 @@ We'll ingest this data using the following ingestion task spec, located at `quic ```json { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "rollup-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "srcIP", - "dstIP" - ] - }, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + "dimensionsSpec" : { + "dimensions" : [ + "srcIP", + "dstIP" + ] + }, + "timestampSpec": { + "column": "timestamp", + "format": "iso" }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -89,16 +83,19 @@ We'll ingest this data using the following ingestion task spec, located at `quic } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "rollup-data.json" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/docs/tutorials/tutorial-transform-spec.md b/docs/tutorials/tutorial-transform-spec.md index c90ca6077e3..35695de2497 100644 --- a/docs/tutorials/tutorial-transform-spec.md +++ b/docs/tutorials/tutorial-transform-spec.md @@ -48,25 +48,19 @@ We will ingest the sample data using the following spec, which demonstrates the ```json { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "transform-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "animal", - { "name": "location", "type": "long" } - ] - }, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + 
"timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "animal", + { "name": "location", "type": "long" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -104,16 +98,19 @@ We will ingest the sample data using the following spec, which demonstrates the } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "transform-data.json" }, + "inputFormat" : { + "type" :"json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/compaction-day-granularity.json b/examples/quickstart/tutorial/compaction-day-granularity.json index 4855821c064..eb39276d254 100644 --- a/examples/quickstart/tutorial/compaction-day-granularity.json +++ b/examples/quickstart/tutorial/compaction-day-granularity.json @@ -4,7 +4,7 @@ "interval": "2015-09-12/2015-09-13", "segmentGranularity": "DAY", "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000, "forceExtendableShardSpecs" : true diff --git a/examples/quickstart/tutorial/compaction-init-index.json b/examples/quickstart/tutorial/compaction-init-index.json index b6b59b60550..f2c00481c36 100644 --- a/examples/quickstart/tutorial/compaction-init-index.json +++ b/examples/quickstart/tutorial/compaction-init-index.json @@ -1,40 +1,34 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "compaction-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "channel", - "cityName", - "comment", - "countryIsoCode", - "countryName", - "isAnonymous", - "isMinor", - "isNew", - "isRobot", - "isUnpatrolled", - "metroCode", - "namespace", - 
"page", - "regionIsoCode", - "regionName", - "user", - { "name": "added", "type": "long" }, - { "name": "deleted", "type": "long" }, - { "name": "delta", "type": "long" } - ] - }, - "timestampSpec": { - "column": "time", - "format": "iso" - } - } + "timestampSpec": { + "column": "time", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "channel", + "cityName", + "comment", + "countryIsoCode", + "countryName", + "isAnonymous", + "isMinor", + "isNew", + "isRobot", + "isUnpatrolled", + "metroCode", + "namespace", + "page", + "regionIsoCode", + "regionName", + "user", + { "name": "added", "type": "long" }, + { "name": "deleted", "type": "long" }, + { "name": "delta", "type": "long" } + ] }, "metricsSpec" : [], "granularitySpec" : { @@ -46,16 +40,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial/", "filter" : "wikiticker-2015-09-12-sampled.json.gz" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 1000 } } diff --git a/examples/quickstart/tutorial/compaction-keep-granularity.json b/examples/quickstart/tutorial/compaction-keep-granularity.json index 6721e7bfcc6..ba76d612bdd 100644 --- a/examples/quickstart/tutorial/compaction-keep-granularity.json +++ b/examples/quickstart/tutorial/compaction-keep-granularity.json @@ -3,7 +3,7 @@ "dataSource": "compaction-tutorial", "interval": "2015-09-12/2015-09-13", "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/deletion-index.json b/examples/quickstart/tutorial/deletion-index.json index 0faf4689803..d32ddd9ee28 100644 --- a/examples/quickstart/tutorial/deletion-index.json +++ b/examples/quickstart/tutorial/deletion-index.json @@ -1,40 +1,34 @@ { - "type" : "index", + 
"type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "deletion-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "channel", - "cityName", - "comment", - "countryIsoCode", - "countryName", - "isAnonymous", - "isMinor", - "isNew", - "isRobot", - "isUnpatrolled", - "metroCode", - "namespace", - "page", - "regionIsoCode", - "regionName", - "user", - { "name": "added", "type": "long" }, - { "name": "deleted", "type": "long" }, - { "name": "delta", "type": "long" } - ] - }, - "timestampSpec": { - "column": "time", - "format": "iso" - } - } + "timestampSpec": { + "column": "time", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "channel", + "cityName", + "comment", + "countryIsoCode", + "countryName", + "isAnonymous", + "isMinor", + "isNew", + "isRobot", + "isUnpatrolled", + "metroCode", + "namespace", + "page", + "regionIsoCode", + "regionName", + "user", + { "name": "added", "type": "long" }, + { "name": "deleted", "type": "long" }, + { "name": "delta", "type": "long" } + ] }, "metricsSpec" : [], "granularitySpec" : { @@ -46,16 +40,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial/", "filter" : "wikiticker-2015-09-12-sampled.json.gz" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/retention-index.json b/examples/quickstart/tutorial/retention-index.json index 95416e20079..4c6b33f05c6 100644 --- a/examples/quickstart/tutorial/retention-index.json +++ b/examples/quickstart/tutorial/retention-index.json @@ -1,40 +1,34 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "retention-tutorial", - "parser" : { - 
"type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "channel", - "cityName", - "comment", - "countryIsoCode", - "countryName", - "isAnonymous", - "isMinor", - "isNew", - "isRobot", - "isUnpatrolled", - "metroCode", - "namespace", - "page", - "regionIsoCode", - "regionName", - "user", - { "name": "added", "type": "long" }, - { "name": "deleted", "type": "long" }, - { "name": "delta", "type": "long" } - ] - }, - "timestampSpec": { - "column": "time", - "format": "iso" - } - } + "timestampSpec": { + "column": "time", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "channel", + "cityName", + "comment", + "countryIsoCode", + "countryName", + "isAnonymous", + "isMinor", + "isNew", + "isRobot", + "isUnpatrolled", + "metroCode", + "namespace", + "page", + "regionIsoCode", + "regionName", + "user", + { "name": "added", "type": "long" }, + { "name": "deleted", "type": "long" }, + { "name": "delta", "type": "long" } + ] }, "metricsSpec" : [], "granularitySpec" : { @@ -46,16 +40,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial/", "filter" : "wikiticker-2015-09-12-sampled.json.gz" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/rollup-index.json b/examples/quickstart/tutorial/rollup-index.json index 2c1426e58fd..7c0b5815d2c 100644 --- a/examples/quickstart/tutorial/rollup-index.json +++ b/examples/quickstart/tutorial/rollup-index.json @@ -1,23 +1,17 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "rollup-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "srcIP", - "dstIP" - ] - 
}, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "srcIP", + "dstIP" + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -33,16 +27,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "rollup-data.json" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/transform-index.json b/examples/quickstart/tutorial/transform-index.json index 8d40b191777..bf605fcfdbf 100644 --- a/examples/quickstart/tutorial/transform-index.json +++ b/examples/quickstart/tutorial/transform-index.json @@ -1,23 +1,17 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "transform-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "animal", - { "name": "location", "type": "long" } - ] - }, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "animal", + { "name": "location", "type": "long" } + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -55,16 +49,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "transform-data.json" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git
a/examples/quickstart/tutorial/updates-append-index2.json b/examples/quickstart/tutorial/updates-append-index2.json index 247192a3f5d..921b8cf0e2d 100644 --- a/examples/quickstart/tutorial/updates-append-index2.json +++ b/examples/quickstart/tutorial/updates-append-index2.json @@ -1,22 +1,16 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "updates-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "animal" - ] - }, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "animal" + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -31,16 +25,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "updates-data4.json" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : true }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/updates-init-index.json b/examples/quickstart/tutorial/updates-init-index.json index 71c449bf59a..ed4b349c6e0 100644 --- a/examples/quickstart/tutorial/updates-init-index.json +++ b/examples/quickstart/tutorial/updates-init-index.json @@ -1,22 +1,16 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "updates-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "animal" - ] - }, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "animal" + ] }, "metricsSpec" : [ { "type" : 
"count", "name" : "count" }, @@ -31,16 +25,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "updates-data.json" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/updates-overwrite-index.json b/examples/quickstart/tutorial/updates-overwrite-index.json index 451750e726c..b2545f04dd1 100644 --- a/examples/quickstart/tutorial/updates-overwrite-index.json +++ b/examples/quickstart/tutorial/updates-overwrite-index.json @@ -1,22 +1,16 @@ { - "type" : "index", + "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "updates-tutorial", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "animal" - ] - }, - "timestampSpec": { - "column": "timestamp", - "format": "iso" - } - } + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "animal" + ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, @@ -31,16 +25,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial", "filter" : "updates-data2.json" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 } diff --git a/examples/quickstart/tutorial/wikipedia-index.json b/examples/quickstart/tutorial/wikipedia-index.json index 785fbda9167..60d7670a577 100644 --- a/examples/quickstart/tutorial/wikipedia-index.json +++ b/examples/quickstart/tutorial/wikipedia-index.json @@ -1,40 +1,34 @@ { - "type" : "index", + "type" : 
"index_parallel", "spec" : { "dataSchema" : { "dataSource" : "wikipedia", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "channel", - "cityName", - "comment", - "countryIsoCode", - "countryName", - "isAnonymous", - "isMinor", - "isNew", - "isRobot", - "isUnpatrolled", - "metroCode", - "namespace", - "page", - "regionIsoCode", - "regionName", - "user", - { "name": "added", "type": "long" }, - { "name": "deleted", "type": "long" }, - { "name": "delta", "type": "long" } - ] - }, - "timestampSpec": { - "column": "time", - "format": "iso" - } - } + "timestampSpec": { + "column": "time", + "format": "iso" + }, + "dimensionsSpec" : { + "dimensions" : [ + "channel", + "cityName", + "comment", + "countryIsoCode", + "countryName", + "isAnonymous", + "isMinor", + "isNew", + "isRobot", + "isUnpatrolled", + "metroCode", + "namespace", + "page", + "regionIsoCode", + "regionName", + "user", + { "name": "added", "type": "long" }, + { "name": "deleted", "type": "long" }, + { "name": "delta", "type": "long" } + ] }, "metricsSpec" : [], "granularitySpec" : { @@ -46,16 +40,19 @@ } }, "ioConfig" : { - "type" : "index", - "firehose" : { + "type" : "index_parallel", + "inputSource" : { "type" : "local", "baseDir" : "quickstart/tutorial/", "filter" : "wikiticker-2015-09-12-sampled.json.gz" }, + "inputFormat" : { + "type" : "json" + }, "appendToExisting" : false }, "tuningConfig" : { - "type" : "index", + "type" : "index_parallel", "maxRowsPerSegment" : 5000000, "maxRowsInMemory" : 25000 }