From 180e3ccfadc298d3b3ddd16576a1e80b8446a2dd Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Tue, 4 Sep 2018 12:54:41 -0700 Subject: [PATCH] Docs consistency cleanup (#6259) --- docs/_redirects.json | 12 +- docs/content/configuration/index.md | 8 +- docs/content/configuration/logging.md | 2 +- docs/content/design/broker.md | 25 +- docs/content/design/coordinator.md | 46 +- docs/content/design/design.md | 100 ---- docs/content/design/historical.md | 27 +- docs/content/design/index.md | 2 +- docs/content/design/indexing-service.md | 50 +- docs/content/design/middlemanager.md | 16 +- docs/content/design/overlord.md | 48 ++ docs/content/design/peons.md | 15 +- docs/content/design/realtime.md | 2 +- docs/content/design/segments.md | 4 +- .../development/extensions-contrib/parquet.md | 2 +- .../extensions-contrib/redis-cache.md | 2 +- .../development/extensions-core/avro.md | 2 +- .../extensions-core/druid-basic-security.md | 2 +- .../extensions-core/kafka-ingestion.md | 4 +- .../development/extensions-core/stats.md | 2 +- docs/content/development/javascript.md | 2 +- .../ingestion/command-line-hadoop-indexer.md | 7 +- docs/content/ingestion/compaction.md | 65 +++ docs/content/ingestion/data-formats.md | 2 +- docs/content/ingestion/delete-data.md | 30 ++ docs/content/ingestion/faq.md | 12 +- docs/content/ingestion/firehose.md | 82 ++-- docs/content/ingestion/hadoop.md | 12 +- docs/content/ingestion/index.md | 462 +++++++++--------- docs/content/ingestion/ingestion-spec.md | 309 ++++++++++++ .../content/ingestion/locking-and-priority.md | 59 +++ docs/content/ingestion/misc-tasks.md | 163 ++++++ docs/content/ingestion/native_tasks.md | 14 +- docs/content/ingestion/overview.md | 279 ----------- docs/content/ingestion/schema-design.md | 4 +- docs/content/ingestion/stream-ingestion.md | 27 +- docs/content/ingestion/stream-pull.md | 6 +- docs/content/ingestion/stream-push.md | 48 +- docs/content/ingestion/tasks.md | 306 +----------- .../content/ingestion/update-existing-data.md | 6 +- docs/content/misc/math-expr.md | 2 +- docs/content/operations/api-reference.md | 7 +- .../operations/including-extensions.md | 2 +- docs/content/operations/metrics.md | 2 +- docs/content/operations/other-hadoop.md | 10 +- docs/content/operations/password-provider.md | 6 +- .../operations/segment-optimization.md | 1 + docs/content/querying/granularities.md | 2 +- docs/content/querying/lookups.md | 2 +- docs/content/querying/multitenancy.md | 2 +- docs/content/querying/post-aggregations.md | 2 +- docs/content/querying/query-context.md | 4 +- docs/content/querying/querying.md | 17 +- docs/content/querying/segmentmetadataquery.md | 2 +- docs/content/querying/sql.md | 2 +- docs/content/toc.md | 25 +- docs/content/tutorials/cluster.md | 2 +- .../tutorials/tutorial-batch-hadoop.md | 10 +- .../tutorials/tutorial-ingestion-spec.md | 2 +- 59 files changed, 1196 insertions(+), 1172 deletions(-) delete mode 100644 docs/content/design/design.md create mode 100644 docs/content/design/overlord.md create mode 100644 docs/content/ingestion/compaction.md create mode 100644 docs/content/ingestion/delete-data.md create mode 100644 docs/content/ingestion/ingestion-spec.md create mode 100644 docs/content/ingestion/locking-and-priority.md create mode 100644 docs/content/ingestion/misc-tasks.md delete mode 100644 docs/content/ingestion/overview.md diff --git a/docs/_redirects.json b/docs/_redirects.json index 390f6ba45ed..9e53a746875 100644 --- a/docs/_redirects.json +++ b/docs/_redirects.json @@ -98,6 +98,7 @@ {"source": 
"configuration/indexing-service.html", "target": "../configuration/index.html#overlord"}, {"source": "configuration/simple-cluster.html", "target": "../tutorials/cluster.html"}, {"source": "design/concepts-and-terminology.html", "target": "index.html"}, + {"source": "design/design.html", "target": "index.html"}, {"source": "development/approximate-histograms.html", "target": "extensions-core/approximate-histograms.html"}, {"source": "development/datasketches-aggregators.html", "target": "extensions-core/datasketches-extension.html"}, {"source": "development/extensions-core/datasketches-aggregators.html", "target": "datasketches-extension.html"}, @@ -105,18 +106,19 @@ {"source": "development/kafka-simple-consumer-firehose.html", "target": "extensions-contrib/kafka-simple.html"}, {"source": "development/select-query.html", "target": "../querying/select-query.html"}, {"source": "index.html", "target": "design/index.html"}, - {"source": "ingestion/overview.html", "target": "ingestion.html"}, + {"source": "ingestion/overview.html", "target": "index.html"}, + {"source": "ingestion/ingestion.html", "target": "index.html"}, {"source": "ingestion/realtime-ingestion.html", "target": "stream-ingestion.html"}, {"source": "misc/cluster-setup.html", "target": "../tutorials/cluster.html"}, {"source": "misc/evaluate.html", "target": "../tutorials/cluster.html"}, {"source": "misc/tasks.html", "target": "../ingestion/tasks.html"}, {"source": "operations/multitenancy.html", "target": "../querying/multitenancy.html"}, {"source": "tutorials/booting-a-production-cluster.html", "target": "cluster.html"}, - {"source": "tutorials/examples.html", "target": "quickstart.html"}, + {"source": "tutorials/examples.html", "target": "index.html"}, {"source": "tutorials/firewall.html", "target": "cluster.html"}, - {"source": "tutorials/index.html", "target": "quickstart.html"}, - {"source": "tutorials/tutorial-a-first-look-at-druid.html", "target": "quickstart.html"}, - {"source": "tutorials/tutorial-all-about-queries.html", "target": "quickstart.html"}, + {"source": "tutorials/quickstart.html", "target": "index.html"}, + {"source": "tutorials/tutorial-a-first-look-at-druid.html", "target": "index.html"}, + {"source": "tutorials/tutorial-all-about-queries.html", "target": "index.html"}, {"source": "tutorials/tutorial-loading-batch-data.html", "target": "tutorial-batch.html"}, {"source": "tutorials/tutorial-loading-streaming-data.html", "target": "tutorial-streams.html"}, {"source": "tutorials/tutorial-the-druid-cluster.html", "target": "cluster.html"}, diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index c87da2a06e9..f60e60b28f3 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -254,7 +254,7 @@ These properties apply to the SSLContext that will be provided to the internal H |`druid.client.https.trustStoreType`|The type of the key store where trusted root certificates are stored.|`java.security.KeyStore.getDefaultType()`|no| |`druid.client.https.trustStorePath`|The file path or URL of the TLS/SSL Key store where trusted root certificates are stored.|none|yes| |`druid.client.https.trustStoreAlgorithm`|Algorithm to be used by TrustManager to validate certificate chains|`javax.net.ssl.TrustManagerFactory.getDefaultAlgorithm()`|no| -|`druid.client.https.trustStorePassword`|The [Password Provider](../../operations/password-provider.html) or String password for the Trust Store.|none|yes| +|`druid.client.https.trustStorePassword`|The [Password 
Provider](../operations/password-provider.html) or String password for the Trust Store.|none|yes| This [document](http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html) lists all the possible values for the above mentioned configs among others provided by Java implementation. @@ -416,7 +416,7 @@ The following properties allow the Http Emitter to use its own truststore config |`druid.emitter.http.ssl.trustStorePath`|The file path or URL of the TLS/SSL Key store where trusted root certificates are stored. If this is unspecified, the Http Emitter will use the same SSLContext as Druid's internal HTTP client, as described in the beginning of this section, and all other properties below are ignored.|null| |`druid.emitter.http.ssl.trustStoreType`|The type of the key store where trusted root certificates are stored.|`java.security.KeyStore.getDefaultType()`| |`druid.emitter.http.ssl.trustStoreAlgorithm`|Algorithm to be used by TrustManager to validate certificate chains|`javax.net.ssl.TrustManagerFactory.getDefaultAlgorithm()`| -|`druid.emitter.http.ssl.trustStorePassword`|The [Password Provider](../../operations/password-provider.html) or String password for the Trust Store.|none| +|`druid.emitter.http.ssl.trustStorePassword`|The [Password Provider](../operations/password-provider.html) or String password for the Trust Store.|none| |`druid.emitter.http.ssl.protocol`|TLS protocol to use.|"TLSv1.2"| #### Parametrized Http Emitter Module @@ -1119,7 +1119,7 @@ If the peon is running in remote mode, there must be an overlord up and running. |`druid.peon.taskActionClient.retry.maxWait`|The maximum retry time to communicate with overlord.|PT1M| |`druid.peon.taskActionClient.retry.maxRetryCount`|The maximum number of retries to communicate with overlord.|60| -##### SegmentWriteOutMediumFactory +#### SegmentWriteOutMediumFactory When new segments are created, Druid temporarily stores some pre-processed data in some buffers. Currently two types of *medium* exist for those buffers: *temporary files* and *off-heap memory*. @@ -1319,7 +1319,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| |`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|`PT0S` (do not wait)| |`druid.server.http.unannouncePropagationDelay`|How long to wait for zookeeper unannouncements to propagate before shutting down Jetty. This is a minimum and `druid.server.http.gracefulShutdownTimeout` does not start counting down until after this period elapses.|`PT0S` (do not wait)| -|`druid.server.http.maxQueryTimeout`|Maximum allowed value (in milliseconds) for `timeout` parameter. See [query-context](query-context.html) to know more about `timeout`. Query is rejected if the query context `timeout` is greater than this value. |Long.MAX_VALUE| +|`druid.server.http.maxQueryTimeout`|Maximum allowed value (in milliseconds) for `timeout` parameter. See [query-context](../querying/query-context.html) to know more about `timeout`. Query is rejected if the query context `timeout` is greater than this value. |Long.MAX_VALUE| |`druid.server.http.maxRequestHeaderSize`|Maximum size of a request header in bytes. 
Larger headers consume more memory and can make a server more vulnerable to denial of service attacks.|8 * 1024| #### Processing diff --git a/docs/content/configuration/logging.md b/docs/content/configuration/logging.md index 72adb3009d4..55e733b9e43 100644 --- a/docs/content/configuration/logging.md +++ b/docs/content/configuration/logging.md @@ -4,7 +4,7 @@ layout: doc_page Logging ========================== -Druid nodes will emit logs that are useful for debugging to the console. Druid nodes also emit periodic metrics about their state. For more about metrics, see [Configuration](../configuration/index.html). Metric logs are printed to the console by default, and can be disabled with `-Ddruid.emitter.logging.logLevel=debug`. +Druid nodes will emit logs that are useful for debugging to the console. Druid nodes also emit periodic metrics about their state. For more about metrics, see [Configuration](../configuration/index.html#enabling-metrics). Metric logs are printed to the console by default, and can be disabled with `-Ddruid.emitter.logging.logLevel=debug`. Druid uses [log4j2](http://logging.apache.org/log4j/2.x/) for logging. Logging can be configured with a log4j2.xml file. Add the path to the directory containing the log4j2.xml file (e.g. the _common/ dir) to your classpath if you want to override default Druid log configuration. Note that this directory should be earlier in the classpath than the druid jars. The easiest way to do this is to prefix the classpath with the config dir. diff --git a/docs/content/design/broker.md b/docs/content/design/broker.md index bc674c5475d..592b40319ac 100644 --- a/docs/content/design/broker.md +++ b/docs/content/design/broker.md @@ -3,32 +3,33 @@ layout: doc_page --- Broker ====== -For Broker Node Configuration, see [Broker Configuration](../configuration/broker.html). + +### Configuration + +For Broker Node Configuration, see [Broker Configuration](../configuration/index.html#broker). + +### HTTP endpoints + +For a list of API endpoints supported by the Broker, see [Broker API](../operations/api-reference.html#broker). + +### Overview The Broker is the node to route queries to if you want to run a distributed cluster. It understands the metadata published to ZooKeeper about what segments exist on what nodes and routes queries such that they hit the right nodes. This node also merges the result sets from all of the individual nodes together. On start up, Realtime nodes announce themselves and the segments they are serving in Zookeeper. -Running -------- +### Running ``` org.apache.druid.cli.Main server broker ``` -Forwarding Queries ------------------- +### Forwarding Queries Most druid queries contain an interval object that indicates a span of time for which data is requested. Likewise, Druid [Segments](../design/segments.html) are partitioned to contain data for some interval of time and segments are distributed across a cluster. Consider a simple datasource with 7 segments where each segment contains data for a given day of the week. Any query issued to the datasource for more than one day of data will hit more than one segment. These segments will likely be distributed across multiple nodes, and hence, the query will likely hit multiple nodes. To determine which nodes to forward queries to, the Broker node first builds a view of the world from information in Zookeeper. Zookeeper maintains information about [Historical](../design/historical.html) and [Realtime](../design/realtime.html) nodes and the segments they are serving. 
For every datasource in Zookeeper, the Broker node builds a timeline of segments and the nodes that serve them. When queries are received for a specific datasource and interval, the Broker node performs a lookup into the timeline associated with the query datasource for the query interval and retrieves the nodes that contain data for the query. The Broker node then forwards down the query to the selected nodes. -Caching -------- +### Caching Broker nodes employ a cache with a LRU cache invalidation strategy. The broker cache stores per-segment results. The cache can be local to each broker node or shared across multiple nodes using an external distributed cache such as [memcached](http://memcached.org/). Each time a broker node receives a query, it first maps the query to a set of segments. A subset of these segment results may already exist in the cache and the results can be directly pulled from the cache. For any segment results that do not exist in the cache, the broker node will forward the query to the historical nodes. Once the historical nodes return their results, the broker will store those results in the cache. Real-time segments are never cached and hence requests for real-time data will always be forwarded to real-time nodes. Real-time data is perpetually changing and caching the results would be unreliable. - -HTTP Endpoints --------------- - -For a list of API endpoints supported by the Broker, please see the [API reference](../operations/api-reference.html#broker). diff --git a/docs/content/design/coordinator.md b/docs/content/design/coordinator.md index 74bcfb3114a..8160660fe54 100644 --- a/docs/content/design/coordinator.md +++ b/docs/content/design/coordinator.md @@ -3,7 +3,16 @@ layout: doc_page --- Coordinator Node ================ -For Coordinator Node Configuration, see [Coordinator Configuration](../configuration/coordinator.html). + +### Configuration + +For Coordinator Node Configuration, see [Coordinator Configuration](../configuration/index.html#coordinator). + +### HTTP endpoints + +For a list of API endpoints supported by the Coordinator, see [Coordinator API](../operations/api-reference.html#coordinator). + +### Overview The Druid coordinator node is primarily responsible for segment management and distribution. More specifically, the Druid coordinator node communicates to historical nodes to load or drop segments based on configurations. The Druid coordinator is responsible for loading new segments, dropping outdated segments, managing segment replication, and balancing segment load. @@ -17,29 +26,24 @@ Before any unassigned segments are serviced by historical nodes, the available h org.apache.druid.cli.Main server coordinator ``` -Rules ------ +### Rules Segments can be automatically loaded and dropped from the cluster based on a set of rules. For more information on rules, see [Rule Configuration](../operations/rule-configuration.html). -Cleaning Up Segments --------------------- +### Cleaning Up Segments Each run, the Druid coordinator compares the list of available database segments in the database with the current segments in the cluster. Segments that are not in the database but are still being served in the cluster are flagged and appended to a removal list. Segments that are overshadowed (their versions are too old and their data has been replaced by newer segments) are also dropped. Note that if all segments in database are deleted(or marked unused), then coordinator will not drop anything from the historicals. 
This is done to prevent a race condition in which the coordinator would drop all segments if it started running cleanup before it finished polling the database for available segments for the first time and believed that there were no segments. -Segment Availability --------------------- +### Segment Availability If a historical node restarts or becomes unavailable for any reason, the Druid coordinator will notice a node has gone missing and treat all segments served by that node as being dropped. Given a sufficient period of time, the segments may be reassigned to other historical nodes in the cluster. However, each segment that is dropped is not immediately forgotten. Instead, there is a transitional data structure that stores all dropped segments with an associated lifetime. The lifetime represents a period of time in which the coordinator will not reassign a dropped segment. Hence, if a historical node becomes unavailable and available again within a short period of time, the historical node will start up and serve segments from its cache without any those segments being reassigned across the cluster. -Balancing Segment Load ----------------------- +### Balancing Segment Load To ensure an even distribution of segments across historical nodes in the cluster, the coordinator node will find the total size of all segments being served by every historical node each time the coordinator runs. For every historical node tier in the cluster, the coordinator node will determine the historical node with the highest utilization and the historical node with the lowest utilization. The percent difference in utilization between the two nodes is computed, and if the result exceeds a certain threshold, a number of segments will be moved from the highest utilized node to the lowest utilized node. There is a configurable limit on the number of segments that can be moved from one node to another each time the coordinator runs. Segments to be moved are selected at random and only moved if the resulting utilization calculation indicates the percentage difference between the highest and lowest servers has decreased. -Compacting Segments -------------------- +### Compacting Segments Each run, the Druid coordinator compacts small segments abutting each other. This is useful when you have a lot of small segments which may degrade the query performance as well as increasing the disk usage. Note that the data for an interval @@ -49,7 +53,7 @@ The coordinator first finds the segments to compact together based on the [segme Once it finds some segments, it launches a [compact task](../ingestion/tasks.html#compaction-task) to compact those segments. The maximum number of running compact tasks is `max(sum of worker capacity * slotRatio, maxSlots)`. Note that even though `max(sum of worker capacity * slotRatio, maxSlots)` = 1, at least one compact task is always submitted -once a compaction is configured for a dataSource. See [HTTP Endpoints](#http-endpoints) to set those values. +once a compaction is configured for a dataSource. See [Compaction Configuration API](../operations/api-reference.html#compaction-configuration) to set those values. Compact tasks might fail due to some reasons. @@ -59,7 +63,7 @@ Compact tasks might fail due to some reasons. Once a compact task fails, the coordinator simply finds the segments for the interval of the failed task again, and launches a new compact task in the next run. To use this feature, you need to set some configurations for dataSources you want to compact. 
-Please see [Compaction Configuration](../configuration/coordinator.html#compaction-configuration) for more details. +Please see [Compaction Configuration](../configuration/index.html#compaction-dynamic-configuration) for more details. ### Segment Search Policy @@ -69,23 +73,16 @@ This policy searches the segments of _all dataSources_ in inverse order of their For example, let me assume there are 3 dataSources (`ds1`, `ds2`, `ds3`) and 5 segments (`seg_ds1_2017-10-01_2017-10-02`, `seg_ds1_2017-11-01_2017-11-02`, `seg_ds2_2017-08-01_2017-08-02`, `seg_ds3_2017-07-01_2017-07-02`, `seg_ds3_2017-12-01_2017-12-02`) for those dataSources. The segment name indicates its dataSource and interval. The search result of newestSegmentFirstPolicy is [`seg_ds3_2017-12-01_2017-12-02`, `seg_ds1_2017-11-01_2017-11-02`, `seg_ds1_2017-10-01_2017-10-02`, `seg_ds2_2017-08-01_2017-08-02`, `seg_ds3_2017-07-01_2017-07-02`]. -Every run, this policy starts searching from the (very latest interval - [skipOffsetFromLatest](../configuration/coordinator.html#compaction-configuration)). +Every run, this policy starts searching from the (very latest interval - [skipOffsetFromLatest](../configuration/index.html#compaction-dynamic-configuration)). This is to handle the late segments ingested to realtime dataSources.
This policy currently cannot handle the situation when there are a lot of small segments which have the same interval, -and their total size exceeds targetCompactionSizebytes. +and their total size exceeds targetCompactionSizebytes. If it finds such segments, it simply skips compacting them.
- -HTTP Endpoints --------------- - -For a list of API endpoints supported by the Coordinator, please see the [API reference](../operations/api-reference.html#coordinator). - -The Coordinator Console ------------------- +### The Coordinator Console The Druid coordinator exposes a web GUI for displaying cluster information and rule configuration. After the coordinator starts, the console can be accessed at: @@ -97,8 +94,7 @@ http://: The coordinator console also exposes an interface to creating and editing rules. All valid datasources configured in the segment database, along with a default datasource, are available for configuration. Rules of different types can be added, deleted or edited. -FAQ ---- +### FAQ 1. **Do clients ever contact the coordinator node?** diff --git a/docs/content/design/design.md b/docs/content/design/design.md deleted file mode 100644 index c2a8beaf38d..00000000000 --- a/docs/content/design/design.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -layout: doc_page ---- - -For a comprehensive look at the architecture of Druid, read the [White Paper](http://static.druid.io/docs/druid.pdf). Please note -that Druid is undergoing rapid development and the white paper may be out date. - -What is Druid? -============== - -Druid is a system built to allow fast ("real-time") access to large sets of seldom-changing data. It was designed with the intent of being a service and maintaining 100% uptime in the face of code deployments, machine failures and other eventualities of a production system. It can be useful for back-office use cases as well, but design decisions were made explicitly targeting an always-up service. - -Druid currently allows for single-table queries in a similar manner to [Dremel](http://research.google.com/pubs/pub36632.html) and [PowerDrill](http://www.vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf). It adds to the mix - -1. columnar storage format for partially nested data structures -2. hierarchical query distribution with intermediate pruning -3. indexing for quick filtering -4. realtime ingestion (ingested data is immediately available for querying) -5. fault-tolerant distributed architecture that doesn’t lose data - -As far as a comparison of systems is concerned, Druid sits in between PowerDrill and Dremel on the spectrum of functionality. It implements almost everything Dremel offers (Dremel handles arbitrary nested data structures while Druid only allows for a single level of array-based nesting) and gets into some of the interesting data layout and compression methods from PowerDrill. - -Druid is a good fit for products that require real-time data ingestion of a single, large data stream. Especially if you are targeting no-downtime operation and are building your product on top of a time-oriented summarization of the incoming data stream. When talking about query speed it is important to clarify what "fast" means: with Druid it is entirely within the realm of possibility (we have done it) to achieve queries that run in less than a second across trillions of rows of data. - -### Architecture - -Druid is architected as a grouping of systems each with a distinct role and together they form a working system. The name comes from the Druid class in many role-playing games: it is a shape-shifter, capable of taking many different forms to fulfill various different roles in a group. - -Each of the systems, or components, described below also has a dedicated page with more details. 
You can find the page in the menu on the right, or click the link in the component's description. - -The node types that currently exist are: - -* [**Historical**](../design/historical.html) nodes are the workhorses that handle storage and querying on "historical" data (non-realtime). Historical nodes download segments from deep storage, respond to the queries from broker nodes about these segments, and return results to the broker nodes. They announce themselves and the segments they are serving in Zookeeper, and also use Zookeeper to monitor for signals to load or drop new segments. -* [**Coordinator**](../design/coordinator.html) nodes monitor the grouping of historical nodes to ensure that data is available, replicated and in a generally "optimal" configuration. They do this by reading segment metadata information from metadata storage to determine what segments should be loaded in the cluster, using Zookeeper to determine what Historical nodes exist, and creating Zookeeper entries to tell Historical nodes to load and drop new segments. -* [**Broker**](../design/broker.html) nodes receive queries from external clients and forward those queries to Realtime and Historical nodes. When Broker nodes receive results, they merge these results and return them to the caller. For knowing topology, Broker nodes use Zookeeper to determine what Realtime and Historical nodes exist. -* [**Indexing Service**](../design/indexing-service.html) nodes form a cluster of workers to load batch and real-time data into the system as well as allow for alterations to the data stored in the system. -* [**Realtime**](../design/realtime.html) nodes also load real-time data into the system. They are simpler to set up than the indexing service, at the cost of several [limitations](../ingestion/stream-pull.html#limitations) for production use. - -This separation allows each node to only care about what it is best at. By separating Historical and Realtime processing, we separate the memory concerns of listening on a real-time stream of data and processing it for entry into the system. By separating the Coordinator and Broker, we separate the needs for querying from the needs for maintaining "good" data distribution across the cluster. - -The following diagram shows how queries and data flow through this architecture, and which nodes (and external dependencies, discussed below) are involved: - - - -All nodes can be run in some highly available fashion, either as symmetric peers in a share-nothing cluster or as hot-swap failover nodes. - -Aside from these nodes, there are 3 external dependencies to the system: - -1. A running [ZooKeeper](../dependencies/zookeeper.html) cluster for cluster service discovery and maintenance of current data topology -2. A [metadata storage instance](../dependencies/metadata-storage.html) for maintenance of metadata about the data segments that should be served by the system -3. A ["deep storage" LOB store/file system](../dependencies/deep-storage.html) to hold the stored segments - -The following diagram illustrates the cluster's management layer, showing how certain nodes and dependencies help manage the cluster by tracking and exchanging metadata: - - - - -### Segments and Data Storage - -Getting data into the Druid system requires an indexing process, as shown in the diagrams above. This gives the system a chance to analyze the data, add indexing structures, compress and adjust the layout in an attempt to optimize query speed. A quick list of what happens to the data follows. 
- -- Converted to columnar format -- Indexed with bitmap indexes -- Compressed using various algorithms - - LZ4 for all columns - - Dictionary encoding w/ id storage minimization for String columns - - Bitmap compression for bitmap indexes - -The output of the indexing process is called a "segment". Segments are the fundamental structure to store data in Druid. Segments contain the various dimensions and metrics in a data set, stored in a column orientation, as well as the indexes for those columns. - -Segments are stored in a "deep storage" LOB store/file system (see [Deep Storage](../dependencies/deep-storage.html) for information about potential options). Data is then loaded by Historical nodes by first downloading the data to their local disk and then memory-mapping it before serving queries. - -If a Historical node dies, it will no longer serve its segments, but given that the segments are still available on the "deep storage", any other node can simply download the segment and start serving it. This means that it is possible to actually remove all historical nodes from the cluster and then re-provision them without any data loss. It also means that if the "deep storage" is not available, the nodes can continue to serve the segments they have already pulled down (i.e. the cluster goes stale, not down). - -In order for a segment to exist inside of the cluster, an entry has to be added to a table in a metadata storage instance. This entry is a self-describing bit of metadata about the segment, including things like the schema of the segment, its size, and its location on deep storage. These entries are what the Coordinator uses to know what data **should** be available on the cluster. - -### Fault Tolerance - -- **Historical** As discussed above, if a historical node dies, another historical node can take its place and there is no fear of data loss. -- **Coordinator** Can be run in a hot fail-over configuration. If no coordinators are running, then changes to the data topology will stop happening (no new data and no data balancing decisions), but the system will continue to run. -- **Broker** Can be run in parallel or in hot fail-over. -- **Indexing Service** Workers run with replicated ingestion tasks, coordination piece has hot fail-over. -- **Realtime** Depending on the semantics of the delivery stream, multiple of these can be run in parallel processing the exact same stream. They periodically checkpoint to disk and eventually push out to deep storage. Steps are taken to be able to recover from process death, but loss of access to the local disk can result in data loss if this is the only method of adding data to the system. -- **"deep storage" file system** If this is not available, new data will not be able to enter the cluster, but the cluster will continue operating as is. -- **metadata storage** If this is not available, the Coordinator will be unable to find out about new segments in the system, but it will continue with its current view of the segments that should exist in the cluster. -- **ZooKeeper** If this is not available, data topology changes cannot be made, but the Brokers will maintain their most recent view of the data topology and continue serving requests accordingly. - -### Query processing - -A query first enters the Broker, where the Broker will match the query with the data segments that are known to exist. It will then pick a set of machines that are serving those segments and rewrite the query for each server to specify the segment(s) targetted. 
The Historical/Realtime processes will take in the query, process them and return results. The Broker then takes the results and merges them together to get the final answer, which it returns. In this way, the broker can prune all of the data that doesn’t match a query before ever even looking at a single row of data. - -For filters at a more granular level than what the Broker can prune based on, the indexing structures inside each segment allows the historical nodes to figure out which (if any) rows match the filter set before looking at any row of data. It can do all of the boolean algebra of the filter on the bitmap indices and never actually look directly at a row of data. - -Once it knows the rows that match the current query, it can access the columns it cares about for those rows directly without having to load data that it is just going to throw away. - -### In-memory? - -Druid is not always and only in-memory. When we first built it, it is true that it was all in-memory all the time, but as time went on the price-performance tradeoff ended up swinging towards keeping all of our customers data in memory all the time a non-starter. We then added the ability to memory-map data and allow the OS to handle paging data in and out of memory on demand. Our production cluster is primarily configured to operate with this memory mapping behavior and we are definitely over-subscribed in terms of memory available vs. data a node is serving. - -As you read some of the old blog posts or other literature about the project, you will see "in-memory" touted often, as that is the history of where Druid came from, but the technical reality is that there is a spectrum of price vs. performance. Being able to slide along that spectrum from all in-memory (high cost, great performance) to mostly on disk (low cost, low performance) is the important knob to be able to adjust. diff --git a/docs/content/design/historical.md b/docs/content/design/historical.md index 5d09f7a839d..12e898d5de7 100644 --- a/docs/content/design/historical.md +++ b/docs/content/design/historical.md @@ -3,19 +3,22 @@ layout: doc_page --- Historical Node =============== -For Historical Node Configuration, see [Historical Configuration](../configuration/historical.html). -Historical nodes load up historical segments and expose them for querying. +### Configuration -Running -------- +For Historical Node Configuration, see [Historical Configuration](../configuration/index.html#historical). + +### HTTP Endpoints + +For a list of API endpoints supported by the Historical, please see the [API reference](../operations/api-reference.html#historical). + +### Running ``` org.apache.druid.cli.Main server historical ``` -Loading and Serving Segments ----------------------------- +### Loading and Serving Segments Each historical node maintains a constant connection to Zookeeper and watches a configurable set of Zookeeper paths for new segment information. Historical nodes do not communicate directly with each other or with the coordinator nodes but instead rely on Zookeeper for coordination. @@ -23,22 +26,14 @@ The [Coordinator](../design/coordinator.html) node is responsible for assigning When a historical node notices a new load queue entry in its load queue path, it will first check a local disk directory (cache) for the information about segment. If no information about the segment exists in the cache, the historical node will download metadata about the new segment to serve from Zookeeper. 
This metadata includes specifications about where the segment is located in deep storage and about how to decompress and process the segment. For more information about segment metadata and Druid segments in general, please see [Segments](../design/segments.html). Once a historical node completes processing a segment, the segment is announced in Zookeeper under a served segments path associated with the node. At this point, the segment is available for querying. -Loading and Serving Segments From Cache ---------------------------------------- +### Loading and Serving Segments From Cache Recall that when a historical node notices a new segment entry in its load queue path, the historical node first checks a configurable cache directory on its local disk to see if the segment had been previously downloaded. If a local cache entry already exists, the historical node will directly read the segment binary files from disk and load the segment. The segment cache is also leveraged when a historical node is first started. On startup, a historical node will search through its cache directory and immediately load and serve all segments that are found. This feature allows historical nodes to be queried as soon they come online. -Querying Segments ------------------ +### Querying Segments Please see [Querying](../querying/querying.html) for more information on querying historical nodes. A historical can be configured to log and report metrics for every query it services. - -HTTP Endpoints --------------- - -For a list of API endpoints supported by the Historical, please see the [API reference](../operations/api-reference.html#historical). - diff --git a/docs/content/design/index.md b/docs/content/design/index.md index a0bed6748dc..4cf4109ef3f 100644 --- a/docs/content/design/index.md +++ b/docs/content/design/index.md @@ -185,7 +185,7 @@ For more details, please see [Deep storage dependency](../dependencies/deep-stor The metadata storage holds various system metadata such as segment availability information and task information. -For more details, please see [Metadata storage dependency](..dependencies/metadata-storage.html) +For more details, please see [Metadata storage dependency](../dependencies/metadata-storage.html) ## Zookeeper diff --git a/docs/content/design/indexing-service.md b/docs/content/design/indexing-service.md index 4435e69208f..11aa55101ba 100644 --- a/docs/content/design/indexing-service.md +++ b/docs/content/design/indexing-service.md @@ -4,15 +4,14 @@ layout: doc_page Indexing Service ================ -For Indexing Service Configuration, see [Indexing Service Configuration](../configuration/indexing-service.html). +The indexing service is a highly-available, distributed service that runs indexing related tasks. -The indexing service is a highly-available, distributed service that runs indexing related tasks. Indexing service [tasks](../ingestion/tasks.html) create (and sometimes destroy) Druid [segments](../design/segments.html). The indexing service has a master/slave like architecture. +Indexing tasks [tasks](../ingestion/tasks.html) create (and sometimes destroy) Druid [segments](../design/segments.html). The indexing service has a master/slave like architecture. -The indexing service is composed of three main components: a peon component that can run a single task, a [Middle Manager](../design/middlemanager.html) component that manages peons, and an overlord component that manages task distribution to middle managers. 
-Overlords and middle managers may run on the same node or across multiple nodes while middle managers and [Peons](../design/peons.html) always run on the same node. +The indexing service is composed of three main components: a [Peon](../design/peons.html) component that can run a single task, a [Middle Manager](../design/middlemanager.html) component that manages peons, and an [Overlord](../design/overlord.html) component that manages task distribution to middle managers. +Overlords and middle managers may run on the same node or across multiple nodes while middle managers and peons always run on the same node. -Indexing Service Overview -------------------------- +Tasks are managed using API endpoints on the Overlord service. Please see [Overlord Task API](../operations/api-reference.html#overlord-tasks) for more information. ![Indexing Service](../../img/indexing_service.png "Indexing Service") @@ -25,43 +24,10 @@ The truth is, the indexing service is an experience that is difficult to charact The indexing service is philosophical transcendence, an infallible truth that will shape your soul, mold your character, and define your reality. The indexing service is creating world peace, playing with puppies, unwrapping presents on Christmas morning, cradling a loved one, and beating Goro in Mortal Kombat for the first time. The indexing service is sustainable economic growth, global propensity, and a world of transparent financial transactions. The indexing service is a true belieber. The indexing service is panicking because you forgot you signed up for a course and the big exam is in a few minutes, only to wake up and realize it was all a dream. What is the indexing service? More like what isn’t the indexing service. The indexing service is here and it is ready, but are you? --> -Overlord Node -------------- +Overlord +-------------- -The overlord node is responsible for accepting tasks, coordinating task distribution, creating locks around tasks, and returning statuses to callers. Overlord can be configured to run in one of two modes - local or remote (local being default). -In local mode overlord is also responsible for creating peons for executing tasks. When running the overlord in local mode, all middle manager and peon configurations must be provided as well. -Local mode is typically used for simple workflows. In remote mode, the overlord and middle manager are run in separate processes and you can run each on a different server. -This mode is recommended if you intend to use the indexing service as the single endpoint for all Druid indexing. - -#### HTTP Endpoints - -For a list of API endpoints supported by the Overlord, please see the [API reference](../operations/api-reference.html#overlord). - -#### Overlord Console - -The overlord console can be used to view pending tasks, running tasks, available workers, and recent worker creation and termination. The console can be accessed at: - -``` -http://:/console.html -``` - -#### Blacklisted Workers -If the workers fail tasks above a threshold, the overlord will blacklist these workers. No more than 20% of the nodes can be blacklisted. Blacklisted nodes will be periodically whitelisted. - -The following vairables can be used to set the threshold and blacklist timeouts. 
- -``` -druid.indexer.runner.maxRetriesBeforeBlacklist -druid.indexer.runner.workerBlackListBackoffTime -druid.indexer.runner.workerBlackListCleanupPeriod -druid.indexer.runner.maxPercentageBlacklistWorkers -``` - -#### Autoscaling - -The Autoscaling mechanisms currently in place are tightly coupled with our deployment infrastructure but the framework should be in place for other implementations. We are highly open to new implementations or extensions of the existing mechanisms. In our own deployments, middle manager nodes are Amazon AWS EC2 nodes and they are provisioned to register themselves in a [galaxy](https://github.com/ning/galaxy) environment. - -If autoscaling is enabled, new middle managers may be added when a task has been in pending state for too long. Middle managers may be terminated if they have not run any tasks for a period of time. +See [Overlord](../design/overlord.html). Middle Managers --------------- diff --git a/docs/content/design/middlemanager.md b/docs/content/design/middlemanager.md index 288741400ab..8f2c10ec0ac 100644 --- a/docs/content/design/middlemanager.md +++ b/docs/content/design/middlemanager.md @@ -5,19 +5,23 @@ layout: doc_page Middle Manager Node ------------------ -For Middlemanager Node Configuration, see [Indexing Service Configuration](../configuration/indexing-service.html). +### Configuration + +For Middlemanager Node Configuration, see [Indexing Service Configuration](../configuration/index.html#middlemanager-and-peons). + +### HTTP Endpoints + +For a list of API endpoints supported by the MiddleManager, please see the [API reference](../operations/api-reference.html#middlemanager). + +### Overview The middle manager node is a worker node that executes submitted tasks. Middle Managers forward tasks to peons that run in separate JVMs. The reason we have separate JVMs for tasks is for resource and log isolation. Each [Peon](../design/peons.html) is capable of running only one task at a time, however, a middle manager may have multiple peons. -Running -------- +### Running ``` org.apache.druid.cli.Main server middleManager ``` -HTTP Endpoints --------------- -For a list of API endpoints supported by the MiddleManager, please see the [API reference](../operations/api-reference.html#middlemanager). diff --git a/docs/content/design/overlord.md b/docs/content/design/overlord.md new file mode 100644 index 00000000000..688981f133b --- /dev/null +++ b/docs/content/design/overlord.md @@ -0,0 +1,48 @@ +--- +layout: doc_page +--- + +Overlord Node +------------- + +### Configuration + +For Overlord Node Configuration, see [Overlord Configuration](../configuration/index.html#overlord). + +### HTTP Endpoints + +For a list of API endpoints supported by the Overlord, please see the [API reference](../operations/api-reference.html#overlord). + +### Overview + +The overlord node is responsible for accepting tasks, coordinating task distribution, creating locks around tasks, and returning statuses to callers. Overlord can be configured to run in one of two modes - local or remote (local being default). +In local mode overlord is also responsible for creating peons for executing tasks. When running the overlord in local mode, all middle manager and peon configurations must be provided as well. +Local mode is typically used for simple workflows. In remote mode, the overlord and middle manager are run in separate processes and you can run each on a different server. 
+This mode is recommended if you intend to use the indexing service as the single endpoint for all Druid indexing. + +### Overlord Console + +The overlord console can be used to view pending tasks, running tasks, available workers, and recent worker creation and termination. The console can be accessed at: + +``` +http://:/console.html +``` + +### Blacklisted Workers + +If the workers fail tasks above a threshold, the overlord will blacklist these workers. No more than 20% of the nodes can be blacklisted. Blacklisted nodes will be periodically whitelisted. + +The following vairables can be used to set the threshold and blacklist timeouts. + +``` +druid.indexer.runner.maxRetriesBeforeBlacklist +druid.indexer.runner.workerBlackListBackoffTime +druid.indexer.runner.workerBlackListCleanupPeriod +druid.indexer.runner.maxPercentageBlacklistWorkers +``` + +### Autoscaling + +The Autoscaling mechanisms currently in place are tightly coupled with our deployment infrastructure but the framework should be in place for other implementations. We are highly open to new implementations or extensions of the existing mechanisms. In our own deployments, middle manager nodes are Amazon AWS EC2 nodes and they are provisioned to register themselves in a [galaxy](https://github.com/ning/galaxy) environment. + +If autoscaling is enabled, new middle managers may be added when a task has been in pending state for too long. Middle managers may be terminated if they have not run any tasks for a period of time. diff --git a/docs/content/design/peons.md b/docs/content/design/peons.md index 7aeaa071203..579cd8267b6 100644 --- a/docs/content/design/peons.md +++ b/docs/content/design/peons.md @@ -5,13 +5,18 @@ layout: doc_page Peons ----- -For Peon Configuration, see [Peon Configuration](../configuration/indexing-service.html). +### Configuration + +For Peon Configuration, see [Peon Query Configuration](../configuration/index.html#peon-query-configuration) and [Additional Peon Configuration](../configuration/index.html#additional-peon-configuration). + +### HTTP Endpoints + +For a list of API endpoints supported by the Peon, please see the [Peon API reference](../operations/api-reference.html#peon). Peons run a single task in a single JVM. MiddleManager is responsible for creating Peons for running tasks. Peons should rarely (if ever for testing purposes) be run on their own. -Running -------- +### Running The peon should very rarely ever be run independent of the middle manager unless for development purposes. @@ -22,7 +27,3 @@ org.apache.druid.cli.Main internal peon The task file contains the task JSON object. The status file indicates where the task status will be output. -HTTP Endpoints --------------- - -For a list of API endpoints supported by the Peon, please see the [API reference](../operations/api-reference.html#peon). diff --git a/docs/content/design/realtime.md b/docs/content/design/realtime.md index 8b7561e3a0c..4088c6120f2 100644 --- a/docs/content/design/realtime.md +++ b/docs/content/design/realtime.md @@ -6,7 +6,7 @@ Real-time Node ==============
-NOTE: Realtime nodes are deprecated. +NOTE: Realtime nodes are deprecated. Please use the Kafka Indexing Service for stream pull use cases instead.
For Real-time Node Configuration, see [Realtime Configuration](../configuration/realtime.html). diff --git a/docs/content/design/segments.md b/docs/content/design/segments.md index 78bbe677d22..a64a25f4be3 100644 --- a/docs/content/design/segments.md +++ b/docs/content/design/segments.md @@ -8,7 +8,7 @@ Druid stores its index in *segment files*, which are partitioned by time. In a basic setup, one segment file is created for each time interval, where the time interval is configurable in the `segmentGranularity` parameter of the `granularitySpec`, which is -documented [here](../ingestion/batch-ingestion.html). For druid to +documented [here](../ingestion/ingestion-spec.html#granularityspec). For druid to operate well under heavy query load, it is important for the segment file size to be within the recommended range of 300mb-700mb. If your segment files are larger than this range, then consider either @@ -16,7 +16,7 @@ changing the granularity of the time interval or partitioning your data and tweaking the `targetPartitionSize` in your `partitionsSpec` (a good starting point for this parameter is 5 million rows). See the sharding section below and the 'Partitioning specification' section of -the [Batch ingestion](../ingestion/batch-ingestion.html) documentation +the [Batch ingestion](../ingestion/hadoop.html#partitioning-specification) documentation for more information. ### A segment file's core data structures diff --git a/docs/content/development/extensions-contrib/parquet.md b/docs/content/development/extensions-contrib/parquet.md index 45e3262c448..428ce526d08 100644 --- a/docs/content/development/extensions-contrib/parquet.md +++ b/docs/content/development/extensions-contrib/parquet.md @@ -18,7 +18,7 @@ This is for batch ingestion using the HadoopDruidIndexer. The inputFormat of `in | parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be a timeAndDims parseSpec. | yes | | binaryAsString | Boolean | Specifies if the bytes parquet column should be converted to strings. | no(default == false) | -When the time dimension is a [DateType column](https://github.com/Parquet/parquet-format/blob/master/LogicalTypes.md), a format should not be supplied. When the format is UTF8 (String), either `auto` or a explicitly defined [format](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html) is required. +When the time dimension is a [DateType column](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), a format should not be supplied. When the format is UTF8 (String), either `auto` or a explicitly defined [format](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html) is required. ### Example json for overlord diff --git a/docs/content/development/extensions-contrib/redis-cache.md b/docs/content/development/extensions-contrib/redis-cache.md index b38d7cb7cb8..2ab6689f0f9 100644 --- a/docs/content/development/extensions-contrib/redis-cache.md +++ b/docs/content/development/extensions-contrib/redis-cache.md @@ -10,7 +10,7 @@ A cache implementation for Druid based on [Redis](https://github.com/antirez/red # Configuration Below are the configuration options known to this module. -Note that just adding these properties does not enable the cache. You still need to add the `druid..cache.useCache` and `druid..cache.populateCache` properties for the nodes you want to enable the cache on as described in the [cache configuration docs](../../configuration/caching.html). 
+Note that just adding these properties does not enable the cache. You still need to add the `druid..cache.useCache` and `druid..cache.populateCache` properties for the nodes you want to enable the cache on as described in the [cache configuration docs](../../configuration/index.html#cache-configuration). A possible configuration would be to keep the properties below in your `common.runtime.properties` file (present on all nodes) and then add `druid..cache.useCache` and `druid..cache.populateCache` in the `runtime.properties` file of the node types you want to enable caching on. diff --git a/docs/content/development/extensions-core/avro.md b/docs/content/development/extensions-core/avro.md index 129c46e12bd..979fc540e29 100644 --- a/docs/content/development/extensions-core/avro.md +++ b/docs/content/development/extensions-core/avro.md @@ -163,7 +163,7 @@ This is for batch ingestion using the HadoopDruidIndexer. The `inputFormat` of ` | parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be an "avro" parseSpec. | yes | | fromPigAvroStorage | Boolean | Specifies whether the data file is stored using AvroStorage. | no(default == false) | -An Avro parseSpec can contain a [flattenSpec](../../ingestion/flatten-spec.html) using either the "root" or "path" +An Avro parseSpec can contain a [flattenSpec](../../ingestion/flatten-json.html) using either the "root" or "path" field types, which can be used to read nested Avro records. The "jq" field type is not currently supported for Avro. For example, using Avro Hadoop parser with custom reader's schema file: diff --git a/docs/content/development/extensions-core/druid-basic-security.md b/docs/content/development/extensions-core/druid-basic-security.md index 81b3fdfde99..c0f9d054bc1 100644 --- a/docs/content/development/extensions-core/druid-basic-security.md +++ b/docs/content/development/extensions-core/druid-basic-security.md @@ -10,7 +10,7 @@ This extension adds: Make sure to [include](../../operations/including-extensions.html) `druid-basic-security` as an extension. -Please see [Authentication and Authorization](../../configuration/auth.html) for more information on the extension interfaces being implemented. +Please see [Authentication and Authorization](../../design/auth.html) for more information on the extension interfaces being implemented. 
## Configuration diff --git a/docs/content/development/extensions-core/kafka-ingestion.md b/docs/content/development/extensions-core/kafka-ingestion.md index a7db8d87107..4c51b0562a4 100644 --- a/docs/content/development/extensions-core/kafka-ingestion.md +++ b/docs/content/development/extensions-core/kafka-ingestion.md @@ -104,7 +104,7 @@ A sample supervisor spec is shown below: |Field|Description|Required| |--------|-----------|---------| |`type`|The supervisor type, this should always be `kafka`.|yes| -|`dataSchema`|The schema that will be used by the Kafka indexing task during ingestion, see [Ingestion Spec](../../ingestion/index.html).|yes| +|`dataSchema`|The schema that will be used by the Kafka indexing task during ingestion, see [Ingestion Spec DataSchema](../../ingestion/ingestion-spec.html#dataschema).|yes| |`tuningConfig`|A KafkaSupervisorTuningConfig to configure the supervisor and indexing tasks, see below.|no| |`ioConfig`|A KafkaSupervisorIOConfig to configure the supervisor and indexing tasks, see below.|yes| @@ -130,7 +130,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |`httpTimeout`|ISO8601 Period|How long to wait for a HTTP response from an indexing task.|no (default == PT10S)| |`shutdownTimeout`|ISO8601 Period|How long to wait for the supervisor to attempt a graceful shutdown of tasks before exiting.|no (default == PT80S)| |`offsetFetchPeriod`|ISO8601 Period|How often the supervisor queries Kafka and the indexing tasks to fetch current offsets and calculate lag.|no (default == PT30S, min == PT5S)| -|`segmentWriteOutMediumFactory`|String|Segment write-out medium to use when creating segments. See [Indexing Service Configuration](../configuration/indexing-service.html) page, "SegmentWriteOutMediumFactory" section for explanation and available options.|no (not specified by default, the value from `druid.peon.defaultSegmentWriteOutMediumFactory` is used)| +|`segmentWriteOutMediumFactory`|String|Segment write-out medium to use when creating segments. See [Additional Peon Configuration: SegmentWriteOutMediumFactory](../../configuration/index.html#segmentwriteoutmediumfactory) for explanation and available options.|no (not specified by default, the value from `druid.peon.defaultSegmentWriteOutMediumFactory` is used)| |`intermediateHandoffPeriod`|ISO8601 Period|How often the tasks should hand off segments. Handoff will happen either if `maxRowsPerSegment` is hit or every `intermediateHandoffPeriod`, whichever happens earlier.|no (default == P2147483647D)| #### IndexSpec diff --git a/docs/content/development/extensions-core/stats.md b/docs/content/development/extensions-core/stats.md index 747a75686dc..09874799b4f 100644 --- a/docs/content/development/extensions-core/stats.md +++ b/docs/content/development/extensions-core/stats.md @@ -16,7 +16,7 @@ The American Statistician, 37 (1983) pp. 242--247. variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2) -where: - variance is sum[x-avg^2] (this is actually n times the variance) +where: - variance is sum(x-avg^2) (this is actually n times the variance) and is updated at every step. - n is the count of elements in chunk1 - m is the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 = sum of elements in chunk2. 
diff --git a/docs/content/development/javascript.md b/docs/content/development/javascript.md index 14393dfac63..e3fc1d0d54e 100644 --- a/docs/content/development/javascript.md +++ b/docs/content/development/javascript.md @@ -27,7 +27,7 @@ Druid uses the Mozilla Rhino engine at optimization level 9 to compile and execu Druid does not execute JavaScript functions in a sandbox, so they have full access to the machine. So Javascript functions allow users to execute arbitrary code inside druid process. So, by default, Javascript is disabled. However, on dev/staging environments or secured production environments you can enable those by setting -the [configuration property](../configuration/index.html) +the [configuration property](../configuration/index.html#javascript) `druid.javascript.enabled = true`. ## Global variables diff --git a/docs/content/ingestion/command-line-hadoop-indexer.md b/docs/content/ingestion/command-line-hadoop-indexer.md index 7f83ec22174..542b3c151a4 100644 --- a/docs/content/ingestion/command-line-hadoop-indexer.md +++ b/docs/content/ingestion/command-line-hadoop-indexer.md @@ -17,8 +17,9 @@ java -Xmx256m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -classpath lib/*:, + "dataSource": , + "interval": , + "dimensions" , + "tuningConfig" , + "context": +} +``` + +|Field|Description|Required| +|-----|-----------|--------| +|`type`|Task type. Should be `compact`|Yes| +|`id`|Task id|No| +|`dataSource`|dataSource name to be compacted|Yes| +|`interval`|interval of segments to be compacted|Yes| +|`dimensions`|custom dimensionsSpec. compaction task will use this dimensionsSpec if exist instead of generating one. See below for more details.|No| +|`tuningConfig`|[Index task tuningConfig](../ingestion/native_tasks.html#tuningconfig)|No| +|`context`|[Task context](../ingestion/locking-and-priority.html#task-context)|No| + +An example of compaction task is + +```json +{ + "type" : "compact", + "dataSource" : "wikipedia", + "interval" : "2017-01-01/2018-01-01" +} +``` + +This compaction task reads _all segments_ of the interval `2017-01-01/2018-01-01` and results in new segments. +Note that intervals of the input segments are merged into a single interval of `2017-01-01/2018-01-01` no matter what the segmentGranularity was. +To control the number of result segments, you can set `targetPartitionSize` or `numShards`. See [indexTuningConfig](../ingestion/native_tasks.html#tuningconfig) for more details. +To merge each day's worth of data into separate segments, you can submit multiple `compact` tasks, one for each day. They will run in parallel. + +A compaction task internally generates an `index` task spec for performing compaction work with some fixed parameters. +For example, its `firehose` is always the [ingestSegmentSpec](./firehose.html#ingestsegmentfirehose), and `dimensionsSpec` and `metricsSpec` +include all dimensions and metrics of the input segments by default. + +Compaction tasks will exit with a failure status code, without doing anything, if the interval you specify has no +data segments loaded in it (or if the interval you specify is empty). + +The output segment can have different metadata from the input segments unless all input segments have the same metadata. + +- Dimensions: since Druid supports schema change, the dimensions can be different across segments even if they are a part of the same dataSource. +If the input segments have different dimensions, the output segment basically includes all dimensions of the input segments. 
+However, even if the input segments have the same set of dimensions, the dimension order or the data type of dimensions can be different. For example, the data type of some dimensions can be +changed from `string` to primitive types, or the order of dimensions can be changed for better locality. +In this case, the dimensions of recent segments precede those of old segments in terms of data types and the ordering. +This is because more recent segments are more likely to have the new desired order and data types. If you want to use +your own ordering and types, you can specify a custom `dimensionsSpec` in the compaction task spec. +- Roll-up: the output segment is rolled up only when `rollup` is set for all input segments. +See [Roll-up](../ingestion/index.html#rollup) for more details. +You can check whether your segments are rolled up or not by using [Segment Metadata Queries](../querying/segmentmetadataquery.html#analysistypes). +- Partitioning: The compaction task is a special form of native batch indexing task, so it always uses hash-based partitioning on the full set of dimensions. \ No newline at end of file diff --git a/docs/content/ingestion/data-formats.md index 859f75e12be..ffe20bea3c4 100644 --- a/docs/content/ingestion/data-formats.md +++ b/docs/content/ingestion/data-formats.md @@ -11,7 +11,7 @@ For additional data formats, please see our [extensions list](../development/ext ## Formatting the Data -The following are some samples of the data used in the [Wikipedia example](../tutorials/quickstart.html). +The following samples show data formats that are natively supported in Druid: _JSON_ diff --git a/docs/content/ingestion/delete-data.md new file mode 100644 index 00000000000..62c02e581ce --- /dev/null +++ b/docs/content/ingestion/delete-data.md @@ -0,0 +1,30 @@ +--- +layout: doc_page +--- + +# Deleting Data + +Permanent deletion of a Druid segment has two steps: + +1. The segment must first be marked as "unused". This occurs when a segment is dropped by retention rules, and when a user manually disables a segment through the Coordinator API. +2. After segments have been marked as "unused", a Kill Task will delete any "unused" segments from Druid's metadata store as well as deep storage. + +For documentation on retention rules, please see [Data Retention](../operations/rule-configuration.html). + +For documentation on disabling segments using the Coordinator API, please see [Coordinator Delete API](../operations/api-reference.html#coordinator-delete). + +A data deletion tutorial is available at [Tutorial: Deleting data](../tutorials/tutorial-delete-data.html). + +## Kill Task + +Kill tasks delete all information about a segment and remove it from deep storage. Killable segments must be disabled (used==0) in the Druid segment table. The available grammar is: + +```json +{ + "type": "kill", + "id": <task_id>, + "dataSource": <dataSource>, + "interval" : <interval of segments to delete>, + "context": <task context> +} +``` diff --git a/docs/content/ingestion/faq.md index 2289f308be5..91ab0dfb1f1 100644 --- a/docs/content/ingestion/faq.md +++ b/docs/content/ingestion/faq.md @@ -16,7 +16,7 @@ If you are trying to batch load historical data but no events are being loaded, ## What types of data does Druid support? -Druid can ingest JSON, CSV, TSV and other delimited data out of the box. Druid supports single dimension values, or multiple dimension values (an array of strings). Druid supports long and float numeric columns. 
+Druid can ingest JSON, CSV, TSV and other delimited data out of the box. Druid supports single dimension values, or multiple dimension values (an array of strings). Druid supports long, float, and double numeric columns. ## Not all of my events were ingested @@ -63,15 +63,19 @@ You can use a [segment metadata query](../querying/segmentmetadataquery.html) fo You can use IngestSegmentFirehose with index task to ingest existing druid segments using a new schema and change the name, dimensions, metrics, rollup, etc. of the segment. See [Firehose](../ingestion/firehose.html) for more details on IngestSegmentFirehose. -Or, if you use hadoop based ingestion, then you can use "dataSource" input spec to do reindexing. See [batch-ingestion](../ingestion/batch-ingestion.html) for more details. +Or, if you use hadoop based ingestion, then you can use "dataSource" input spec to do reindexing. + +See [Update Existing Data](../ingestion/update-existing-data.html) for more details. ## How can I change the granularity of existing data in Druid? In a lot of situations you may want to lower the granularity of older data. Example, any data older than 1 month has only hour level granularity but newer data has minute level granularity. This use case is same as re-indexing. -To do this use the IngestSegmentFirehose and run an indexer task. The IngestSegment firehose will allow you to take in existing segments from Druid and aggregate them and feed them back into druid. It will also allow you to filter the data in those segments while feeding it back in. This means if there are rows you want to delete, you can just filter them away during re-ingestion. +To do this use the IngestSegmentFirehose and run an indexer task. The IngestSegment firehose will allow you to take in existing segments from Druid and aggregate them and feed them back into Druid. It will also allow you to filter the data in those segments while feeding it back in. This means if there are rows you want to delete, you can just filter them away during re-ingestion. Typically the above will be run as a batch job to say everyday feed in a chunk of data and aggregate it. -Or, if you use hadoop based ingestion, then you can use "dataSource" input spec to do reindexing. See [batch-ingestion](../ingestion/batch-ingestion.html) for more details. +Or, if you use hadoop based ingestion, then you can use "dataSource" input spec to do reindexing. + +See [Update Existing Data](../ingestion/update-existing-data.html) for more details. ## Real-time ingestion seems to be stuck diff --git a/docs/content/ingestion/firehose.md b/docs/content/ingestion/firehose.md index b127631c12b..41a1014ab38 100644 --- a/docs/content/ingestion/firehose.md +++ b/docs/content/ingestion/firehose.md @@ -4,7 +4,9 @@ layout: doc_page # Druid Firehoses -Firehoses are used in the [stream-pull](../ingestion/stream-pull.html) ingestion model. They are pluggable and thus the configuration schema can and will vary based on the `type` of the firehose. +Firehoses are used in [native batch ingestion tasks](../ingestion/native_tasks.html), stream push tasks automatically created by [Tranquility](../ingestion/stream-push.html), and the [stream-pull (deprecated)](../ingestion/stream-pull.html) ingestion model. + +They are pluggable and thus the configuration schema can and will vary based on the `type` of the firehose. 
| Field | Type | Description | Required | |-------|------|-------------|----------| @@ -16,7 +18,7 @@ There are several firehoses readily available in Druid, some are meant for examp For additional firehoses, please see our [extensions list](../development/extensions.html). -#### LocalFirehose +### LocalFirehose This Firehose can be used to read the data from files on local disk. It can be used for POCs to ingest data on disk. @@ -38,7 +40,7 @@ A sample local firehose spec is shown below: |filter|A wildcard filter for files. See [here](http://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter.html) for more information.|yes| |baseDir|directory to search recursively for files to be ingested. |yes| -#### HttpFirehose +### HttpFirehose This Firehose can be used to read the data from remote sites via HTTP. This firehose is _splittable_ and can be used by [native parallel index tasks](./native_tasks.html#parallel-index-task). @@ -62,7 +64,7 @@ The below configurations can be optionally used for tuning the firehose performa |fetchTimeout|Timeout for fetching a http object.|60000| |maxFetchRetry|Maximum retry for fetching a http object.|3| -#### IngestSegmentFirehose +### IngestSegmentFirehose This Firehose can be used to read the data from existing druid segments. It can be used ingest existing druid segments using a new schema and change the name, dimensions, metrics, rollup, etc. of the segment. @@ -85,7 +87,39 @@ A sample ingest firehose spec is shown below - |metrics|The list of metrics to select. If left empty, no metrics are returned. If left null or not defined, all metrics are selected.|no| |filter| See [Filters](../querying/filters.html)|no| -#### CombiningFirehose +#### SqlFirehose + +SqlFirehoseFactory can be used to ingest events residing in RDBMS. The database connection information is provided as part of the ingestion spec. For each query, the results are fetched locally and indexed. If there are multiple queries from which data needs to be indexed, queries are prefetched in the background upto `maxFetchCapacityBytes` bytes. +An example is shown below: + +```json +{ + "type" : "sql", + "database": { + "type": "mysql", + "connectorConfig" : { + "connectURI" : "jdbc:mysql://host:port/schema", + "user" : "user", + "password" : "password" + } + }, + "sqls" : ["SELECT * FROM table1", "SELECT * FROM table2"] +} +``` + + +|property|description|default|required?| +|--------|-----------|-------|---------| +|type|This should be "sql".||Yes| +|database|Specifies the database connection details.`type` should specify the database type and `connectorConfig` should specify the database connection properties via `connectURI`, `user` and `password`||Yes| +|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|No| +|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|No| +|prefetchTriggerBytes|Threshold to trigger prefetching SQL result objects.|maxFetchCapacityBytes / 2|No| +|fetchTimeout|Timeout for fetching the result set.|60000|No| +|foldCase|Toggle case folding of database column names. 
This may be enabled in cases where the database returns case insensitive column names in query results.|false|No| +|sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.||Yes| + +### CombiningFirehose This firehose can be used to combine and merge data from a list of different firehoses. This can be used to merge data from more than one firehose. @@ -102,6 +136,13 @@ This can be used to merge data from more than one firehose. |type|This should be "combining"|yes| |delegates|list of firehoses to combine data from|yes| + +### Streaming Firehoses + +The firehoses shown below should only be used with the [stream-pull (deprecated)](../ingestion/stream-pull.html) ingestion model, as they are not suitable for batch ingestion. + +The EventReceiverFirehose is also used in tasks automatically generated by [Tranquility stream push](../ingestion/stream-push.html). + #### EventReceiverFirehose EventReceiverFirehoseFactory can be used to ingest events using an http endpoint. @@ -152,34 +193,3 @@ An example is shown below: |shutoffTime|time at which the firehose should shut down, in ISO8601 format|yes| |delegate|firehose to use|yes| -#### SqlFirehose - -SqlFirehoseFactory can be used to ingest events residing in RDBMS. The database connection information is provided as part of the ingestion spec. For each query, the results are fetched locally and indexed. If there are multiple queries from which data needs to be indexed, queries are prefetched in the background upto `maxFetchCapacityBytes` bytes. -An example is shown below: - -```json -{ - "type" : "sql", - "database": { - "type": "mysql", - "connectorConfig" : { - "connectURI" : "jdbc:mysql://host:port/schema", - "user" : "user", - "password" : "password" - } - }, - "sqls" : ["SELECT * FROM table1", "SELECT * FROM table2"] -} -``` - - -|property|description|default|required?| -|--------|-----------|-------|---------| -|type|This should be "sql".||Yes| -|database|Specifies the database connection details.`type` should specify the database type and `connectorConfig` should specify the database connection properties via `connectURI`, `user` and `password`||Yes| -|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|No| -|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|No| -|prefetchTriggerBytes|Threshold to trigger prefetching SQL result objects.|maxFetchCapacityBytes / 2|No| -|fetchTimeout|Timeout for fetching the result set.|60000|No| -|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case insensitive column names in query results.|false|No| -|sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.||Yes| diff --git a/docs/content/ingestion/hadoop.md b/docs/content/ingestion/hadoop.md index 1115804636b..507cfdb3113 100644 --- a/docs/content/ingestion/hadoop.md +++ b/docs/content/ingestion/hadoop.md @@ -5,7 +5,7 @@ layout: doc_page # Hadoop-based Batch Ingestion Hadoop-based batch ingestion in Druid is supported via a Hadoop-ingestion task. These tasks can be posted to a running -instance of a Druid [overlord](../design/indexing-service.html). +instance of a Druid [overlord](../design/overlord.html). 
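For example, a Hadoop-ingestion task spec saved to a local JSON file can be submitted to the Overlord's task endpoint (a sketch; the host, port, and file name below are placeholders for your own values):

```bash
# Submit the task spec to the Overlord task API (8090 is the default Overlord port).
curl -X POST -H 'Content-Type: application/json' \
  -d @my-hadoop-index-task.json \
  http://OVERLORD_IP:8090/druid/indexer/v1/task
```
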
## Command Line Hadoop Indexer @@ -83,15 +83,15 @@ A sample task is shown below: |property|description|required?| |--------|-----------|---------| |type|The task type, this should always be "index_hadoop".|yes| -|spec|A Hadoop Index Spec. See [Batch Ingestion](../ingestion/batch-ingestion.html)|yes| +|spec|A Hadoop Index Spec. See [Ingestion](../ingestion/ingestion-spec.html)|yes| |hadoopDependencyCoordinates|A JSON array of Hadoop dependency coordinates that Druid will use, this property will override the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies from the location specified by `druid.extensions.hadoopDependenciesDir`|no| |classpathPrefix|Classpath that will be pre-appended for the peon process.|no| -also note that, druid automatically computes the classpath for hadoop job containers that run in hadoop cluster. But, in case of conflicts between hadoop and druid's dependencies, you can manually specify the classpath by setting `druid.extensions.hadoopContainerDruidClasspath` property. See the extensions config in [base druid configuration](../configuration/index.html). +also note that, druid automatically computes the classpath for hadoop job containers that run in hadoop cluster. But, in case of conflicts between hadoop and druid's dependencies, you can manually specify the classpath by setting `druid.extensions.hadoopContainerDruidClasspath` property. See the extensions config in [base druid configuration](../configuration/index.html#extensions). ## DataSchema -This field is required. See [Ingestion](../ingestion/index.html). +This field is required. See [Ingestion Spec DataSchema](../ingestion/ingestion-spec.html#dataschema). ## IOConfig @@ -102,7 +102,7 @@ This field is required. |type|String|This should always be 'hadoop'.|yes| |inputSpec|Object|A specification of where to pull the data in from. See below.|yes| |segmentOutputPath|String|The path to dump segments into.|yes| -|metadataUpdateSpec|Object|A specification of how to update the metadata for the druid cluster these segments belong to.|yes| +|metadataUpdateSpec|Object|A specification of how to update the metadata for the druid cluster these segments belong to.|Only used by the [CLI Hadoop Indexer](../ingestion/command-line-hadoop-indexer.html). This field must be null otherwise.| ### InputSpec specification @@ -158,7 +158,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |Field|Type|Description|Required| |-----|----|-----------|--------| -|workingPath|String|The working path to use for intermediate results (results between Hadoop jobs).|no (default == '/tmp/druid-indexing')| +|workingPath|String|The working path to use for intermediate results (results between Hadoop jobs).|Only used by the [CLI Hadoop Indexer](../ingestion/command-line-hadoop-indexer.html). The default is '/tmp/druid-indexing'. This field must be null otherwise.| |version|String|The version of created segments. Ignored for HadoopIndexTask unless useExplicitVersion is set to true|no (default == datetime that indexing starts at)| |partitionsSpec|Object|A specification of how to partition each time bucket into segments. Absence of this property means no partitioning will occur. See 'Partitioning specification' below.|no (default == 'hashed')| |maxRowsInMemory|Integer|The number of rows to aggregate before persisting. Note that this is the number of post-aggregation rows which may not be equal to the number of input events due to roll-up. 
This is used to manage the required JVM heap size. Normally user does not need to set this, but depending on the nature of data, if rows are short in terms of bytes, user may not want to store a million rows in memory and this value should be set.|no (default == 1000000)| diff --git a/docs/content/ingestion/index.md b/docs/content/ingestion/index.md index 378b308d9e9..c7f0d67b8b2 100644 --- a/docs/content/ingestion/index.md +++ b/docs/content/ingestion/index.md @@ -2,300 +2,278 @@ layout: doc_page --- -# Ingestion Spec +# Ingestion -A Druid ingestion spec consists of 3 components: +## Overview -```json -{ - "dataSchema" : {...}, - "ioConfig" : {...}, - "tuningConfig" : {...} -} +### Datasources and segments + +Druid data is stored in "datasources", which are similar to tables in a traditional RDBMS. Each datasource is +partitioned by time and, optionally, further partitioned by other attributes. Each time range is called a "chunk" (for +example, a single day, if your datasource is partitioned by day). Within a chunk, data is partitioned into one or more +"segments". Each segment is a single file, typically comprising up to a few million rows of data. Since segments are +organized into time chunks, it's sometimes helpful to think of segments as living on a timeline like the following: + + + +A datasource may have anywhere from just a few segments, up to hundreds of thousands and even millions of segments. Each +segment starts life off being created on a MiddleManager, and at that point, is mutable and uncommitted. The segment +building process includes the following steps, designed to produce a data file that is compact and supports fast +queries: + +- Conversion to columnar format +- Indexing with bitmap indexes +- Compression using various algorithms + - Dictionary encoding with id storage minimization for String columns + - Bitmap compression for bitmap indexes + - Type-aware compression for all columns + +Periodically, segments are published (committed). At this point, they are written to deep storage, become immutable, and +move from MiddleManagers to the Historical processes. An entry about the segment is also written to the metadata store. +This entry is a self-describing bit of metadata about the segment, including things like the schema of the segment, its +size, and its location on deep storage. These entries are what the Coordinator uses to know what data *should* be +available on the cluster. + +For details on the segment file format, please see [segment files](../design/segments.html). + +#### Segment identifiers + +Segments all have a four-part identifier with the following components: + +- Datasource name. +- Time interval (for the time chunk containing the segment; this corresponds to the `segmentGranularity` specified +at ingestion time). +- Version number (generally an ISO8601 timestamp corresponding to when the segment set was first started). +- Partition number (an integer, unique within a datasource+interval+version; may not necessarily be contiguous). + +For example, this is the identifier for a segment in datasource `clarity-cloud0`, time chunk +`2018-05-21T16:00:00.000Z/2018-05-21T17:00:00.000Z`, version `2018-05-21T15:56:09.909Z`, and partition number 1: + +``` +clarity-cloud0_2018-05-21T16:00:00.000Z_2018-05-21T17:00:00.000Z_2018-05-21T15:56:09.909Z_1 ``` -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| dataSchema | JSON Object | Specifies the the schema of the incoming data. All ingestion specs can share the same dataSchema. 
| yes | -| ioConfig | JSON Object | Specifies where the data is coming from and where the data is going. This object will vary with the ingestion method. | yes | -| tuningConfig | JSON Object | Specifies how to tune various ingestion parameters. This object will vary with the ingestion method. | no | +Segments with partition number 0 (the first partition in a chunk) omit the partition number, like the following +example, which is a segment in the same time chunk as the previous one, but with partition number 0 instead of 1: -# DataSchema - -An example dataSchema is shown below: - -```json -"dataSchema" : { - "dataSource" : "wikipedia", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "column" : "timestamp", - "format" : "auto" - }, - "dimensionsSpec" : { - "dimensions": [ - "page", - "language", - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city", - { - "type": "long", - "name": "countryNum" - }, - { - "type": "float", - "name": "userLatitude" - }, - { - "type": "float", - "name": "userLongitude" - } - ], - "dimensionExclusions" : [], - "spatialDimensions" : [] - } - } - }, - "metricsSpec" : [{ - "type" : "count", - "name" : "count" - }, { - "type" : "doubleSum", - "name" : "added", - "fieldName" : "added" - }, { - "type" : "doubleSum", - "name" : "deleted", - "fieldName" : "deleted" - }, { - "type" : "doubleSum", - "name" : "delta", - "fieldName" : "delta" - }], - "granularitySpec" : { - "segmentGranularity" : "DAY", - "queryGranularity" : "NONE", - "intervals" : [ "2013-08-31/2013-09-01" ] - }, - "transformSpec" : null -} +``` +clarity-cloud0_2018-05-21T16:00:00.000Z_2018-05-21T17:00:00.000Z_2018-05-21T15:56:09.909Z ``` -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| dataSource | String | The name of the ingested datasource. Datasources can be thought of as tables. | yes | -| parser | JSON Object | Specifies how ingested data can be parsed. | yes | -| metricsSpec | JSON Object array | A list of [aggregators](../querying/aggregations.html). | yes | -| granularitySpec | JSON Object | Specifies how to create segments and roll up data. | yes | -| transformSpec | JSON Object | Specifes how to filter and transform input data. See [transform specs](../ingestion/transform-spec.html).| no | +#### Segment versioning -## Parser +You may be wondering what the "version number" described in the previous section is for. Or, you might not be, in which +case good for you and you can skip this section! -If `type` is not included, the parser defaults to `string`. For additional data formats, please see our [extensions list](../development/extensions.html). +It's there to support batch-mode overwriting. In Druid, if all you ever do is append data, then there will be just a +single version for each time chunk. But when you overwrite data, what happens behind the scenes is that a new set of +segments is created with the same datasource, same time interval, but a higher version number. This is a signal to the +rest of the Druid system that the older version should be removed from the cluster, and the new version should replace +it. -### String Parser +The switch appears to happen instantaneously to a user, because Druid handles this by first loading the new data (but +not allowing it to be queried), and then, as soon as the new data is all loaded, switching all new queries to use those +new segments. 
Then it drops the old segments a few minutes later. -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| type | String | This should say `string` in general, or `hadoopyString` when used in a Hadoop indexing job. | no | -| parseSpec | JSON Object | Specifies the format, timestamp, and dimensions of the data. | yes | -### ParseSpec +#### Segment states -ParseSpecs serve two purposes: +Segments can be either _available_ or _unavailable_, which refers to whether or not they are currently served by some +Druid server process. They can also be _published_ or _unpublished_, which refers to whether or not they have been +written to deep storage and the metadata store. And published segments can be either _used_ or _unused_, which refers to +whether or not Druid considers them active segments that should be served. -- The String Parser use them to determine the format (i.e. JSON, CSV, TSV) of incoming rows. -- All Parsers use them to determine the timestamp and dimensions of incoming rows. +Putting these together, there are five basic states that a segment can be in: -If `format` is not included, the parseSpec defaults to `tsv`. +- **Published, available, and used:** These segments are published in deep storage and the metadata store, and they are +served by Historical processes. They are the majority of active data in a Druid cluster (they include everything except +in-flight realtime data). +- **Published, available, and unused:** These segments are being served by Historicals, but won't be for very long. They +may be segments that have recently been overwritten (see [Segment versioning](#segment-versioning)) or dropped for +other reasons (like drop rules, or being dropped manually). +- **Published, unavailable, and used:** These segments are published in deep storage and the metadata store, and +_should_ be served, but are not actually being served. If segments stay in this state for more than a few minutes, it's +usually because something is wrong. Some of the more common causes include: failure of a large number of Historicals, +Historicals being out of capacity to download more segments, and some issue with coordination that prevents the +Coordinator from telling Historicals to load new segments. +- **Published, unavailable, and unused:** These segments are published in deep storage and the metadata store, but +are inactive (because they have been overwritten or dropped). They lie dormant, and can potentially be resurrected +by manual action if needed (in particular: setting the "used" flag to true). +- **Unpublished and available:** This is the state that segments are in while they are being built by Druid ingestion +tasks. This includes all "realtime" data that has not been handed off to Historicals yet. Segments in this state may or +may not be replicated. If all replicas are lost, then the segment must be rebuilt from scratch. This may or may not be +possible. (It is possible with Kafka, and happens automatically; it is possible with S3/HDFS by restarting the job; and +it is _not_ possible with Tranquility, so in that case, data will be lost.) -#### JSON ParseSpec +The sixth state in this matrix, "unpublished and unavailable," isn't possible. If a segment isn't published and isn't +being served then does it really exist? -Use this with the String Parser to load JSON. -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| format | String | This should say `json`. 
| no | -| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | -| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | -| flattenSpec | JSON Object | Specifies flattening configuration for nested JSON data. See [Flattening JSON](./flatten-json.html) for more info. | no | +#### Indexing and handoff -#### JSON Lowercase ParseSpec +_Indexing_ is the mechanism by which new segments are created, and _handoff_ is the mechanism by which they are published +and begin being served by Historical processes. The mechanism works like this on the indexing side: -
-The _jsonLowercase_ parser is deprecated and may be removed in a future version of Druid. -
+1. An _indexing task_ starts running and building a new segment. It must determine the identifier of the segment before +it starts building it. For a task that is appending (like a Kafka task, or an index task in append mode) this will be +done by calling an "allocate" API on the Overlord to potentially add a new partition to an existing set of segments. For +a task that is overwriting (like a Hadoop task, or an index task _not_ in append mode) this is done by locking an +interval and creating a new version number and new set of segments. +2. If the indexing task is a realtime task (like a Kafka task) then the segment is immediately queryable at this point. +It's available, but unpublished. +3. When the indexing task has finished reading data for the segment, it pushes it to deep storage and then publishes it +by writing a record into the metadata store. +4. If the indexing task is a realtime task, at this point it waits for a Historical process to load the segment. If the +indexing task is not a realtime task, it exits immediately. -This is a special variation of the JSON ParseSpec that lower cases all the column names in the incoming JSON data. This parseSpec is required if you are updating to Druid 0.7.x from Druid 0.6.x, are directly ingesting JSON with mixed case column names, do not have any ETL in place to lower case those column names, and would like to make queries that include the data you created using 0.6.x and 0.7.x. +And like this on the Coordinator / Historical side: -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| format | String | This should say `jsonLowercase`. | yes | -| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | -| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | +1. The Coordinator polls the metadata store periodically (by default, every 1 minute) for newly published segments. +2. When the Coordinator finds a segment that is published and used, but unavailable, it chooses a Historical process +to load that segment and instructs that Historical to do so. +3. The Historical loads the segment and begins serving it. +4. At this point, if the indexing task was waiting for handoff, it will exit. -#### CSV ParseSpec -Use this with the String Parser to load CSV. Strings are parsed using the com.opencsv library. +## Ingestion methods -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| format | String | This should say `csv`. | yes | -| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | -| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | -| listDelimiter | String | A custom delimiter for multi-value dimensions. | no (default == ctrl+A) | -| columns | JSON array | Specifies the columns of the data. | yes | +In most ingestion methods, this work is done by Druid +MiddleManager nodes. One exception is Hadoop-based ingestion, where this work is instead done using a Hadoop MapReduce +job on YARN (although MiddleManager nodes are still involved in starting and monitoring the Hadoop jobs). -#### TSV / Delimited ParseSpec +Once segments have been generated and stored in [deep storage](../dependencies/deep-storage.html), they will be loaded by Druid Historical nodes. Some Druid +ingestion methods additionally support _real-time queries_, meaning you can query in-flight data on MiddleManager nodes +before it is finished being converted and written to deep storage. 
In general, a small amount of data will be in-flight +on MiddleManager nodes relative to the larger amount of historical data being served from Historical nodes. -Use this with the String Parser to load any delimited text that does not require special escaping. By default, -the delimiter is a tab, so this will load TSV. +See the [Design](../design/index.html) page for more details on how Druid stores and manages your data. -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| format | String | This should say `tsv`. | yes | -| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | -| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | -| delimiter | String | A custom delimiter for data values. | no (default == \t) | -| listDelimiter | String | A custom delimiter for multi-value dimensions. | no (default == ctrl+A) | -| columns | JSON String array | Specifies the columns of the data. | yes | +The table below lists Druid's most common data ingestion methods, along with comparisons to help you choose +the best one for your situation. -#### TimeAndDims ParseSpec +|Method|How it works|Can append and overwrite?|Can handle late data?|Exactly-once ingestion?|Real-time queries?| +|------|------------|-------------------------|---------------------|-----------------------|------------------| +|[Native batch](native_tasks.html)|Druid loads data directly from S3, HTTP, NFS, or other networked storage.|Append or overwrite|Yes|Yes|No| +|[Hadoop](hadoop.html)|Druid launches Hadoop Map/Reduce jobs to load data files.|Append or overwrite|Yes|Yes|No| +|[Kafka indexing service](../development/extensions-core/kafka-ingestion.html)|Druid reads directly from Kafka.|Append only|Yes|Yes|Yes| +|[Tranquility](stream-push.html)|You use Tranquility, a client side library, to push individual records into Druid.|Append only|No - late data is dropped|No - may drop or duplicate data|Yes| -Use this with non-String Parsers to provide them with timestamp and dimensions information. Non-String Parsers -handle all formatting decisions on their own, without using the ParseSpec. +## Partitioning -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| format | String | This should say `timeAndDims`. | yes | -| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | -| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | +Druid is a distributed data store, and it partitions your data in order to process it in parallel. Druid +[datasources](../design/index.html) are always partitioned first by time based on the +[segmentGranularity](../ingestion/index.html#granularityspec) parameter of your ingestion spec. Each of these time partitions is called +a _time chunk_, and each time chunk contains one or more [segments](../design/segments.html). The segments within a +particular time chunk may be partitioned further using options that vary based on the ingestion method you have chosen. -### TimestampSpec + * With [Hadoop](hadoop.html) you can do hash- or range-based partitioning on one or more columns. + * With [Native batch](native_tasks.html) you can partition on a hash of all dimension columns. This is useful when + rollup is enabled, since it maximizes your space savings. + * With [Kafka indexing](../development/extensions-core/kafka-ingestion.html), partitioning is based on Kafka + partitions, and is not configurable through Druid. 
You can configure it on the Kafka side by using the partitioning + functionality of the Kafka producer. + * With [Tranquility](stream-push.html), partitioning is done by default on a hash of all dimension columns in order + to maximize rollup. You can also provide a custom Partitioner class; see the + [Tranquility documentation](https://github.com/druid-io/tranquility/blob/master/docs/overview.md#partitioning-and-replication) + for details. -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| column | String | The column of the timestamp. | yes | -| format | String | iso, millis, posix, auto or any [Joda time](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format. | no (default == 'auto' | +All Druid datasources are partitioned by time. Each data ingestion method must acquire a write lock on a particular +time range when loading data, so no two methods can operate on the same time range of the same datasource at the same +time. However, two data ingestion methods _can_ operate on different time ranges of the same datasource at the same +time. For example, you can do a batch backfill from Hadoop while also doing a real-time load from Kafka, so long as +the backfill data and the real-time data do not need to be written to the same time partitions. (If they do, the +real-time load will take priority.) -### DimensionsSpec +## Rollup -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| dimensions | JSON array | A list of [dimension schema](#dimension-schema) objects or dimension names. Providing a name is equivalent to providing a String-typed dimension schema with the given name. If this is an empty array, Druid will treat all columns that are not timestamp or metric columns as String-typed dimension columns. | yes | -| dimensionExclusions | JSON String array | The names of dimensions to exclude from ingestion. | no (default == [] | -| spatialDimensions | JSON Object array | An array of [spatial dimensions](../development/geo.html) | no (default == [] | +Druid is able to summarize raw data at ingestion time using a process we refer to as "roll-up". +Roll-up is a first-level aggregation operation over a selected set of "dimensions", where a set of "metrics" are aggregated. -#### Dimension Schema -A dimension schema specifies the type and name of a dimension to be ingested. +Suppose we have the following raw data, representing total packet/byte counts in particular seconds for traffic between a source and destination. The `srcIP` and `dstIP` fields are dimensions, while `packets` and `bytes` are metrics. -For string columns, the dimension schema can also be used to enable or disable bitmap indexing by setting the -`createBitmapIndex` boolean. By default, bitmap indexes are enabled for all string columns. Only string columns can have -bitmap indexes; they are not supported for numeric columns. - -For example, the following `dimensionsSpec` section from a `dataSchema` ingests one column as Long (`countryNum`), two -columns as Float (`userLatitude`, `userLongitude`), and the other columns as Strings, with bitmap indexes disabled -for the `comment` column. 
- -```json -"dimensionsSpec" : { - "dimensions": [ - "page", - "language", - "user", - "unpatrolled", - "newPage", - "robot", - "anonymous", - "namespace", - "continent", - "country", - "region", - "city", - { - "type": "string", - "name": "comment", - "createBitmapIndex": false - }, - { - "type": "long", - "name": "countryNum" - }, - { - "type": "float", - "name": "userLatitude" - }, - { - "type": "float", - "name": "userLongitude" - } - ], - "dimensionExclusions" : [], - "spatialDimensions" : [] -} +``` +timestamp srcIP dstIP packets bytes +2018-01-01T01:01:35Z 1.1.1.1 2.2.2.2 100 1000 +2018-01-01T01:01:51Z 1.1.1.1 2.2.2.2 200 2000 +2018-01-01T01:01:59Z 1.1.1.1 2.2.2.2 300 3000 +2018-01-01T01:02:14Z 1.1.1.1 2.2.2.2 400 4000 +2018-01-01T01:02:29Z 1.1.1.1 2.2.2.2 500 5000 +2018-01-01T01:03:29Z 1.1.1.1 2.2.2.2 600 6000 +2018-01-02T21:33:14Z 7.7.7.7 8.8.8.8 100 1000 +2018-01-02T21:33:45Z 7.7.7.7 8.8.8.8 200 2000 +2018-01-02T21:35:45Z 7.7.7.7 8.8.8.8 300 3000 ``` -## metricsSpec - The `metricsSpec` is a list of [aggregators](../querying/aggregations.html). If `rollup` is false in the granularity spec, the metrics spec should be an empty list and all columns should be defined in the `dimensionsSpec` instead (without rollup, there isn't a real distinction between dimensions and metrics at ingestion time). This is optional, however. - -## GranularitySpec +If we ingest this data into Druid with a `queryGranularity` of `minute` (which will floor timestamps to minutes), the roll-up operation is equivalent to the following pseudocode: -The default granularity spec is `uniform`, and can be changed by setting the `type` field. -Currently, `uniform` and `arbitrary` types are supported. +``` +GROUP BY TRUNCATE(timestamp, MINUTE), srcIP, dstIP :: SUM(packets), SUM(bytes) +``` -### Uniform Granularity Spec +After the data above is aggregated during roll-up, the following rows will be ingested: -This spec is used to generated segments with uniform intervals. +``` +timestamp srcIP dstIP packets bytes +2018-01-01T01:01:00Z 1.1.1.1 2.2.2.2 600 6000 +2018-01-01T01:02:00Z 1.1.1.1 2.2.2.2 900 9000 +2018-01-01T01:03:00Z 1.1.1.1 2.2.2.2 600 6000 +2018-01-02T21:33:00Z 7.7.7.7 8.8.8.8 300 3000 +2018-01-02T21:35:00Z 7.7.7.7 8.8.8.8 300 3000 +``` -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| segmentGranularity | string | The granularity to create segments at. | no (default == 'DAY') | -| queryGranularity | string | The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of 'NONE' means millisecond granularity.| no (default == 'NONE') | -| rollup | boolean | rollup or not | no (default == true) | -| intervals | string | A list of intervals for the raw data being ingested. Ignored for real-time ingestion. | no. If specified, batch ingestion tasks may skip determining partitions phase which results in faster ingestion. | +Druid can roll up data as it is ingested to minimize the amount of raw data that needs to be stored. +In practice, we see that rolling up data can dramatically reduce the size of data that needs to be stored (up to a factor of 100). 
+This storage reduction does come at a cost: as we roll up data, we lose the ability to query individual events. -### Arbitrary Granularity Spec +The rollup granularity is the minimum granularity you will be able to explore data at and events are floored to this granularity. +Hence, Druid ingestion specs define this granularity as the `queryGranularity` of the data. The lowest supported `queryGranularity` is millisecond. -This spec is used to generate segments with arbitrary intervals (it tries to create evenly sized segments). This spec is not supported for real-time processing. +The following links may be helpful in further understanding dimensions and metrics: +* https://en.wikipedia.org/wiki/Dimension_(data_warehouse) +* https://en.wikipedia.org/wiki/Measure_(data_warehouse)) -| Field | Type | Description | Required | -|-------|------|-------------|----------| -| queryGranularity | string | The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of 'NONE' means millisecond granularity.| no (default == 'NONE') | -| rollup | boolean | rollup or not | no (default == true) | -| intervals | string | A list of intervals for the raw data being ingested. Ignored for real-time ingestion. | no. If specified, batch ingestion tasks may skip determining partitions phase which results in faster ingestion. | +### Roll-up modes -# Transform Spec +Druid supports two roll-up modes, i.e., _perfect roll-up_ and _best-effort roll-up_. In the perfect roll-up mode, Druid guarantees that input data are perfectly aggregated at ingestion time. Meanwhile, in the best-effort roll-up, input data might not be perfectly aggregated and thus there can be multiple segments holding the rows which should belong to the same segment with the perfect roll-up since they have the same dimension value and their timestamps fall into the same interval. -Transform specs allow Druid to transform and filter input data during ingestion. See [Transform specs](../ingestion/transform-spec.html) +The perfect roll-up mode encompasses an additional preprocessing step to determine intervals and shardSpecs before actual data ingestion if they are not specified in the ingestionSpec. This preprocessing step usually scans the entire input data which might increase the ingestion time. The [Hadoop indexing task](../ingestion/hadoop.html) always runs with this perfect roll-up mode. -# IO Config +On the contrary, the best-effort roll-up mode doesn't require any preprocessing step, but the size of ingested data might be larger than that of the perfect roll-up. All types of [streaming indexing (e.g., kafka indexing service)](../ingestion/stream-ingestion.html) run with this mode. -Stream Push Ingestion: Stream push ingestion with Tranquility does not require an IO Config. -Stream Pull Ingestion: See [Stream pull ingestion](../ingestion/stream-pull.html). -Batch Ingestion: See [Batch ingestion](../ingestion/batch-ingestion.html) +Finally, the [native index task](../ingestion/native_tasks.html) supports both modes and you can choose either one which fits to your application. -# Tuning Config +## Data maintenance -Stream Push Ingestion: See [Stream push ingestion](../ingestion/stream-push.html). 
-Stream Pull Ingestion: See [Stream pull ingestion](../ingestion/stream-pull.html). -Batch Ingestion: See [Batch ingestion](../ingestion/batch-ingestion.html) +### Inserts and overwrites -# Evaluating Timestamp, Dimensions and Metrics +Druid can insert new data to an existing datasource by appending new segments to existing segment sets. It can also add new data by merging an existing set of segments with new data and overwriting the original set. -Druid will interpret dimensions, dimension exclusions, and metrics in the following order: +Druid does not support single-record updates by primary key. -* Any column listed in the list of dimensions is treated as a dimension. -* Any column listed in the list of dimension exclusions is excluded as a dimension. -* The timestamp column and columns/fieldNames required by metrics are excluded by default. -* If a metric is also listed as a dimension, the metric must have a different name than the dimension name. +Updates are described further at [update existing data](../ingestion/update-existing-data.html). + +### Compaction + +Compaction is a type of overwrite operation, which reads an existing set of segments, combines them into a new set with larger but fewer segments, and overwrites the original set with the new compacted set, without changing the data that is stored. + +For performance reasons, it is sometimes beneficial to compact a set of segments into a set of larger but fewer segments, as there is some per-segment processing and memory overhead in both the ingestion and querying paths. + +For compaction documentation, please see [tasks](../ingestion/tasks.html). + +### Retention and Tiering + +Druid supports retention rules, which are used to define intervals of time where data should be preserved, and intervals where data should be discarded. + +Druid also supports separating historical nodes into tiers, and the retention rules can be configured to assign data for specific intervals to specific tiers. + +These features are useful for performance/cost management; a common use case is separating historical nodes into a "hot" tier and a "cold" tier. + +For more information, please see [Load rules](../operations/rule-configuration.html). + +### Deletes + +Druid supports permanent deletion of segments that are in an "unused" state (see the [Segment states](#segment-states) section above). + +The Kill Task deletes unused segments within a specified interval from metadata storage and deep storage. + +For more information, please see [Kill Task](../ingestion/tasks.html#kill-task). \ No newline at end of file diff --git a/docs/content/ingestion/ingestion-spec.md b/docs/content/ingestion/ingestion-spec.md new file mode 100644 index 00000000000..7978f6ff561 --- /dev/null +++ b/docs/content/ingestion/ingestion-spec.md @@ -0,0 +1,309 @@ +--- +layout: doc_page +--- + +# Ingestion Spec + +A Druid ingestion spec consists of 3 components: + +```json +{ + "dataSchema" : {...}, + "ioConfig" : {...}, + "tuningConfig" : {...} +} +``` + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| dataSchema | JSON Object | Specifies the the schema of the incoming data. All ingestion specs can share the same dataSchema. | yes | +| ioConfig | JSON Object | Specifies where the data is coming from and where the data is going. This object will vary with the ingestion method. | yes | +| tuningConfig | JSON Object | Specifies how to tune various ingestion parameters. This object will vary with the ingestion method. 
| no | + +# DataSchema + +An example dataSchema is shown below: + +```json +"dataSchema" : { + "dataSource" : "wikipedia", + "parser" : { + "type" : "string", + "parseSpec" : { + "format" : "json", + "timestampSpec" : { + "column" : "timestamp", + "format" : "auto" + }, + "dimensionsSpec" : { + "dimensions": [ + "page", + "language", + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city", + { + "type": "long", + "name": "countryNum" + }, + { + "type": "float", + "name": "userLatitude" + }, + { + "type": "float", + "name": "userLongitude" + } + ], + "dimensionExclusions" : [], + "spatialDimensions" : [] + } + } + }, + "metricsSpec" : [{ + "type" : "count", + "name" : "count" + }, { + "type" : "doubleSum", + "name" : "added", + "fieldName" : "added" + }, { + "type" : "doubleSum", + "name" : "deleted", + "fieldName" : "deleted" + }, { + "type" : "doubleSum", + "name" : "delta", + "fieldName" : "delta" + }], + "granularitySpec" : { + "segmentGranularity" : "DAY", + "queryGranularity" : "NONE", + "intervals" : [ "2013-08-31/2013-09-01" ] + }, + "transformSpec" : null +} +``` + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| dataSource | String | The name of the ingested datasource. Datasources can be thought of as tables. | yes | +| parser | JSON Object | Specifies how ingested data can be parsed. | yes | +| metricsSpec | JSON Object array | A list of [aggregators](../querying/aggregations.html). | yes | +| granularitySpec | JSON Object | Specifies how to create segments and roll up data. | yes | +| transformSpec | JSON Object | Specifes how to filter and transform input data. See [transform specs](../ingestion/transform-spec.html).| no | + +## Parser + +If `type` is not included, the parser defaults to `string`. For additional data formats, please see our [extensions list](../development/extensions.html). + +### String Parser + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| type | String | This should say `string` in general, or `hadoopyString` when used in a Hadoop indexing job. | no | +| parseSpec | JSON Object | Specifies the format, timestamp, and dimensions of the data. | yes | + +### ParseSpec + +ParseSpecs serve two purposes: + +- The String Parser use them to determine the format (i.e. JSON, CSV, TSV) of incoming rows. +- All Parsers use them to determine the timestamp and dimensions of incoming rows. + +If `format` is not included, the parseSpec defaults to `tsv`. + +#### JSON ParseSpec + +Use this with the String Parser to load JSON. + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| format | String | This should say `json`. | no | +| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | +| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | +| flattenSpec | JSON Object | Specifies flattening configuration for nested JSON data. See [Flattening JSON](./flatten-json.html) for more info. | no | + +#### JSON Lowercase ParseSpec + +
+The _jsonLowercase_ parser is deprecated and may be removed in a future version of Druid. +
+ +This is a special variation of the JSON ParseSpec that lower cases all the column names in the incoming JSON data. This parseSpec is required if you are updating to Druid 0.7.x from Druid 0.6.x, are directly ingesting JSON with mixed case column names, do not have any ETL in place to lower case those column names, and would like to make queries that include the data you created using 0.6.x and 0.7.x. + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| format | String | This should say `jsonLowercase`. | yes | +| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | +| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | + +#### CSV ParseSpec + +Use this with the String Parser to load CSV. Strings are parsed using the com.opencsv library. + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| format | String | This should say `csv`. | yes | +| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | +| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | +| listDelimiter | String | A custom delimiter for multi-value dimensions. | no (default == ctrl+A) | +| columns | JSON array | Specifies the columns of the data. | yes | + +#### TSV / Delimited ParseSpec + +Use this with the String Parser to load any delimited text that does not require special escaping. By default, +the delimiter is a tab, so this will load TSV. + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| format | String | This should say `tsv`. | yes | +| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | +| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | +| delimiter | String | A custom delimiter for data values. | no (default == \t) | +| listDelimiter | String | A custom delimiter for multi-value dimensions. | no (default == ctrl+A) | +| columns | JSON String array | Specifies the columns of the data. | yes | + +#### TimeAndDims ParseSpec + +Use this with non-String Parsers to provide them with timestamp and dimensions information. Non-String Parsers +handle all formatting decisions on their own, without using the ParseSpec. + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| format | String | This should say `timeAndDims`. | yes | +| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes | +| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes | + +### TimestampSpec + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| column | String | The column of the timestamp. | yes | +| format | String | iso, millis, posix, auto or any [Joda time](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format. | no (default == 'auto' | + +### DimensionsSpec + +| Field | Type | Description | Required | +|-------|------|-------------|----------| +| dimensions | JSON array | A list of [dimension schema](#dimension-schema) objects or dimension names. Providing a name is equivalent to providing a String-typed dimension schema with the given name. If this is an empty array, Druid will treat all columns that are not timestamp or metric columns as String-typed dimension columns. | yes | +| dimensionExclusions | JSON String array | The names of dimensions to exclude from ingestion. 
| no (default == []) |
+| spatialDimensions | JSON Object array | An array of [spatial dimensions](../development/geo.html) | no (default == []) |
+
+#### Dimension Schema
+A dimension schema specifies the type and name of a dimension to be ingested.
+
+For string columns, the dimension schema can also be used to enable or disable bitmap indexing by setting the
+`createBitmapIndex` boolean. By default, bitmap indexes are enabled for all string columns. Only string columns can have
+bitmap indexes; they are not supported for numeric columns.
+
+For example, the following `dimensionsSpec` section from a `dataSchema` ingests one column as Long (`countryNum`), two
+columns as Float (`userLatitude`, `userLongitude`), and the other columns as Strings, with bitmap indexes disabled
+for the `comment` column.
+
+```json
+"dimensionsSpec" : {
+  "dimensions": [
+    "page",
+    "language",
+    "user",
+    "unpatrolled",
+    "newPage",
+    "robot",
+    "anonymous",
+    "namespace",
+    "continent",
+    "country",
+    "region",
+    "city",
+    {
+      "type": "string",
+      "name": "comment",
+      "createBitmapIndex": false
+    },
+    {
+      "type": "long",
+      "name": "countryNum"
+    },
+    {
+      "type": "float",
+      "name": "userLatitude"
+    },
+    {
+      "type": "float",
+      "name": "userLongitude"
+    }
+  ],
+  "dimensionExclusions" : [],
+  "spatialDimensions" : []
+}
+```
+
+## metricsSpec
+
+The `metricsSpec` is a list of [aggregators](../querying/aggregations.html). If `rollup` is false in the granularity spec, the metrics spec should be an empty list and all columns should be defined in the `dimensionsSpec` instead (without rollup, there isn't a real distinction between dimensions and metrics at ingestion time). This is optional, however.
+
+## GranularitySpec
+
+The default granularity spec is `uniform`, and can be changed by setting the `type` field.
+Currently, `uniform` and `arbitrary` types are supported.
+
+### Uniform Granularity Spec
+
+This spec is used to generate segments with uniform intervals.
+
+| Field | Type | Description | Required |
+|-------|------|-------------|----------|
+| segmentGranularity | string | The granularity to create segments at. | no (default == 'DAY') |
+| queryGranularity | string | The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of 'NONE' means millisecond granularity.| no (default == 'NONE') |
+| rollup | boolean | rollup or not | no (default == true) |
+| intervals | string | A list of intervals for the raw data being ingested. Ignored for real-time ingestion. | no. If specified, batch ingestion tasks may skip determining partitions phase which results in faster ingestion. |
+
+### Arbitrary Granularity Spec
+
+This spec is used to generate segments with arbitrary intervals (it tries to create evenly sized segments). This spec is not supported for real-time processing.
+
+| Field | Type | Description | Required |
+|-------|------|-------------|----------|
+| queryGranularity | string | The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity.
That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of 'NONE' means millisecond granularity.| no (default == 'NONE') | +| rollup | boolean | rollup or not | no (default == true) | +| intervals | string | A list of intervals for the raw data being ingested. Ignored for real-time ingestion. | no. If specified, batch ingestion tasks may skip determining partitions phase which results in faster ingestion. | + +# Transform Spec + +Transform specs allow Druid to transform and filter input data during ingestion. See [Transform specs](../ingestion/transform-spec.html) + +# IO Config + +The IOConfig spec differs based on the ingestion task type. + +* Native Batch ingestion: See [Native Batch IOConfig](../ingestion/native_tasks.html#ioconfig) +* Hadoop Batch ingestion: See [Hadoop Batch IOConfig](../ingestion/hadoop.html#ioconfig) +* Kafka Indexing Service: See [Kafka Supervisor IOConfig](../development/extensions-core/kafka-ingestion.html#KafkaSupervisorIOConfig) +* Stream Push Ingestion: Stream push ingestion with Tranquility does not require an IO Config. +* Stream Pull Ingestion (Deprecated): See [Stream pull ingestion](../ingestion/stream-pull.html#ioconfig). + +# Tuning Config + +The TuningConfig spec differs based on the ingestion task type. + +* Native Batch ingestion: See [Native Batch TuningConfig](../ingestion/native_tasks.html#tuningconfig) +* Hadoop Batch ingestion: See [Hadoop Batch TuningConfig](../ingestion/hadoop.html#tuningconfig) +* Kafka Indexing Service: See [Kafka Supervisor TuningConfig](../development/extensions-core/kafka-ingestion.html#KafkaSupervisorTuningConfig) +* Stream Push Ingestion (Tranquility): See [Tranquility TuningConfig](http://static.druid.io/tranquility/api/latest/#com.metamx.tranquility.druid.DruidTuning). +* Stream Pull Ingestion (Deprecated): See [Stream pull ingestion](../ingestion/stream-pull.html#tuningconfig). + +# Evaluating Timestamp, Dimensions and Metrics + +Druid will interpret dimensions, dimension exclusions, and metrics in the following order: + +* Any column listed in the list of dimensions is treated as a dimension. +* Any column listed in the list of dimension exclusions is excluded as a dimension. +* The timestamp column and columns/fieldNames required by metrics are excluded by default. +* If a metric is also listed as a dimension, the metric must have a different name than the dimension name. diff --git a/docs/content/ingestion/locking-and-priority.md b/docs/content/ingestion/locking-and-priority.md new file mode 100644 index 00000000000..87e66640e76 --- /dev/null +++ b/docs/content/ingestion/locking-and-priority.md @@ -0,0 +1,59 @@ +--- +layout: doc_page +--- + +# Task Locking & Priority + +## Locking + +Once an overlord node accepts a task, the task acquires locks for the data source and intervals specified in the task. + +There are two lock types, i.e., _shared lock_ and _exclusive lock_. + +- A task needs to acquire a shared lock before it reads segments of an interval. Multiple shared locks can be acquired for the same dataSource and interval. Shared locks are always preemptable, but they don't preempt each other. +- A task needs to acquire an exclusive lock before it writes segments for an interval. An exclusive lock is also preemptable except while the task is publishing segments. + +Each task can have different lock priorities. 
The locks of higher-priority tasks can preempt the locks of lower-priority tasks. The lock preemption works based on _optimistic locking_. When a lock is preempted, the owner task is not notified immediately. Instead, it is notified the next time it tries to acquire the same lock. (Note that lock acquisition is idempotent unless the lock is preempted.) In general, tasks don't compete for locks because they usually target different dataSources or intervals.
+
+A task writing data into a dataSource must acquire exclusive locks for target intervals. Note that exclusive locks are still preemptable. That is, they can still be preempted by higher-priority locks unless they are _publishing segments_ in a critical section. Once publishing segments is finished, those locks become preemptable again.
+
+Tasks do not need to explicitly release locks; they are released upon task completion. Tasks may potentially release
+locks early if they desire. Task ids are made unique by naming them using UUIDs or the timestamp at which the task was created.
+Tasks are also part of a "task group", which is a set of tasks that can share interval locks.
+
+## Priority
+
+Druid's indexing tasks use locks for atomic data ingestion. Each lock is acquired for the combination of a dataSource and an interval. Once a task acquires a lock, it can write data for the dataSource and the interval of the acquired lock unless the lock is released or preempted. Please see [the Locking section](#locking) for more details.
+
+Each task has a priority which is used for lock acquisition. The locks of higher-priority tasks can preempt the locks of lower-priority tasks if they try to acquire locks for the same dataSource and interval. If some locks of a task are preempted, the behavior of the preempted task depends on the task implementation. In most cases, tasks fail if their locks are preempted.
+
+Tasks can have different default priorities depending on their types. Here is a list of default priorities. The higher the number, the higher the priority.
+
+|task type|default priority|
+|---------|----------------|
+|Realtime index task|75|
+|Batch index task|50|
+|Merge/Append/Compaction task|25|
+|Other tasks|0|
+
+You can override the task priority by setting a priority in the task context, as shown below.
+
+```json
+"context" : {
+  "priority" : 100
+}
+```
+
+## Task Context
+
+The task context is used for various task configuration parameters. The following parameters apply to all task types.
+
+|property|default|description|
+|--------|-------|-----------|
+|taskLockTimeout|300000|Task lock timeout in milliseconds. For more details, see [Locking](#locking).|
+|priority|Differs based on task type. See [Priority](#priority).|Task priority|
+
+
+When a task acquires a lock, it sends a request via HTTP and waits until it receives a response containing the lock acquisition result.
+As a result, an HTTP timeout error can occur if `taskLockTimeout` is greater than `druid.server.http.maxIdleTime` of overlords.
+
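+
+For example, a task context that overrides both of these parameters might look like the following sketch (the values shown are illustrative only, not recommendations):
+
+```json
+"context" : {
+  "taskLockTimeout" : 600000,
+  "priority" : 100
+}
+```
+
+If you raise `taskLockTimeout`, also check `druid.server.http.maxIdleTime` on the Overlord, as described in the note above, so that lock requests are not cut short by the HTTP idle timeout.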
diff --git a/docs/content/ingestion/misc-tasks.md b/docs/content/ingestion/misc-tasks.md new file mode 100644 index 00000000000..481e90f8752 --- /dev/null +++ b/docs/content/ingestion/misc-tasks.md @@ -0,0 +1,163 @@ +--- +layout: doc_page +--- + +# Miscellaneous Tasks + +## Version Converter Task + +The convert task suite takes active segments and will recompress them using a new IndexSpec. This is handy when doing activities like migrating from Concise to Roaring, or adding dimension compression to old segments. + +Upon success the new segments will have the same version as the old segment with `_converted` appended. A convert task may be run against the same interval for the same datasource multiple times. Each execution will append another `_converted` to the version for the segments + +There are two types of conversion tasks. One is the Hadoop convert task, and the other is the indexing service convert task. The Hadoop convert task runs on a hadoop cluster, and simply leaves a task monitor on the indexing service (similar to the hadoop batch task). The indexing service convert task runs the actual conversion on the indexing service. + +### Hadoop Convert Segment Task + +```json +{ + "type": "hadoop_convert_segment", + "dataSource":"some_datasource", + "interval":"2013/2015", + "indexSpec":{"bitmap":{"type":"concise"},"dimensionCompression":"lz4","metricCompression":"lz4"}, + "force": true, + "validate": false, + "distributedSuccessCache":"hdfs://some-hdfs-nn:9000/user/jobrunner/cache", + "jobPriority":"VERY_LOW", + "segmentOutputPath":"s3n://somebucket/somekeyprefix" +} +``` + +The values are described below. + +|Field|Type|Description|Required| +|-----|----|-----------|--------| +|`type`|String|Convert task identifier|Yes: `hadoop_convert_segment`| +|`dataSource`|String|The datasource to search for segments|Yes| +|`interval`|Interval string|The interval in the datasource to look for segments|Yes| +|`indexSpec`|json|The compression specification for the index|Yes| +|`force`|boolean|Forces the convert task to continue even if binary versions indicate it has been updated recently (you probably want to do this)|No| +|`validate`|boolean|Runs validation between the old and new segment before reporting task success|No| +|`distributedSuccessCache`|URI|A location where hadoop should put intermediary files.|Yes| +|`jobPriority`|`org.apache.hadoop.mapred.JobPriority` as String|The priority to set for the hadoop job|No| +|`segmentOutputPath`|URI|A base uri for the segment to be placed. 
Same format as other places a segment output path is needed|Yes| + +### Indexing Service Convert Segment Task + +```json +{ + "type": "convert_segment", + "dataSource":"some_datasource", + "interval":"2013/2015", + "indexSpec":{"bitmap":{"type":"concise"},"dimensionCompression":"lz4","metricCompression":"lz4"}, + "force": true, + "validate": false +} +``` + +|Field|Type|Description|Required (default)| +|-----|----|-----------|--------| +|`type`|String|Convert task identifier|Yes: `convert_segment`| +|`dataSource`|String|The datasource to search for segments|Yes| +|`interval`|Interval string|The interval in the datasource to look for segments|Yes| +|`indexSpec`|json|The compression specification for the index|Yes| +|`force`|boolean|Forces the convert task to continue even if binary versions indicate it has been updated recently (you probably want to do this)|No (false)| +|`validate`|boolean|Runs validation between the old and new segment before reporting task success|No (true)| + +Unlike the hadoop convert task, the indexing service task draws its output path from the indexing service's configuration. + +#### IndexSpec + +The indexSpec defines segment storage format options to be used at indexing time, such as bitmap type and column +compression formats. The indexSpec is optional and default parameters will be used if not specified. + +|Field|Type|Description|Required| +|-----|----|-----------|--------| +|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object; see below for options.|no (defaults to Concise)| +|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)| +|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, `uncompressed`, or `none`.|no (default == `LZ4`)| +|longEncoding|String|Encoding format for metric and dimension columns with type long. Choose from `auto` or `longs`. `auto` encodes the values using offset or lookup table depending on column cardinality, and store them with variable size. `longs` stores the value as is with 8 bytes each.|no (default == `longs`)| + +##### Bitmap types + +For Concise bitmaps: + +|Field|Type|Description|Required| +|-----|----|-----------|--------| +|type|String|Must be `concise`.|yes| + +For Roaring bitmaps: + +|Field|Type|Description|Required| +|-----|----|-----------|--------| +|type|String|Must be `roaring`.|yes| +|compressRunOnSerialization|Boolean|Use a run-length encoding where it is estimated as more space efficient.|no (default == `true`)| + +## Noop Task + +These tasks start, sleep for a time and are used only for testing. The available grammar is: + +```json +{ + "type": "noop", + "id": , + "interval" : , + "runTime" : , + "firehose": +} +``` + + +## Segment Merging Tasks (Deprecated) + +### Append Task + +Append tasks append a list of segments together into a single segment (one after the other). The grammar is: + +```json +{ + "type": "append", + "id": , + "dataSource": , + "segments": , + "aggregations": , + "context": +} +``` + +### Merge Task + +Merge tasks merge a list of segments together. Any common timestamps are merged. +If rollup is disabled as part of ingestion, common timestamps are not merged and rows are reordered by their timestamp. 
+ +The grammar is: + +```json +{ + "type": "merge", + "id": , + "dataSource": , + "aggregations": , + "rollup": , + "segments": , + "context": +} +``` + +### Same Interval Merge Task + +Same Interval Merge task is a shortcut of merge task, all segments in the interval are going to be merged. + +The grammar is: + +```json +{ + "type": "same_interval_merge", + "id": , + "dataSource": , + "aggregations": , + "rollup": , + "interval": , + "context": +} +``` diff --git a/docs/content/ingestion/native_tasks.md b/docs/content/ingestion/native_tasks.md index 8ef79624bff..a39c2cddc0c 100644 --- a/docs/content/ingestion/native_tasks.md +++ b/docs/content/ingestion/native_tasks.md @@ -28,7 +28,7 @@ which specifies a split and submits worker tasks using those specs. As a result, the implementation of splittable firehoses. Please note that multiple tasks can be created for the same worker task spec if one of them fails. -Since this task doesn't shuffle intermediate data, it isn't available for [perfect rollup](../design/index.html). +Since this task doesn't shuffle intermediate data, it isn't available for [perfect rollup](../ingestion/index.html#roll-up-modes). An example ingestion spec is: @@ -114,7 +114,7 @@ An example ingestion spec is: This field is required. -See [Ingestion](../ingestion/index.html) +See [Ingestion Spec DataSchema](../ingestion/ingestion-spec.html#dataschema) #### IOConfig @@ -141,7 +141,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |forceExtendableShardSpecs|Forces use of extendable shardSpecs. Experimental feature intended for use with the [Kafka indexing service extension](../development/extensions-core/kafka-ingestion.html).|false|no| |reportParseExceptions|If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable rows and fields will be skipped.|false|no| |pushTimeout|Milliseconds to wait for pushing segments. It must be >= 0, where 0 means to wait forever.|0|no| -|segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. See [Indexing Service Configuration](../configuration/indexing-service.html) page, "SegmentWriteOutMediumFactory" section for explanation and available options.|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory` is used|no| +|segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. See [Additional Peon Configuration: SegmentWriteOutMediumFactory](../configuration/index.html#segmentwriteoutmediumfactory) for explanation and available options.|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory` is used|no| |maxNumSubTasks|Maximum number of tasks which can be run at the same time.|Integer.MAX_VALUE|no| |maxRetry|Maximum number of retries on task failures.|3|no| |taskStatusCheckPeriodMs|Polling period in milleseconds to check running task statuses.|1000|no| @@ -453,7 +453,7 @@ The Local Index Task is designed to be used for smaller data sets. The task exec This field is required. -See [Ingestion](../ingestion/index.html) +See [Ingestion Spec DataSchema](../ingestion/ingestion-spec.html#dataschema) #### IOConfig @@ -478,10 +478,10 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |indexSpec|defines segment storage format options to be used at indexing time, see [IndexSpec](#indexspec)|null|no| |maxPendingPersists|Maximum number of persists that can be pending but not started. 
If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)|no| |forceExtendableShardSpecs|Forces use of extendable shardSpecs. Experimental feature intended for use with the [Kafka indexing service extension](../development/extensions-core/kafka-ingestion.html).|false|no| -|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../design/index.html). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. This flag cannot be used with either `appendToExisting` of IOConfig or `forceExtendableShardSpecs`. For more details, see the below __Segment pushing modes__ section.|false|no| +|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.html#roll-up-modes). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. This flag cannot be used with either `appendToExisting` of IOConfig or `forceExtendableShardSpecs`. For more details, see the below __Segment pushing modes__ section.|false|no| |reportParseExceptions|If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable rows and fields will be skipped.|false|no| |pushTimeout|Milliseconds to wait for pushing segments. It must be >= 0, where 0 means to wait forever.|0|no| -|segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. See [Indexing Service Configuration](../configuration/indexing-service.html) page, "SegmentWriteOutMediumFactory" section for explanation and available options.|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory` is used|no| +|segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. See [Additional Peon Configuration: SegmentWriteOutMediumFactory](../configuration/index.html#segmentwriteoutmediumfactory) for explanation and available options.|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory` is used|no| #### IndexSpec @@ -514,7 +514,7 @@ For Roaring bitmaps: While ingesting data using the Index task, it creates segments from the input data and pushes them. For segment pushing, the Index task supports two segment pushing modes, i.e., _bulk pushing mode_ and _incremental pushing mode_ for -[perfect rollup and best-effort rollup](../design/index.html), respectively. +[perfect rollup and best-effort rollup](../ingestion/index.html#roll-up-modes), respectively. In the bulk pushing mode, every segment is pushed at the very end of the index task. Until then, created segments are stored in the memory and local storage of the node running the index task. As a result, this mode might cause a diff --git a/docs/content/ingestion/overview.md b/docs/content/ingestion/overview.md deleted file mode 100644 index c7f0d67b8b2..00000000000 --- a/docs/content/ingestion/overview.md +++ /dev/null @@ -1,279 +0,0 @@ ---- -layout: doc_page ---- - -# Ingestion - -## Overview - -### Datasources and segments - -Druid data is stored in "datasources", which are similar to tables in a traditional RDBMS. Each datasource is -partitioned by time and, optionally, further partitioned by other attributes. 
Each time range is called a "chunk" (for -example, a single day, if your datasource is partitioned by day). Within a chunk, data is partitioned into one or more -"segments". Each segment is a single file, typically comprising up to a few million rows of data. Since segments are -organized into time chunks, it's sometimes helpful to think of segments as living on a timeline like the following: - - - -A datasource may have anywhere from just a few segments, up to hundreds of thousands and even millions of segments. Each -segment starts life off being created on a MiddleManager, and at that point, is mutable and uncommitted. The segment -building process includes the following steps, designed to produce a data file that is compact and supports fast -queries: - -- Conversion to columnar format -- Indexing with bitmap indexes -- Compression using various algorithms - - Dictionary encoding with id storage minimization for String columns - - Bitmap compression for bitmap indexes - - Type-aware compression for all columns - -Periodically, segments are published (committed). At this point, they are written to deep storage, become immutable, and -move from MiddleManagers to the Historical processes. An entry about the segment is also written to the metadata store. -This entry is a self-describing bit of metadata about the segment, including things like the schema of the segment, its -size, and its location on deep storage. These entries are what the Coordinator uses to know what data *should* be -available on the cluster. - -For details on the segment file format, please see [segment files](../design/segments.html). - -#### Segment identifiers - -Segments all have a four-part identifier with the following components: - -- Datasource name. -- Time interval (for the time chunk containing the segment; this corresponds to the `segmentGranularity` specified -at ingestion time). -- Version number (generally an ISO8601 timestamp corresponding to when the segment set was first started). -- Partition number (an integer, unique within a datasource+interval+version; may not necessarily be contiguous). - -For example, this is the identifier for a segment in datasource `clarity-cloud0`, time chunk -`2018-05-21T16:00:00.000Z/2018-05-21T17:00:00.000Z`, version `2018-05-21T15:56:09.909Z`, and partition number 1: - -``` -clarity-cloud0_2018-05-21T16:00:00.000Z_2018-05-21T17:00:00.000Z_2018-05-21T15:56:09.909Z_1 -``` - -Segments with partition number 0 (the first partition in a chunk) omit the partition number, like the following -example, which is a segment in the same time chunk as the previous one, but with partition number 0 instead of 1: - -``` -clarity-cloud0_2018-05-21T16:00:00.000Z_2018-05-21T17:00:00.000Z_2018-05-21T15:56:09.909Z -``` - -#### Segment versioning - -You may be wondering what the "version number" described in the previous section is for. Or, you might not be, in which -case good for you and you can skip this section! - -It's there to support batch-mode overwriting. In Druid, if all you ever do is append data, then there will be just a -single version for each time chunk. But when you overwrite data, what happens behind the scenes is that a new set of -segments is created with the same datasource, same time interval, but a higher version number. This is a signal to the -rest of the Druid system that the older version should be removed from the cluster, and the new version should replace -it. 
- -The switch appears to happen instantaneously to a user, because Druid handles this by first loading the new data (but -not allowing it to be queried), and then, as soon as the new data is all loaded, switching all new queries to use those -new segments. Then it drops the old segments a few minutes later. - - -#### Segment states - -Segments can be either _available_ or _unavailable_, which refers to whether or not they are currently served by some -Druid server process. They can also be _published_ or _unpublished_, which refers to whether or not they have been -written to deep storage and the metadata store. And published segments can be either _used_ or _unused_, which refers to -whether or not Druid considers them active segments that should be served. - -Putting these together, there are five basic states that a segment can be in: - -- **Published, available, and used:** These segments are published in deep storage and the metadata store, and they are -served by Historical processes. They are the majority of active data in a Druid cluster (they include everything except -in-flight realtime data). -- **Published, available, and unused:** These segments are being served by Historicals, but won't be for very long. They -may be segments that have recently been overwritten (see [Segment versioning](#segment-versioning)) or dropped for -other reasons (like drop rules, or being dropped manually). -- **Published, unavailable, and used:** These segments are published in deep storage and the metadata store, and -_should_ be served, but are not actually being served. If segments stay in this state for more than a few minutes, it's -usually because something is wrong. Some of the more common causes include: failure of a large number of Historicals, -Historicals being out of capacity to download more segments, and some issue with coordination that prevents the -Coordinator from telling Historicals to load new segments. -- **Published, unavailable, and unused:** These segments are published in deep storage and the metadata store, but -are inactive (because they have been overwritten or dropped). They lie dormant, and can potentially be resurrected -by manual action if needed (in particular: setting the "used" flag to true). -- **Unpublished and available:** This is the state that segments are in while they are being built by Druid ingestion -tasks. This includes all "realtime" data that has not been handed off to Historicals yet. Segments in this state may or -may not be replicated. If all replicas are lost, then the segment must be rebuilt from scratch. This may or may not be -possible. (It is possible with Kafka, and happens automatically; it is possible with S3/HDFS by restarting the job; and -it is _not_ possible with Tranquility, so in that case, data will be lost.) - -The sixth state in this matrix, "unpublished and unavailable," isn't possible. If a segment isn't published and isn't -being served then does it really exist? - - -#### Indexing and handoff - -_Indexing_ is the mechanism by which new segments are created, and _handoff_ is the mechanism by which they are published -and begin being served by Historical processes. The mechanism works like this on the indexing side: - -1. An _indexing task_ starts running and building a new segment. It must determine the identifier of the segment before -it starts building it. 
For a task that is appending (like a Kafka task, or an index task in append mode) this will be -done by calling an "allocate" API on the Overlord to potentially add a new partition to an existing set of segments. For -a task that is overwriting (like a Hadoop task, or an index task _not_ in append mode) this is done by locking an -interval and creating a new version number and new set of segments. -2. If the indexing task is a realtime task (like a Kafka task) then the segment is immediately queryable at this point. -It's available, but unpublished. -3. When the indexing task has finished reading data for the segment, it pushes it to deep storage and then publishes it -by writing a record into the metadata store. -4. If the indexing task is a realtime task, at this point it waits for a Historical process to load the segment. If the -indexing task is not a realtime task, it exits immediately. - -And like this on the Coordinator / Historical side: - -1. The Coordinator polls the metadata store periodically (by default, every 1 minute) for newly published segments. -2. When the Coordinator finds a segment that is published and used, but unavailable, it chooses a Historical process -to load that segment and instructs that Historical to do so. -3. The Historical loads the segment and begins serving it. -4. At this point, if the indexing task was waiting for handoff, it will exit. - - -## Ingestion methods - -In most ingestion methods, this work is done by Druid -MiddleManager nodes. One exception is Hadoop-based ingestion, where this work is instead done using a Hadoop MapReduce -job on YARN (although MiddleManager nodes are still involved in starting and monitoring the Hadoop jobs). - -Once segments have been generated and stored in [deep storage](../dependencies/deep-storage.html), they will be loaded by Druid Historical nodes. Some Druid -ingestion methods additionally support _real-time queries_, meaning you can query in-flight data on MiddleManager nodes -before it is finished being converted and written to deep storage. In general, a small amount of data will be in-flight -on MiddleManager nodes relative to the larger amount of historical data being served from Historical nodes. - -See the [Design](../design/index.html) page for more details on how Druid stores and manages your data. - -The table below lists Druid's most common data ingestion methods, along with comparisons to help you choose -the best one for your situation. - -|Method|How it works|Can append and overwrite?|Can handle late data?|Exactly-once ingestion?|Real-time queries?| -|------|------------|-------------------------|---------------------|-----------------------|------------------| -|[Native batch](native_tasks.html)|Druid loads data directly from S3, HTTP, NFS, or other networked storage.|Append or overwrite|Yes|Yes|No| -|[Hadoop](hadoop.html)|Druid launches Hadoop Map/Reduce jobs to load data files.|Append or overwrite|Yes|Yes|No| -|[Kafka indexing service](../development/extensions-core/kafka-ingestion.html)|Druid reads directly from Kafka.|Append only|Yes|Yes|Yes| -|[Tranquility](stream-push.html)|You use Tranquility, a client side library, to push individual records into Druid.|Append only|No - late data is dropped|No - may drop or duplicate data|Yes| - -## Partitioning - -Druid is a distributed data store, and it partitions your data in order to process it in parallel. 
Druid -[datasources](../design/index.html) are always partitioned first by time based on the -[segmentGranularity](../ingestion/index.html#granularityspec) parameter of your ingestion spec. Each of these time partitions is called -a _time chunk_, and each time chunk contains one or more [segments](../design/segments.html). The segments within a -particular time chunk may be partitioned further using options that vary based on the ingestion method you have chosen. - - * With [Hadoop](hadoop.html) you can do hash- or range-based partitioning on one or more columns. - * With [Native batch](native_tasks.html) you can partition on a hash of all dimension columns. This is useful when - rollup is enabled, since it maximizes your space savings. - * With [Kafka indexing](../development/extensions-core/kafka-ingestion.html), partitioning is based on Kafka - partitions, and is not configurable through Druid. You can configure it on the Kafka side by using the partitioning - functionality of the Kafka producer. - * With [Tranquility](stream-push.html), partitioning is done by default on a hash of all dimension columns in order - to maximize rollup. You can also provide a custom Partitioner class; see the - [Tranquility documentation](https://github.com/druid-io/tranquility/blob/master/docs/overview.md#partitioning-and-replication) - for details. - -All Druid datasources are partitioned by time. Each data ingestion method must acquire a write lock on a particular -time range when loading data, so no two methods can operate on the same time range of the same datasource at the same -time. However, two data ingestion methods _can_ operate on different time ranges of the same datasource at the same -time. For example, you can do a batch backfill from Hadoop while also doing a real-time load from Kafka, so long as -the backfill data and the real-time data do not need to be written to the same time partitions. (If they do, the -real-time load will take priority.) - -## Rollup - -Druid is able to summarize raw data at ingestion time using a process we refer to as "roll-up". -Roll-up is a first-level aggregation operation over a selected set of "dimensions", where a set of "metrics" are aggregated. - -Suppose we have the following raw data, representing total packet/byte counts in particular seconds for traffic between a source and destination. The `srcIP` and `dstIP` fields are dimensions, while `packets` and `bytes` are metrics. 
- -``` -timestamp srcIP dstIP packets bytes -2018-01-01T01:01:35Z 1.1.1.1 2.2.2.2 100 1000 -2018-01-01T01:01:51Z 1.1.1.1 2.2.2.2 200 2000 -2018-01-01T01:01:59Z 1.1.1.1 2.2.2.2 300 3000 -2018-01-01T01:02:14Z 1.1.1.1 2.2.2.2 400 4000 -2018-01-01T01:02:29Z 1.1.1.1 2.2.2.2 500 5000 -2018-01-01T01:03:29Z 1.1.1.1 2.2.2.2 600 6000 -2018-01-02T21:33:14Z 7.7.7.7 8.8.8.8 100 1000 -2018-01-02T21:33:45Z 7.7.7.7 8.8.8.8 200 2000 -2018-01-02T21:35:45Z 7.7.7.7 8.8.8.8 300 3000 -``` - -If we ingest this data into Druid with a `queryGranularity` of `minute` (which will floor timestamps to minutes), the roll-up operation is equivalent to the following pseudocode: - -``` -GROUP BY TRUNCATE(timestamp, MINUTE), srcIP, dstIP :: SUM(packets), SUM(bytes) -``` - -After the data above is aggregated during roll-up, the following rows will be ingested: - -``` -timestamp srcIP dstIP packets bytes -2018-01-01T01:01:00Z 1.1.1.1 2.2.2.2 600 6000 -2018-01-01T01:02:00Z 1.1.1.1 2.2.2.2 900 9000 -2018-01-01T01:03:00Z 1.1.1.1 2.2.2.2 600 6000 -2018-01-02T21:33:00Z 7.7.7.7 8.8.8.8 300 3000 -2018-01-02T21:35:00Z 7.7.7.7 8.8.8.8 300 3000 -``` - -Druid can roll up data as it is ingested to minimize the amount of raw data that needs to be stored. -In practice, we see that rolling up data can dramatically reduce the size of data that needs to be stored (up to a factor of 100). -This storage reduction does come at a cost: as we roll up data, we lose the ability to query individual events. - -The rollup granularity is the minimum granularity you will be able to explore data at and events are floored to this granularity. -Hence, Druid ingestion specs define this granularity as the `queryGranularity` of the data. The lowest supported `queryGranularity` is millisecond. - -The following links may be helpful in further understanding dimensions and metrics: -* https://en.wikipedia.org/wiki/Dimension_(data_warehouse) -* https://en.wikipedia.org/wiki/Measure_(data_warehouse)) - -### Roll-up modes - -Druid supports two roll-up modes, i.e., _perfect roll-up_ and _best-effort roll-up_. In the perfect roll-up mode, Druid guarantees that input data are perfectly aggregated at ingestion time. Meanwhile, in the best-effort roll-up, input data might not be perfectly aggregated and thus there can be multiple segments holding the rows which should belong to the same segment with the perfect roll-up since they have the same dimension value and their timestamps fall into the same interval. - -The perfect roll-up mode encompasses an additional preprocessing step to determine intervals and shardSpecs before actual data ingestion if they are not specified in the ingestionSpec. This preprocessing step usually scans the entire input data which might increase the ingestion time. The [Hadoop indexing task](../ingestion/hadoop.html) always runs with this perfect roll-up mode. - -On the contrary, the best-effort roll-up mode doesn't require any preprocessing step, but the size of ingested data might be larger than that of the perfect roll-up. All types of [streaming indexing (e.g., kafka indexing service)](../ingestion/stream-ingestion.html) run with this mode. - -Finally, the [native index task](../ingestion/native_tasks.html) supports both modes and you can choose either one which fits to your application. - -## Data maintenance - -### Inserts and overwrites - -Druid can insert new data to an existing datasource by appending new segments to existing segment sets. 
It can also add new data by merging an existing set of segments with new data and overwriting the original set. - -Druid does not support single-record updates by primary key. - -Updates are described further at [update existing data](../ingestion/update-existing-data.html). - -### Compaction - -Compaction is a type of overwrite operation, which reads an existing set of segments, combines them into a new set with larger but fewer segments, and overwrites the original set with the new compacted set, without changing the data that is stored. - -For performance reasons, it is sometimes beneficial to compact a set of segments into a set of larger but fewer segments, as there is some per-segment processing and memory overhead in both the ingestion and querying paths. - -For compaction documentation, please see [tasks](../ingestion/tasks.html). - -### Retention and Tiering - -Druid supports retention rules, which are used to define intervals of time where data should be preserved, and intervals where data should be discarded. - -Druid also supports separating historical nodes into tiers, and the retention rules can be configured to assign data for specific intervals to specific tiers. - -These features are useful for performance/cost management; a common use case is separating historical nodes into a "hot" tier and a "cold" tier. - -For more information, please see [Load rules](../operations/rule-configuration.html). - -### Deletes - -Druid supports permanent deletion of segments that are in an "unused" state (see the [Segment states](#segment-states) section above). - -The Kill Task deletes unused segments within a specified interval from metadata storage and deep storage. - -For more information, please see [Kill Task](../ingestion/tasks.html#kill-task). \ No newline at end of file diff --git a/docs/content/ingestion/schema-design.md b/docs/content/ingestion/schema-design.md index 37dae1baeb0..45e371873f3 100644 --- a/docs/content/ingestion/schema-design.md +++ b/docs/content/ingestion/schema-design.md @@ -33,7 +33,7 @@ See [Dimension Schema](../ingestion/index.html#dimension-schema) for more inform ## High cardinality dimensions (e.g. unique IDs) In practice, we see that exact counts for unique IDs are often not required. Storing unique IDs as a column will kill -[roll-up](../design/index.html), and impact compression. Instead, storing a sketch of the number of the unique IDs seen, and using that +[roll-up](../ingestion/index.html#rollup), and impact compression. Instead, storing a sketch of the number of the unique IDs seen, and using that sketch as part of aggregations, will greatly improve performance (up to orders of magnitude performance improvement), and significantly reduce storage. Druid's `hyperUnique` aggregator is based off of Hyperloglog and can be used for unique counts on a high cardinality dimension. For more information, see [here](https://www.youtube.com/watch?v=Hpd3f_MLdXo). @@ -53,6 +53,8 @@ then before indexing it, you should transform it to: {"foo_bar": 3} ``` +Druid is capable of flattening JSON input data, please see [Flatten JSON](../ingestion/flatten-json.html) for more details. + ## Counting the number of ingested events A count aggregator at ingestion time can be used to count the number of events ingested. 
However, it is important to note
diff --git a/docs/content/ingestion/stream-ingestion.md b/docs/content/ingestion/stream-ingestion.md
index 7d7269dc359..4effdd4375f 100644
--- a/docs/content/ingestion/stream-ingestion.md
+++ b/docs/content/ingestion/stream-ingestion.md
@@ -5,11 +5,9 @@ layout: doc_page
 # Loading streams
 
 Streams can be ingested in Druid using either [Tranquility](https://github.com/druid-io/tranquility) (a Druid-aware
-client) and the [indexing service](../design/indexing-service.html) or through standalone [Realtime nodes](../design/realtime.html).
-The first approach will be more complex to set up, but also offers scalability and high availability characteristics that advanced production
-setups may require. The second approach has some known [limitations](../ingestion/stream-pull.html#limitations).
+client) or the [Kafka Indexing Service](../development/extensions-core/kafka-ingestion.html).
 
-## Stream push
+## Tranquility (Stream Push)
 
 If you have a program that generates a stream, then you can push that stream directly into Druid in
 real-time. With this approach, Tranquility is embedded in your data-producing application.
@@ -22,18 +20,17 @@ seamlessly and without downtime. You only have to define your Druid schema.
 
 For examples and more information, please see the [Tranquility README](https://github.com/druid-io/tranquility).
 
-## Stream pull
+A tutorial is also available at [Tutorial: Loading stream data using HTTP push](../tutorials/tutorial-tranquility.html).
 
-If you have an external service that you want to pull data from, you have two options. The simplest
-option is to set up a "copying" service that reads from the data source and writes to Druid using
-the [stream push method](#stream-push).
+## Kafka Indexing Service (Stream Pull)
 
-Another option is *stream pull*. With this approach, a Druid Realtime Node ingests data from a
-[Firehose](../ingestion/firehose.html) connected to the data you want to
-read. Druid includes builtin firehoses for Kafka, RabbitMQ, and various other streaming systems.
+Druid can pull data from Kafka streams using the [Kafka Indexing Service](../development/extensions-core/kafka-ingestion.html).
 
-## More information
+The Kafka indexing service enables the configuration of *supervisors* on the Overlord, which facilitate ingestion from
+Kafka by managing the creation and lifetime of Kafka indexing tasks. These indexing tasks read events using Kafka's own
+partition and offset mechanism and are therefore able to provide guarantees of exactly-once ingestion. They are also
+able to read non-recent events from Kafka and are not subject to the window period considerations imposed on other
+ingestion mechanisms. The supervisor oversees the state of the indexing tasks to coordinate handoffs, manage failures,
+and ensure that the scalability and replication requirements are maintained.
 
-For more information on loading streaming data via a push based approach, please see [here](../ingestion/stream-push.html).
-
-For more information on loading streaming data via a pull based approach, please see [here](../ingestion/stream-pull.html).
+A tutorial is available at [Tutorial: Loading stream data from Kafka](../tutorials/tutorial-kafka.html).
\ No newline at end of file
diff --git a/docs/content/ingestion/stream-pull.md b/docs/content/ingestion/stream-pull.md
index 6fbd6b52088..e99b2db606b 100644
--- a/docs/content/ingestion/stream-pull.md
+++ b/docs/content/ingestion/stream-pull.md
@@ -2,6 +2,10 @@
 layout: doc_page
 ---
 
+
+NOTE: Realtime nodes are deprecated. Please use the Kafka Indexing Service for stream pull use cases instead. +
+ Stream Pull Ingestion ===================== @@ -344,4 +348,4 @@ difficult to manage and track at scale. Stream ingestion may generate a large number of small segments because it's difficult to optimize the segment size at ingestion time. The number of segments will increase over time, and this might cause the query performance issue. -Details on how to optimize the segment size can be found on [Segment size optimization](../../operations/segment-optimization.html). \ No newline at end of file +Details on how to optimize the segment size can be found on [Segment size optimization](../operations/segment-optimization.html). \ No newline at end of file diff --git a/docs/content/ingestion/stream-push.md b/docs/content/ingestion/stream-push.md index ca0f7013bba..267bc6bebcc 100644 --- a/docs/content/ingestion/stream-push.md +++ b/docs/content/ingestion/stream-push.md @@ -9,8 +9,8 @@ Druid can connect to any streaming data source through streams to Druid in real-time. Druid does not come bundled with Tranquility, and you will have to download the distribution.
-If you've never loaded streaming data into Druid, we recommend trying out the -stream loading tutorial first and then coming back to this page. +If you've never loaded streaming data into Druid with Tranquility before, we recommend trying out the +stream loading tutorial first and then coming back to this page.
Note that with all streaming ingestion options, you must ensure that incoming data is recent @@ -37,10 +37,26 @@ To customize Tranquility Server: them up again. For tips on customizing `server.json`, see the -*[Loading your own streams](../tutorials/tutorial-streams.html)* tutorial and the +*[Writing an ingestion spec](../tutorials/tutorial-ingestion-spec.html)* tutorial and the [Tranquility Server documentation](https://github.com/druid-io/tranquility/blob/master/docs/server.md). -### Kafka +### JVM apps and stream processors + +Tranquility can also be embedded in JVM-based applications as a library. You can do this directly +in your own program using the +[Core API](https://github.com/druid-io/tranquility/blob/master/docs/core.md), or you can use +the connectors bundled in Tranquility for popular JVM-based stream processors such as +[Storm](https://github.com/druid-io/tranquility/blob/master/docs/storm.md), +[Samza](https://github.com/druid-io/tranquility/blob/master/docs/samza.md), +[Spark Streaming](https://github.com/druid-io/tranquility/blob/master/docs/spark.md), and +[Flink](https://github.com/druid-io/tranquility/blob/master/docs/flink.md). + +### Kafka (Deprecated) + +
+NOTE: Tranquility Kafka is deprecated. Please use the Kafka Indexing Service to load data from Kafka instead. +
+
 [Tranquility Kafka](https://github.com/druid-io/tranquility/blob/master/docs/kafka.md)
 lets you load data from Kafka into Druid without writing any code. You only need a configuration
@@ -60,16 +76,6 @@ To customize Tranquility Kafka in the single-machine quickstart configuration:
 For tips on customizing `kafka.json`, see the
 [Tranquility Kafka documentation](https://github.com/druid-io/tranquility/blob/master/docs/kafka.md).
 
-### JVM apps and stream processors
-
-Tranquility can also be embedded in JVM-based applications as a library. You can do this directly
-in your own program using the
-[Core API](https://github.com/druid-io/tranquility/blob/master/docs/core.md), or you can use
-the connectors bundled in Tranquility for popular JVM-based stream processors such as
-[Storm](https://github.com/druid-io/tranquility/blob/master/docs/storm.md),
-[Samza](https://github.com/druid-io/tranquility/blob/master/docs/samza.md),
-[Spark Streaming](https://github.com/druid-io/tranquility/blob/master/docs/spark.md), and
-[Flink](https://github.com/druid-io/tranquility/blob/master/docs/flink.md).
 
 ## Concepts
 
@@ -132,15 +138,22 @@ service, it will retry the batch, which can lead to duplicated events.
 at-least-once design and can lead to duplicated events.
 
 Under normal operation, these risks are minimal. But if you need absolute 100% fidelity for
-historical data, we recommend a [hybrid batch/streaming](../tutorials/ingestion.html#hybrid-batch-streaming)
-architecture.
+historical data, we recommend a hybrid batch/streaming architecture, described below.
+
+### Hybrid Batch/Streaming
+
+You can combine batch and streaming methods in a hybrid batch/streaming architecture. In a hybrid architecture, you use a streaming method to do initial ingestion, and then periodically re-ingest older data in batch mode (typically every few hours, or nightly). When Druid re-ingests data for a time range, the new data automatically replaces the data from the earlier ingestion.
+
+All streaming ingestion methods currently supported by Druid do introduce the possibility of dropped or duplicated messages in certain failure scenarios, and batch re-ingestion eliminates this potential source of error for historical data.
+
+Batch re-ingestion also gives you the option to re-ingest your data if you need to revise it for any reason.
 
 ### Deployment Notes
 
 Stream ingestion may generate a large number of small segments because it's difficult to
 optimize the segment size at ingestion time. The number of segments will increase over time, and this might cause the query performance issue.
 
-Details on how to optimize the segment size can be found on [Segment size optimization](../../operations/segment-optimization.html).
+Details on how to optimize the segment size can be found on [Segment size optimization](../operations/segment-optimization.html).
 
 ## Documentation
 
@@ -151,3 +164,4 @@ Tranquility documentation be found [here](https://github.com/druid-io/tranquilit
 
 Tranquility configuration can be found [here](https://github.com/druid-io/tranquility/blob/master/docs/configuration.md).
 
 Tranquility's tuningConfig can be found [here](http://static.druid.io/tranquility/api/latest/#com.metamx.tranquility.druid.DruidTuning).
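+
+## Example: Hybrid Batch Re-ingestion Sketch
+
+As a rough illustration of the hybrid approach described in the Hybrid Batch/Streaming section above, older data could be re-ingested nightly with a [native batch task](../ingestion/native_tasks.html) that overwrites a single day. The sketch below is illustrative only: the dataSource, columns, file paths, and granularities are hypothetical and must be adapted to your own schema and storage.
+
+```json
+{
+  "type" : "index",
+  "spec" : {
+    "dataSchema" : {
+      "dataSource" : "pageviews",
+      "parser" : {
+        "type" : "string",
+        "parseSpec" : {
+          "format" : "json",
+          "timestampSpec" : { "column" : "timestamp", "format" : "auto" },
+          "dimensionsSpec" : { "dimensions" : ["url", "user"] }
+        }
+      },
+      "metricsSpec" : [
+        { "type" : "count", "name" : "views" }
+      ],
+      "granularitySpec" : {
+        "type" : "uniform",
+        "segmentGranularity" : "day",
+        "queryGranularity" : "none",
+        "intervals" : ["2018-01-01/2018-01-02"]
+      }
+    },
+    "ioConfig" : {
+      "type" : "index",
+      "firehose" : {
+        "type" : "local",
+        "baseDir" : "/data/archive",
+        "filter" : "pageviews-2018-01-01.json"
+      },
+      "appendToExisting" : false
+    }
+  }
+}
+```
+
+Because the task specifies an explicit interval and does not append, the segments it produces replace whatever the streaming path previously wrote for that day, which is what makes the periodic batch pass an effective correctness backstop.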
+ diff --git a/docs/content/ingestion/tasks.md b/docs/content/ingestion/tasks.md index 680bd434bb5..b5498840fa5 100644 --- a/docs/content/ingestion/tasks.md +++ b/docs/content/ingestion/tasks.md @@ -1,313 +1,53 @@ --- layout: doc_page --- -# Tasks -Tasks are run on middle managers and always operate on a single data source. Tasks are submitted using [POST requests](../design/indexing-service.html). +# Tasks Overview + +Tasks are run on middle managers and always operate on a single data source. + +Tasks are submitted using POST requests to the Overlord. Please see [Overlord Task API](../operations/api-reference.html#overlord-tasks) for API details. There are several different types of tasks. -Segment Creation Tasks ----------------------- +## Segment Creation Tasks ### Hadoop Index Task -See [batch ingestion](../ingestion/batch-ingestion.html). +See [batch ingestion](../ingestion/hadoop.html). ### Native Index Tasks Druid provides a native index task which doesn't need any dependencies on other systems. See [native index tasks](./native_tasks.html) for more details. -Segment Merging Tasks ---------------------- +### Kafka Indexing Tasks -### Append Task +Kafka Indexing tasks are automatically created by a Kafka Supervisor and are responsible for pulling data from Kafka streams. These tasks are not meant to be created/submitted directly by users. See [Kafka Indexing Service](../development/extensions-core/kafka-ingestion.html) for more details. -Append tasks append a list of segments together into a single segment (one after the other). The grammar is: +### Stream Push Tasks (Tranquility) -```json -{ - "type": "append", - "id": , - "dataSource": , - "segments": , - "aggregations": , - "context": -} -``` +Tranquility Server automatically creates "realtime" tasks that receive events over HTTP using an [EventReceiverFirehose](../ingestion/firehose.html#eventreceiverfirehose). These tasks are not meant to be created/submitted directly by users. See [Tranquility Stream Push](../ingestion/stream-push.html) for more info. -### Merge Task +## Compaction Tasks -Merge tasks merge a list of segments together. Any common timestamps are merged. -If rollup is disabled as part of ingestion, common timestamps are not merged and rows are reordered by their timestamp. +Compaction tasks merge all segments of the given interval. Please see [Compaction](../ingestion/compaction.html) for details. -The grammar is: +## Segment Merging Tasks -```json -{ - "type": "merge", - "id": , - "dataSource": , - "aggregations": , - "rollup": , - "segments": , - "context": -} -``` - -### Same Interval Merge Task - -Same Interval Merge task is a shortcut of merge task, all segments in the interval are going to be merged. - -The grammar is: - -```json -{ - "type": "same_interval_merge", - "id": , - "dataSource": , - "aggregations": , - "rollup": , - "interval": , - "context": -} -``` - -### Compaction Task - -Compaction tasks merge all segments of the given interval. The syntax is: - -```json -{ - "type": "compact", - "id": , - "dataSource": , - "interval": , - "dimensions" , - "tuningConfig" , - "context": -} -``` - -|Field|Description|Required| -|-----|-----------|--------| -|`type`|Task type. Should be `compact`|Yes| -|`id`|Task id|No| -|`dataSource`|dataSource name to be compacted|Yes| -|`interval`|interval of segments to be compacted|Yes| -|`dimensions`|custom dimensionsSpec. compaction task will use this dimensionsSpec if exist instead of generating one. 
See below for more details.|No| -|`tuningConfig`|[Index task tuningConfig](#tuningconfig)|No| -|`context`|[Task context](#taskcontext)|No| - -An example of compaction task is - -```json -{ - "type" : "compact", - "dataSource" : "wikipedia", - "interval" : "2017-01-01/2018-01-01" -} -``` - -This compaction task reads _all segments_ of the interval `2017-01-01/2018-01-01` and results in new segments. -Note that intervals of the input segments are merged into a single interval of `2017-01-01/2018-01-01` no matter what the segmentGranularity was. -To control the number of result segments, you can set `targetPartitionSize` or `numShards`. See [indexTuningConfig](#tuningconfig) for more details. -To merge each day's worth of data into separate segments, you can submit multiple `compact` tasks, one for each day. They will run in parallel. - -A compaction task internally generates an `index` task spec for performing compaction work with some fixed parameters. -For example, its `firehose` is always the [ingestSegmentSpec](./firehose.html), and `dimensionsSpec` and `metricsSpec` -include all dimensions and metrics of the input segments by default. - -Compaction tasks will exit with a failure status code, without doing anything, if the interval you specify has no -data segments loaded in it (or if the interval you specify is empty). - -The output segment can have different metadata from the input segments unless all input segments have the same metadata. - -- Dimensions: since Druid supports schema change, the dimensions can be different across segments even if they are a part of the same dataSource. -If the input segments have different dimensions, the output segment basically includes all dimensions of the input segments. -However, even if the input segments have the same set of dimensions, the dimension order or the data type of dimensions can be different. For example, the data type of some dimensions can be -changed from `string` to primitive types, or the order of dimensions can be changed for better locality (See [Partitioning](batch-ingestion.html#partitioning-specification)). -In this case, the dimensions of recent segments precede that of old segments in terms of data types and the ordering. -This is because more recent segments are more likely to have the new desired order and data types. If you want to use -your own ordering and types, you can specify a custom `dimensionsSpec` in the compaction task spec. -- Roll-up: the output segment is rolled up only when `rollup` is set for all input segments. -See [Roll-up](../design/index.html#roll-up) for more details. -You can check that your segments are rolled up or not by using [Segment Metadata Queries](../querying/segmentmetadataquery.html#analysistypes). - -Segment Destroying Tasks ------------------------- - -### Kill Task - -Kill tasks delete all information about a segment and removes it from deep storage. Killable segments must be disabled (used==0) in the Druid segment table. The available grammar is: - -```json -{ - "type": "kill", - "id": , - "dataSource": , - "interval" : , - "context": -} -``` - -Misc. Tasks ------------ - -### Version Converter Task -The convert task suite takes active segments and will recompress them using a new IndexSpec. This is handy when doing activities like migrating from Concise to Roaring, or adding dimension compression to old segments. - -Upon success the new segments will have the same version as the old segment with `_converted` appended. 
A convert task may be run against the same interval for the same datasource multiple times. Each execution will append another `_converted` to the version for the segments - -There are two types of conversion tasks. One is the Hadoop convert task, and the other is the indexing service convert task. The Hadoop convert task runs on a hadoop cluster, and simply leaves a task monitor on the indexing service (similar to the hadoop batch task). The indexing service convert task runs the actual conversion on the indexing service. - -#### Hadoop Convert Segment Task -```json -{ - "type": "hadoop_convert_segment", - "dataSource":"some_datasource", - "interval":"2013/2015", - "indexSpec":{"bitmap":{"type":"concise"},"dimensionCompression":"lz4","metricCompression":"lz4"}, - "force": true, - "validate": false, - "distributedSuccessCache":"hdfs://some-hdfs-nn:9000/user/jobrunner/cache", - "jobPriority":"VERY_LOW", - "segmentOutputPath":"s3n://somebucket/somekeyprefix" -} -``` - -The values are described below. - -|Field|Type|Description|Required| -|-----|----|-----------|--------| -|`type`|String|Convert task identifier|Yes: `hadoop_convert_segment`| -|`dataSource`|String|The datasource to search for segments|Yes| -|`interval`|Interval string|The interval in the datasource to look for segments|Yes| -|`indexSpec`|json|The compression specification for the index|Yes| -|`force`|boolean|Forces the convert task to continue even if binary versions indicate it has been updated recently (you probably want to do this)|No| -|`validate`|boolean|Runs validation between the old and new segment before reporting task success|No| -|`distributedSuccessCache`|URI|A location where hadoop should put intermediary files.|Yes| -|`jobPriority`|`org.apache.hadoop.mapred.JobPriority` as String|The priority to set for the hadoop job|No| -|`segmentOutputPath`|URI|A base uri for the segment to be placed. Same format as other places a segment output path is needed|Yes| - - -#### Indexing Service Convert Segment Task -```json -{ - "type": "convert_segment", - "dataSource":"some_datasource", - "interval":"2013/2015", - "indexSpec":{"bitmap":{"type":"concise"},"dimensionCompression":"lz4","metricCompression":"lz4"}, - "force": true, - "validate": false -} -``` - -|Field|Type|Description|Required (default)| -|-----|----|-----------|--------| -|`type`|String|Convert task identifier|Yes: `convert_segment`| -|`dataSource`|String|The datasource to search for segments|Yes| -|`interval`|Interval string|The interval in the datasource to look for segments|Yes| -|`indexSpec`|json|The compression specification for the index|Yes| -|`force`|boolean|Forces the convert task to continue even if binary versions indicate it has been updated recently (you probably want to do this)|No (false)| -|`validate`|boolean|Runs validation between the old and new segment before reporting task success|No (true)| - -Unlike the hadoop convert task, the indexing service task draws its output path from the indexing service's configuration. - -#### IndexSpec - -The indexSpec defines segment storage format options to be used at indexing time, such as bitmap type and column -compression formats. The indexSpec is optional and default parameters will be used if not specified. - -|Field|Type|Description|Required| -|-----|----|-----------|--------| -|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object; see below for options.|no (defaults to Concise)| -|dimensionCompression|String|Compression format for dimension columns. 
Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)| -|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, `uncompressed`, or `none`.|no (default == `LZ4`)| -|longEncoding|String|Encoding format for metric and dimension columns with type long. Choose from `auto` or `longs`. `auto` encodes the values using offset or lookup table depending on column cardinality, and store them with variable size. `longs` stores the value as is with 8 bytes each.|no (default == `longs`)| - -##### Bitmap types - -For Concise bitmaps: - -|Field|Type|Description|Required| -|-----|----|-----------|--------| -|type|String|Must be `concise`.|yes| - -For Roaring bitmaps: - -|Field|Type|Description|Required| -|-----|----|-----------|--------| -|type|String|Must be `roaring`.|yes| -|compressRunOnSerialization|Boolean|Use a run-length encoding where it is estimated as more space efficient.|no (default == `true`)| - -### Noop Task - -These tasks start, sleep for a time and are used only for testing. The available grammar is: - -```json -{ - "type": "noop", - "id": , - "interval" : , - "runTime" : , - "firehose": -} -``` - -Task Context ------------- - -The task context is used for various task configuration parameters. The following parameters apply to all task types. - -|property|default|description| -|--------|-------|-----------| -|taskLockTimeout|300000|task lock timeout in millisecond. For more details, see [the below Locking section](#locking).| -|priority|Different based on task types. See [Task Priority](#task-priority).|Task priority| - -
-When a task acquires a lock, it sends a request via HTTP and awaits until it receives a response containing the lock acquisition result. -As a result, an HTTP timeout error can occur if `taskLockTimeout` is greater than `druid.server.http.maxIdleTime` of overlords.
+The documentation for the Append Task, Merge Task, and Same Interval Merge Task has been moved to [Miscellaneous Tasks](../ingestion/misc-tasks.html).
-Locking ------- +## Kill Task -Once an overlord node accepts a task, the task acquires locks for the data source and intervals specified in the task. +Kill tasks delete all information about a segment and remove it from deep storage. -There are two lock types, i.e., _shared lock_ and _exclusive lock_. +Please see [Deleting Data](../ingestion/delete-data.html) for details. -- A task needs to acquire a shared lock before it reads segments of an interval. Multiple shared locks can be acquired for the same dataSource and interval. Shared locks are always preemptable, but they don't preempt each other. -- A task needs to acquire an exclusive lock before it writes segments for an interval. An exclusive lock is also preemptable except while the task is publishing segments. +## Misc. Tasks -Each task can have different lock priorities. The locks of higher-priority tasks can preempt the locks of lower-priority tasks. The lock preemption works based on _optimistic locking_. When a lock is preempted, it is not notified to the owner task immediately. Instead, it's notified when the owner task tries to acquire the same lock again. (Note that lock acquisition is idempotent unless the lock is preempted.) In general, tasks don't compete for acquiring locks because they usually targets different dataSources or intervals. +Please see [Miscellaneous Tasks](../ingestion/misc-tasks.html). -A task writing data into a dataSource must acquire exclusive locks for target intervals. Note that exclusive locks are still preemptable. That is, they also be able to be preempted by higher priority locks unless they are _publishing segments_ in a critical section. Once publishing segments is finished, those locks become preemptable again. +## Task Locking and Priority -Tasks do not need to explicitly release locks, they are released upon task completion. Tasks may potentially release -locks early if they desire. Task ids are unique by naming them using UUIDs or the timestamp in which the task was created. -Tasks are also part of a "task group", which is a set of tasks that can share interval locks. - -Task Priority ------------- - -Druid's indexing tasks use locks for atomic data ingestion. Each lock is acquired for the combination of a dataSource and an interval. Once a task acquires a lock, it can write data for the dataSource and the interval of the acquired lock unless the lock is released or preempted. Please see [the below Locking section](#locking) - -Each task has a priority which is used for lock acquisition. The locks of higher-priority tasks can preempt the locks of lower-priority tasks if they try to acquire for the same dataSource and interval. If some locks of a task are preempted, the behavior of the preempted task depends on the task implementation. Usually, most tasks finish as failed if they are preempted. - -Tasks can have different default priorities depening on their types. Here are a list of default priorities. Higher the number, higher the priority. - -|task type|default priority| |---------|----------------| |Realtime index task|75| |Batch index task|50| |Merge/Append/Compaction task|25| |Other tasks|0| - -You can override the task priority by setting your priority in the task context like below. - -```json -"context" : { - "priority" : 100 -} -``` +Please see [Task Locking and Priority](../ingestion/locking-and-priority.html).
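As an illustration of the task APIs referenced in this overview, the sketch below submits the compaction task spec shown earlier on this page (the `wikipedia` datasource and 2017 interval are the example values reused from above) to the Overlord, overriding the task priority through the `context` field. It assumes an Overlord listening on `localhost:8090`; adjust the host and port to your deployment.

```bash
# Sketch: POST a compaction task spec to the Overlord task endpoint.
# Assumes the Overlord is reachable at localhost:8090 and that the
# "wikipedia" datasource has segments loaded in the given interval.
curl -X POST -H 'Content-Type: application/json' \
  -d '{
        "type": "compact",
        "dataSource": "wikipedia",
        "interval": "2017-01-01/2018-01-01",
        "context": { "priority": 100 }
      }' \
  http://localhost:8090/druid/indexer/v1/task
```

On success the Overlord responds with the id of the created task, which can then be tracked through the Overlord task API linked above.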
\ No newline at end of file diff --git a/docs/content/ingestion/update-existing-data.md b/docs/content/ingestion/update-existing-data.md index a999d172739..7a18d13502e 100644 --- a/docs/content/ingestion/update-existing-data.md +++ b/docs/content/ingestion/update-existing-data.md @@ -28,7 +28,7 @@ segments and avoid the overhead of rebuilding new segments with reindexing, you ### Reindexing and Delta Ingestion with Hadoop Batch Ingestion This section assumes the reader understands how to do batch ingestion using Hadoop. See -[batch-ingestion](batch-ingestion.html) for more information. Hadoop batch-ingestion can be used for reindexing and delta ingestion. +[Hadoop batch ingestion](./hadoop.html) for more information. Hadoop batch-ingestion can be used for reindexing and delta ingestion. Druid uses an `inputSpec` in the `ioConfig` to know where the data to be ingested is located and how to read it. For simple Hadoop batch ingestion, `static` or `granularity` spec types allow you to read data stored in deep storage. @@ -132,9 +132,9 @@ POST `/druid/coordinator/v1/metadata/datasources/{dataSourceName}/segments?full` Request Body: [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"] -### Reindexing without Hadoop Batch Ingestion +### Reindexing with Native Batch Ingestion -This section assumes the reader understands how to do batch ingestion without Hadoop using the [IndexTask](../ingestion/tasks.html#index-task), +This section assumes the reader understands how to do batch ingestion without Hadoop using [Native Batch Indexing](../ingestion/native_tasks.html), which uses a "firehose" to know where and how to read the input data. [IngestSegmentFirehose](firehose.html#ingestsegmentfirehose) can be used to read data from segments inside Druid. Note that IndexTask is to be used for prototyping purposes only as it has to do all processing inside a single process and can't scale. Please use Hadoop batch ingestion for production diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index d8214916c22..9b2e94c5a90 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -47,7 +47,7 @@ The following built-in functions are available. |----|-----------| |concat|concatenate a list of strings| |like|like(expr, pattern[, escape]) is equivalent to SQL `expr LIKE pattern`| -|lookup|lookup(expr, lookup-name) looks up expr in a registered [query-time lookup](lookups.html)| +|lookup|lookup(expr, lookup-name) looks up expr in a registered [query-time lookup](../querying/lookups.html)| |regexp_extract|regexp_extract(expr, pattern[, index]) applies a regular expression pattern and extracts a capture group index, or null if there is no match. If index is unspecified or zero, returns the substring that matched the pattern.| |replace|replace(expr, pattern, replacement) replaces pattern with replacement| |substring|substring(expr, index, length) behaves like java.lang.String's substring| diff --git a/docs/content/operations/api-reference.md b/docs/content/operations/api-reference.md index a38be3b0696..4eed0e1bb08 100644 --- a/docs/content/operations/api-reference.md +++ b/docs/content/operations/api-reference.md @@ -203,7 +203,7 @@ Enables all segments of datasource which are not overshadowed by others. Enables a segment of a datasource. -#### DELETE +#### DELETE * `/druid/coordinator/v1/datasources/{dataSourceName}` @@ -312,7 +312,7 @@ will be set for them. 
* `/druid/coordinator/v1/config/compaction/{dataSource}` -Creates or updates the compaction config for a dataSource. See [Compaction Configuration](../configuration/coordinator.html#compaction-configuration) for configuration details. +Creates or updates the compaction config for a dataSource. See [Compaction Configuration](../configuration/index.html#compaction-dynamic-configuration) for configuration details. #### DELETE @@ -356,7 +356,7 @@ This returns a JSON object with field "leader", either true or false. In additio server is the current leader and HTTP 404 if not. This is suitable for use as a load balancer status check if you only want the active leader to be considered in-service at the load balancer. -### Tasks +### Tasks #### GET @@ -392,7 +392,6 @@ The Peon does not have any API endpoints beyond the [common endpoints](#common). ## Broker - ### Datasource Information #### GET diff --git a/docs/content/operations/including-extensions.md b/docs/content/operations/including-extensions.md index dbefc5cde2b..6f3ba1a5346 100644 --- a/docs/content/operations/including-extensions.md +++ b/docs/content/operations/including-extensions.md @@ -51,7 +51,7 @@ You only have to install the extension once. Then, add `"druid-example-extension `druid.extensions.loadList` in common.runtime.properties to instruct Druid to load the extension.
-Please make sure all the Extensions related configuration properties listed here are set correctly. +Please make sure all the Extensions related configuration properties listed here are set correctly.
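As a quick check for the extension-loading steps above, the sketch below assumes the default distribution layout (an `extensions/` directory alongside `conf/`) and the hypothetical `druid-example-extension` used in this example; adjust names and paths to your installation.

```bash
# Sketch: confirm the extension directory exists and that it is listed in
# druid.extensions.loadList (paths assume the default distribution layout).
ls extensions/druid-example-extension
grep '^druid\.extensions\.loadList' conf/druid/_common/common.runtime.properties
# A matching entry would look like:
#   druid.extensions.loadList=["druid-example-extension"]
```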
diff --git a/docs/content/operations/metrics.md b/docs/content/operations/metrics.md index ed0762a678d..32aa0e161ad 100644 --- a/docs/content/operations/metrics.md +++ b/docs/content/operations/metrics.md @@ -165,7 +165,7 @@ These metrics are for the Druid coordinator and are reset each time the coordina |`segment/unavailable/count`|Number of segments (not including replicas) left to load until segments that should be loaded in the cluster are available for queries.|datasource.|0| |`segment/underReplicated/count`|Number of segments (including replicas) left to load until segments that should be loaded in the cluster are available for queries.|tier, datasource.|0| -If `emitBalancingStats` is set to `true` in the coordinator [dynamic configuration](../configuration/coordinator.html#dynamic-configuration), then [log entries](../configuration/logging.html) for class `org.apache.druid.server.coordinator.helper.DruidCoordinatorLogger` will have extra information on balancing decisions. +If `emitBalancingStats` is set to `true` in the coordinator [dynamic configuration](../configuration/index.html#dynamic-configuration), then [log entries](../configuration/logging.html) for class `io.druid.server.coordinator.helper.DruidCoordinatorLogger` will have extra information on balancing decisions. ## General Health diff --git a/docs/content/operations/other-hadoop.md b/docs/content/operations/other-hadoop.md index 7967349fdce..59556b9d095 100644 --- a/docs/content/operations/other-hadoop.md +++ b/docs/content/operations/other-hadoop.md @@ -6,7 +6,7 @@ layout: doc_page Druid can interact with Hadoop in two ways: 1. [Use HDFS for deep storage](../development/extensions-core/hdfs.html) using the druid-hdfs-storage extension. -2. [Batch-load data from Hadoop](../ingestion/batch-ingestion.html) using Map/Reduce jobs. +2. [Batch-load data from Hadoop](../ingestion/hadoop.html) using Map/Reduce jobs. These are not necessarily linked together; you can load data with Hadoop jobs into a non-HDFS deep storage (like S3), and you can use HDFS for deep storage even if you're loading data from streams rather than using Hadoop jobs. @@ -34,7 +34,7 @@ Generally, you should only set one of these parameters, not both. These properties can be set in either one of the following ways: -- Using the task definition, e.g. add `"mapreduce.job.classloader": "true"` to the `jobProperties` of the `tuningConfig` of your indexing task (see the [batch ingestion documentation](../ingestion/batch-ingestion.html)). +- Using the task definition, e.g. add `"mapreduce.job.classloader": "true"` to the `jobProperties` of the `tuningConfig` of your indexing task (see the [Hadoop batch ingestion documentation](../ingestion/hadoop.html)). - Using system properties, e.g. on the middleManager set `druid.indexer.runner.javaOpts=... -Dhadoop.mapreduce.job.classloader=true`. ### Overriding specific classes @@ -67,7 +67,7 @@ classloader. 1. HDFS deep storage uses jars from `extensions/druid-hdfs-storage/` to read and write Druid data on HDFS. 2. Batch ingestion uses jars from `hadoop-dependencies/` to submit Map/Reduce jobs (location customizable via the -`druid.extensions.hadoopDependenciesDir` runtime property; see [Configuration](../configuration/index.html)). +`druid.extensions.hadoopDependenciesDir` runtime property; see [Configuration](../configuration/index.html#extensions)). `hadoop-client:2.8.3` is the default version of the Hadoop client bundled with Druid for both purposes. 
This works with many Hadoop distributions (the version does not necessarily need to match), but if you run into issues, you can instead @@ -83,7 +83,7 @@ the main Druid pom.xml and rebuilding the distribution by running `mvn package`. If you have issues with Map/Reduce jobs, you can switch your Hadoop client libraries without rebuilding Druid. You can do this by adding a new set of libraries to the `hadoop-dependencies/` directory (or another directory specified by druid.extensions.hadoopDependenciesDir) and then using `hadoopDependencyCoordinates` in the -[Hadoop Index Task](../ingestion/batch-ingestion.html) to specify the Hadoop dependencies you want Druid to load. +[Hadoop Index Task](../ingestion/hadoop.html) to specify the Hadoop dependencies you want Druid to load. Example: @@ -114,7 +114,7 @@ hadoop-dependencies/ As you can see, under `hadoop-client`, there are two sub-directories, each denotes a version of `hadoop-client`. -Next, use `hadoopDependencyCoordinates` in [Hadoop Index Task](../ingestion/batch-ingestion.html) to specify the Hadoop dependencies you want Druid to load. +Next, use `hadoopDependencyCoordinates` in [Hadoop Index Task](../ingestion/hadoop.html) to specify the Hadoop dependencies you want Druid to load. For example, in your Hadoop Index Task spec file, you can write: diff --git a/docs/content/operations/password-provider.md b/docs/content/operations/password-provider.md index 42ddae14f30..a95b3e1f8a0 100644 --- a/docs/content/operations/password-provider.md +++ b/docs/content/operations/password-provider.md @@ -1,4 +1,8 @@ -#### Password Provider +--- +layout: doc_page +--- + +# Password Provider Druid needs some passwords for accessing various secured systems like metadata store, Key Store containing server certificates etc. All these passwords have corresponding runtime properties associated with them, for example `druid.metadata.storage.connector.password` corresponds to the metadata store password. diff --git a/docs/content/operations/segment-optimization.md b/docs/content/operations/segment-optimization.md index 0e43982d294..08edad06b04 100644 --- a/docs/content/operations/segment-optimization.md +++ b/docs/content/operations/segment-optimization.md @@ -1,6 +1,7 @@ --- layout: doc_page --- + # Segment size optimization In Druid, it's important to optimize the segment size because diff --git a/docs/content/querying/granularities.md b/docs/content/querying/granularities.md index c0aa169dd59..7ae63e713ed 100644 --- a/docs/content/querying/granularities.md +++ b/docs/content/querying/granularities.md @@ -159,7 +159,7 @@ Having a query granularity smaller than the ingestion granularity doesn't make s because information about that smaller granularity is not present in the indexed data. So, if the query granularity is smaller than the ingestion granularity, druid produces results that are equivalent to having set the query granularity to the ingestion granularity. -See `queryGranularity` in [Ingestion Spec](../ingestion/index.html). +See `queryGranularity` in [Ingestion Spec](../ingestion/ingestion-spec.html#granularityspec). If you change the granularity to `all`, you will get everything aggregated in 1 bucket, diff --git a/docs/content/querying/lookups.md b/docs/content/querying/lookups.md index e7ecf2d238f..17dcef9c3eb 100644 --- a/docs/content/querying/lookups.md +++ b/docs/content/querying/lookups.md @@ -332,7 +332,7 @@ The return value will be the json representation of the factory. 
``` # Configuration -See the [coordinator configuration guide](../configuration/coordinator.html) for coordinator configuration. +See [Lookups Dynamic Configuration](../configuration/index.html#lookups-dynamic-configuration) for coordinator configuration. To configure a Broker / Router / Historical / Peon to announce itself as part of a lookup tier, use the `druid.zk.paths.lookupTier` property. diff --git a/docs/content/querying/multitenancy.md b/docs/content/querying/multitenancy.md index 65e5451af51..a63c722328b 100644 --- a/docs/content/querying/multitenancy.md +++ b/docs/content/querying/multitenancy.md @@ -45,7 +45,7 @@ With realtime indexing, you have a couple of options. 1. Partition on tenant_id upfront. You'd do this by tweaking the stream you send to Druid. If you're using Kafka then you can have your Kafka producer partition your topic by a hash of tenant_id. If you're using Tranquility then you can define a custom [Partitioner](http://static.druid.io/tranquility/api/latest/#com.metamx.tranquility.partition.Partitioner). -2. Reindex your older data periodically. You can do this with the ["dataSource" input spec](../ingestion/batch-ingestion.html#datasource). +2. Reindex your older data periodically. You can do this with the ["dataSource" input spec](../ingestion/hadoop.html#datasource). You can use this in concert with single-dimension partitioning to repartition your data. ## Customizing data distribution diff --git a/docs/content/querying/post-aggregations.md b/docs/content/querying/post-aggregations.md index 6cfe1078bac..13edc3dedb8 100644 --- a/docs/content/querying/post-aggregations.md +++ b/docs/content/querying/post-aggregations.md @@ -72,7 +72,7 @@ The constant post-aggregator always returns the specified value. The difference between the `doubleMax` aggregator and the `doubleGreatest` post-aggregator is that `doubleMax` returns the highest value of all rows for one specific column while `doubleGreatest` returns the highest value of multiple columns in one row. These are similar to the SQL [MAX](https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html#function_max) and -[GREATEST](shttp://dev.mysql.com/doc/refman/5.7/en/comparison-operators.html#function_greatest) functions. +[GREATEST](https://dev.mysql.com/doc/refman/5.7/en/comparison-operators.html#function_greatest) functions. Example: diff --git a/docs/content/querying/query-context.md b/docs/content/querying/query-context.md index d4e2be28f12..9e3f87f9245 100644 --- a/docs/content/querying/query-context.md +++ b/docs/content/querying/query-context.md @@ -9,8 +9,8 @@ The query context is used for various query configuration parameters. The follow |property |default | description | |-----------------|----------------------------------------|----------------------| -|timeout | `druid.server.http.defaultQueryTimeout`| Query timeout in millis, beyond which unfinished queries will be cancelled. 0 timeout means `no timeout`. To set the default timeout, see [broker configuration](../configuration/broker.html) | -|maxScatterGatherBytes| `druid.server.http.maxScatterGatherBytes` | Maximum number of bytes gathered from data nodes such as historicals and realtime processes to execute a query. This parameter can be used to further reduce `maxScatterGatherBytes` limit at query time. See [broker configuration](../configuration/broker.html) for more details.| +|timeout | `druid.server.http.defaultQueryTimeout`| Query timeout in millis, beyond which unfinished queries will be cancelled. 0 timeout means `no timeout`. 
To set the default timeout, see [broker configuration](../configuration/index.html#broker) | |maxScatterGatherBytes| `druid.server.http.maxScatterGatherBytes` | Maximum number of bytes gathered from data nodes such as historicals and realtime processes to execute a query. This parameter can be used to further reduce `maxScatterGatherBytes` limit at query time. See [broker configuration](../configuration/index.html#broker) for more details.| |priority | `0` | Query Priority. Queries with higher priority get precedence for computational resources.| |queryId | auto-generated | Unique identifier given to this query. If a query ID is set or known, this can be used to cancel the query | |useCache | `true` | Flag indicating whether to leverage the query cache for this query. When set to false, it disables reading from the query cache for this query. When set to true, Druid uses druid.broker.cache.useCache or druid.historical.cache.useCache to determine whether or not to read from the query cache | diff --git a/docs/content/querying/querying.md b/docs/content/querying/querying.md index f9d99974814..549493e9481 100644 --- a/docs/content/querying/querying.md +++ b/docs/content/querying/querying.md @@ -2,11 +2,10 @@ layout: doc_page --- -Querying -======== +# Querying Queries are made using an HTTP REST style request to queryable nodes ([Broker](../design/broker.html), -[Historical](../design/historical.html), or [Realtime](../design/realtime.html)). The +[Historical](../design/historical.html)). [Peons](../design/peons.html) that are running stream ingestion tasks can also accept queries. The query is expressed in JSON and each of these node types expose the same REST query interface. For normal Druid operations, queries should be issued to the broker nodes. Queries can be posted to the queryable nodes like this - @@ -22,8 +21,7 @@ Druid's native query is relatively low level, mapping closely to how computation are designed to be lightweight and complete very quickly. This means that for more complex analysis, or to build more complex visualizations, multiple Druid queries may be required. -Available Queries ----------------- +## Available Queries Druid has numerous query types for various use cases. Queries are composed of various JSON properties and Druid has different types of queries for different use cases. The documentation for the various query types describe all the JSON properties that can be set. @@ -43,15 +41,13 @@ Druid has numerous query types for various use cases. Queries are composed of va * [Search](../querying/searchquery.html) -Which Query Should I Use? ------------------------- +## Which Query Should I Use? Where possible, we recommend using [Timeseries]() and [TopN]() queries instead of [GroupBy](). GroupBy is the most flexible Druid query, but also has the poorest performance. Timeseries are significantly faster than groupBy queries for aggregations that don't require grouping over dimensions. For grouping and sorting over a single dimension, topN queries are much more optimized than groupBys. -Query Cancellation ------------------ +## Query Cancellation Queries can be cancelled explicitly using their unique identifier.
If the query identifier is set at the time of query, or is otherwise known, the following @@ -67,8 +63,7 @@ For example, if the query ID is `abc123`, the query can be cancelled as follows: curl -X DELETE "http://host:port/druid/v2/abc123" ``` -Query Errors ------------- +## Query Errors If a query fails, you will get an HTTP 500 response containing a JSON object with the following structure: diff --git a/docs/content/querying/segmentmetadataquery.md b/docs/content/querying/segmentmetadataquery.md index e04edfc17b2..0395365669d 100644 --- a/docs/content/querying/segmentmetadataquery.md +++ b/docs/content/querying/segmentmetadataquery.md @@ -32,7 +32,7 @@ There are several main parts to a segment metadata query: |toInclude|A JSON Object representing what columns should be included in the result. Defaults to "all".|no| |merge|Merge all individual segment metadata results into a single result|no| |context|See [Context](../querying/query-context.html)|no| -|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"], but can be overridden with using this [BrokerConfig](../configuration/broker.html#segment-metadata-query-config). See section [analysisTypes](#analysistypes) for more details.|no| +|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"], but can be overridden with using the [segment metadata query config](../configuration/index.html#segment-metadata-query-config). See section [analysisTypes](#analysistypes) for more details.|no| |lenientAggregatorMerge|If true, and if the "aggregators" analysisType is enabled, aggregators will be merged leniently. See below for details.|no| The format of the result is: diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md index 0f15b936129..6942c883f4b 100644 --- a/docs/content/querying/sql.md +++ b/docs/content/querying/sql.md @@ -390,7 +390,7 @@ The supported result formats are: |`csv`|Comma-separated values, with one row per line. Individual field values may be escaped by being surrounded in double quotes. If double quotes appear in a field value, they will be escaped by replacing them with double-double-quotes like `""this""`. To make it possible to detect a truncated response, this format includes a trailer of one blank line.|text/csv| Errors that occur before the response body is sent will be reported in JSON, with an HTTP 500 status code, in the -same format as [native Druid query errors](../querying#query-errors). If an error occurs while the response body is +same format as [native Druid query errors](../querying/querying.html#query-errors). If an error occurs while the response body is being sent, at that point it is too late to change the HTTP status code or report a JSON error, so the response will simply end midstream and an error will be logged by the Druid server that was handling your request. 
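To make the HTTP behaviour described above concrete, here is a minimal sketch of posting a SQL query to a Broker and requesting the `csv` result format from the table above. The Broker address and the `wikipedia` datasource are assumptions; substitute your own.

```bash
# Sketch: issue a Druid SQL query over HTTP and print the HTTP status code.
# Errors raised before the response body is sent come back as JSON with a 500 status.
# Assumes a Broker at localhost:8082 and a datasource named "wikipedia".
curl -s -w '\nHTTP status: %{http_code}\n' \
  -X POST -H 'Content-Type: application/json' \
  -d '{"query": "SELECT COUNT(*) AS cnt FROM wikipedia", "resultFormat": "csv"}' \
  http://localhost:8082/druid/v2/sql/
```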
diff --git a/docs/content/toc.md b/docs/content/toc.md index 5e31a6dfa70..43d2022d78f 100644 --- a/docs/content/toc.md +++ b/docs/content/toc.md @@ -10,7 +10,7 @@ layout: toc * [Datasources & Segments](/docs/VERSION/design/index.html#datasources-and-segments) * [Query processing](/docs/VERSION/design/index.html#query-processing) * [External dependencies](/docs/VERSION/design/index.html#external-dependencies) - * [Ingestion overview](/docs/VERSION/ingestion/overview.html) + * [Ingestion overview](/docs/VERSION/ingestion/index.html) * [Quickstart](/docs/VERSION/tutorials/index.html) * [Tutorial: Loading a file](/docs/VERSION/tutorials/tutorial-batch.html) * [Tutorial: Loading stream data from Kafka](/docs/VERSION/tutorials/tutorial-kafka.html) @@ -28,21 +28,26 @@ layout: toc * [Clustering](/docs/VERSION/tutorials/cluster.html) ## Data Ingestion - * [Ingestion overview](/docs/VERSION/ingestion/overview.html) + * [Ingestion overview](/docs/VERSION/ingestion/index.html) * [Data Formats](/docs/VERSION/ingestion/data-formats.html) - * [Ingestion Spec](/docs/VERSION/ingestion/index.html) + * [Tasks Overview](/docs/VERSION/ingestion/tasks.html) + * [Ingestion Spec](/docs/VERSION/ingestion/ingestion-spec.html) + * [Transform Specs](/docs/VERSION/ingestion/transform-spec.html) + * [Firehoses](/docs/VERSION/ingestion/firehose.html) * [Schema Design](/docs/VERSION/ingestion/schema-design.html) * [Schema Changes](/docs/VERSION/ingestion/schema-changes.html) * [Batch File Ingestion](/docs/VERSION/ingestion/batch-ingestion.html) * [Native Batch Ingestion](/docs/VERSION/ingestion/native_tasks.html) * [Hadoop Batch Ingestion](/docs/VERSION/ingestion/hadoop.html) * [Stream Ingestion](/docs/VERSION/ingestion/stream-ingestion.html) + * [Kafka Indexing Service (Stream Pull)](/docs/VERSION/development/extensions-core/kafka-ingestion.html) * [Stream Push](/docs/VERSION/ingestion/stream-push.html) - * [Stream Pull](/docs/VERSION/ingestion/stream-pull.html) + * [Compaction](/docs/VERSION/ingestion/compaction.html) * [Updating Existing Data](/docs/VERSION/ingestion/update-existing-data.html) - * [Ingestion Tasks](/docs/VERSION/ingestion/tasks.html) - * [Transform Specs](/docs/VERSION/ingestion/transform-spec.html) + * [Deleting Data](/docs/VERSION/ingestion/delete-data.html) + * [Task Locking & Priority](/docs/VERSION/ingestion/locking-and-priority.html) * [FAQ](/docs/VERSION/ingestion/faq.html) + * [Misc. 
Tasks](/docs/VERSION/ingestion/misc-tasks.html) ## Querying * [Overview](/docs/VERSION/querying/querying.html) @@ -73,7 +78,7 @@ layout: toc * [Virtual Columns](/docs/VERSION/querying/virtual-columns.html) ## Design - * [Overview](/docs/VERSION/design/design.html) + * [Overview](/docs/VERSION/design/index.html) * Storage * [Segments](/docs/VERSION/design/segments.html) * Node Types @@ -81,7 +86,10 @@ layout: toc * [Broker](/docs/VERSION/design/broker.html) * [Coordinator](/docs/VERSION/design/coordinator.html) * [Indexing Service](/docs/VERSION/design/indexing-service.html) - * [Realtime](/docs/VERSION/design/realtime.html) + * [Overlord](/docs/VERSION/design/overlord.html) + * [MiddleManager](/docs/VERSION/design/middlemanager.html) + * [Peons](/docs/VERSION/design/peon.html) + * [Realtime (Deprecated)](/docs/VERSION/design/realtime.html) * Dependencies * [Deep Storage](/docs/VERSION/dependencies/deep-storage.html) * [Metadata Storage](/docs/VERSION/dependencies/metadata-storage.html) @@ -139,7 +147,6 @@ layout: toc * [Router](/docs/VERSION/development/router.html) * [Kafka Indexing Service](/docs/VERSION/development/extensions-core/kafka-ingestion.html) - ## Misc * [Druid Expressions Language](/docs/VERSION/misc/math-expr.html) * [Papers & Talks](/docs/VERSION/misc/papers-and-talks.html) diff --git a/docs/content/tutorials/cluster.md b/docs/content/tutorials/cluster.md index b573433481e..86c4d3445b7 100644 --- a/docs/content/tutorials/cluster.md +++ b/docs/content/tutorials/cluster.md @@ -358,4 +358,4 @@ You can add more Brokers as needed based on query load. ## Loading data Congratulations, you now have a Druid cluster! The next step is to learn about recommended ways to load data into -Druid based on your use case. Read more about [loading data](ingestion.html). +Druid based on your use case. Read more about [loading data](../ingestion/index.html). diff --git a/docs/content/tutorials/tutorial-batch-hadoop.md b/docs/content/tutorials/tutorial-batch-hadoop.md index b6640b41e19..736734d66c7 100644 --- a/docs/content/tutorials/tutorial-batch-hadoop.md +++ b/docs/content/tutorials/tutorial-batch-hadoop.md @@ -55,7 +55,7 @@ On the host machine, add the following entry to `/etc/hosts`: Once the `/tmp/shared` folder has been created and the `etc/hosts` entry has been added, run the following command to start the Hadoop container. ```bash -docker run -it -h druid-hadoop-demo -p 50010:50010 -p 50020:50020 -p 50075:50075 -p 50090:50090 -p 8020:8020 -p 10020:10020 -p 19888:19888 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 2049:2049 -p 9000:9000 -p 49707:49707 -p 2122:2122 -p 34455:34455 -v /tmp/shared:/shared druid-hadoop-demo:2.8.3 /etc/bootstrap.sh -bash +docker run -it -h druid-hadoop-demo --name druid-hadoop-demo -p 50010:50010 -p 50020:50020 -p 50075:50075 -p 50090:50090 -p 8020:8020 -p 10020:10020 -p 19888:19888 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 2049:2049 -p 9000:9000 -p 49707:49707 -p 2122:2122 -p 34455:34455 -v /tmp/shared:/shared druid-hadoop-demo:2.8.3 /etc/bootstrap.sh -bash ``` Once the container is started, your terminal will attach to a bash shell running inside the container: @@ -78,6 +78,14 @@ bash-4.1# The `Unable to load native-hadoop library for your platform... using builtin-java classes where applicable` warning messages can be safely ignored. 
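Before moving on, it can help to verify from the host that the demo container is up and the shared folder is mounted; a small sketch, using the container name and path from this tutorial:

```bash
# Sketch: verify the Hadoop demo container is running and the shared mount exists.
docker ps --filter name=druid-hadoop-demo
ls /tmp/shared
```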
+#### Accessing the Hadoop container shell + +To open another shell to the Hadoop container, run the following command: + +``` +docker exec -it druid-hadoop-demo bash +``` + ### Copy input data to the Hadoop container From the druid-#{DRUIDVERSION} package root on the host, copy the `quickstart/wikiticker-2015-09-12-sampled.json.gz` sample data to the shared folder: diff --git a/docs/content/tutorials/tutorial-ingestion-spec.md b/docs/content/tutorials/tutorial-ingestion-spec.md index b9ab9eb63b9..39ff4e605ee 100644 --- a/docs/content/tutorials/tutorial-ingestion-spec.md +++ b/docs/content/tutorials/tutorial-ingestion-spec.md @@ -9,7 +9,7 @@ This tutorial will guide the reader through the process of defining an ingestion For this tutorial, we'll assume you've already downloaded Druid as described in the [single-machine quickstart](index.html) and have it running on your local machine. -It will also be helpful to have finished [Tutorial: Loading a file](/docs/VERSION/tutorials/tutorial-batch.html), [Tutorial: Querying data](/docs/VERSION/tutorials/tutorial-query.html), and [Tutorial: Rollup](/docs/VERSION/tutorials/tutorial-rollup.html). +It will also be helpful to have finished [Tutorial: Loading a file](../tutorials/tutorial-batch.html), [Tutorial: Querying data](../tutorials/tutorial-query.html), and [Tutorial: Rollup](../tutorials/tutorial-rollup.html). ## Example data