From da4836f38cd94587dd604514d56367e6eb5f494a Mon Sep 17 00:00:00 2001 From: Vadim Ogievetsky Date: Wed, 12 Dec 2018 20:42:12 -0800 Subject: [PATCH] Added titles and harmonized docs to improve usability and SEO (#6731) * added titles and harmonized docs * manually fixed some titles --- .../comparisons/druid-vs-elasticsearch.md | 5 +- .../content/comparisons/druid-vs-key-value.md | 5 +- docs/content/comparisons/druid-vs-kudu.md | 5 +- docs/content/comparisons/druid-vs-redshift.md | 5 +- docs/content/comparisons/druid-vs-spark.md | 5 +- .../comparisons/druid-vs-sql-on-hadoop.md | 41 +++++++-------- docs/content/configuration/index.md | 2 +- docs/content/configuration/logging.md | 4 +- docs/content/configuration/realtime.md | 4 +- .../dependencies/cassandra-deep-storage.md | 9 ++-- docs/content/dependencies/deep-storage.md | 2 +- docs/content/dependencies/metadata-storage.md | 2 +- docs/content/dependencies/zookeeper.md | 2 + docs/content/design/auth.md | 2 +- docs/content/design/broker.md | 4 +- docs/content/design/coordinator.md | 4 +- docs/content/design/historical.md | 4 +- docs/content/design/index.md | 3 +- docs/content/design/indexing-service.md | 4 +- docs/content/design/middlemanager.md | 5 +- docs/content/design/overlord.md | 5 +- docs/content/design/peons.md | 5 +- docs/content/design/plumber.md | 2 +- docs/content/design/realtime.md | 5 +- docs/content/design/segments.md | 4 +- docs/content/development/build.md | 4 +- docs/content/development/experimental.md | 4 +- .../ambari-metrics-emitter.md | 2 +- .../development/extensions-contrib/azure.md | 2 +- .../extensions-contrib/cassandra.md | 2 +- .../extensions-contrib/cloudfiles.md | 2 +- .../extensions-contrib/distinctcount.md | 4 +- .../development/extensions-contrib/google.md | 2 +- .../extensions-contrib/graphite.md | 2 +- .../development/extensions-contrib/influx.md | 2 +- .../extensions-contrib/kafka-emitter.md | 2 +- .../extensions-contrib/kafka-simple.md | 2 +- 
.../extensions-contrib/materialized-view.md | 2 +- .../extensions-contrib/opentsdb-emitter.md | 6 +-- .../development/extensions-contrib/orc.md | 10 ++-- .../extensions-contrib/rabbitmq.md | 2 +- .../extensions-contrib/redis-cache.md | 5 +- .../extensions-contrib/rocketmq.md | 2 +- .../extensions-contrib/sqlserver.md | 2 +- .../development/extensions-contrib/statsd.md | 2 +- .../development/extensions-contrib/thrift.md | 2 +- .../extensions-contrib/time-min-max.md | 2 +- .../extensions-core/approximate-histograms.md | 2 +- .../development/extensions-core/avro.md | 2 +- .../extensions-core/bloom-filter.md | 17 +++--- .../extensions-core/datasketches-extension.md | 4 +- .../extensions-core/datasketches-hll.md | 4 +- .../extensions-core/datasketches-quantiles.md | 4 +- .../extensions-core/datasketches-theta.md | 4 +- .../extensions-core/datasketches-tuple.md | 4 +- .../extensions-core/druid-basic-security.md | 12 ++--- .../extensions-core/druid-kerberos.md | 28 +++++----- .../extensions-core/druid-lookups.md | 1 + .../development/extensions-core/examples.md | 4 +- .../development/extensions-core/hdfs.md | 2 +- .../extensions-core/kafka-eight-firehose.md | 2 +- .../kafka-extraction-namespace.md | 2 +- .../extensions-core/kafka-ingestion.md | 2 +- .../extensions-core/lookups-cached-global.md | 2 +- .../development/extensions-core/mysql.md | 2 +- .../development/extensions-core/parquet.md | 2 +- .../development/extensions-core/postgresql.md | 2 +- .../development/extensions-core/protobuf.md | 2 +- .../content/development/extensions-core/s3.md | 2 +- .../simple-client-sslcontext.md | 4 +- .../development/extensions-core/stats.md | 2 +- .../development/extensions-core/test-stats.md | 2 +- docs/content/development/extensions.md | 2 +- docs/content/development/geo.md | 2 + ...tegrating-druid-with-other-technologies.md | 1 + docs/content/development/javascript.md | 1 + docs/content/development/modules.md | 2 +- docs/content/development/overview.md | 2 +- 
docs/content/development/router.md | 5 +- docs/content/development/versioning.md | 2 + docs/content/ingestion/batch-ingestion.md | 2 +- .../ingestion/command-line-hadoop-indexer.md | 2 +- docs/content/ingestion/compaction.md | 2 +- docs/content/ingestion/data-formats.md | 4 +- docs/content/ingestion/delete-data.md | 2 +- docs/content/ingestion/faq.md | 4 +- docs/content/ingestion/firehose.md | 2 +- docs/content/ingestion/flatten-json.md | 2 +- docs/content/ingestion/hadoop.md | 2 +- docs/content/ingestion/index.md | 2 +- docs/content/ingestion/ingestion-spec.md | 2 +- .../content/ingestion/locking-and-priority.md | 2 +- docs/content/ingestion/misc-tasks.md | 2 +- docs/content/ingestion/native_tasks.md | 1 + docs/content/ingestion/reports.md | 1 + docs/content/ingestion/schema-changes.md | 1 + docs/content/ingestion/schema-design.md | 2 +- docs/content/ingestion/stream-ingestion.md | 16 +++--- docs/content/ingestion/stream-pull.md | 52 +++++++++---------- docs/content/ingestion/stream-push.md | 4 +- docs/content/ingestion/tasks.md | 1 + docs/content/ingestion/transform-spec.md | 2 +- .../content/ingestion/update-existing-data.md | 1 + docs/content/misc/math-expr.md | 2 +- docs/content/misc/papers-and-talks.md | 2 +- docs/content/operations/alerts.md | 1 + docs/content/operations/api-reference.md | 2 +- docs/content/operations/dump-segment.md | 1 + docs/content/operations/http-compression.md | 1 + .../operations/including-extensions.md | 2 +- .../operations/insert-segment-to-db.md | 1 + docs/content/operations/metrics.md | 1 + docs/content/operations/other-hadoop.md | 1 + docs/content/operations/password-provider.md | 2 +- docs/content/operations/performance-faq.md | 2 +- docs/content/operations/pull-deps.md | 2 +- docs/content/operations/recommendations.md | 5 +- docs/content/operations/reset-cluster.md | 1 + docs/content/operations/rolling-updates.md | 5 +- docs/content/operations/rule-configuration.md | 1 + .../operations/segment-optimization.md | 2 +- 
docs/content/operations/tls-support.md | 5 +- .../operations/use_sbt_to_build_fat_jar.md | 4 +- docs/content/querying/aggregations.md | 1 + docs/content/querying/caching.md | 1 + docs/content/querying/datasource.md | 4 +- .../querying/datasourcemetadataquery.md | 2 + docs/content/querying/dimensionspecs.md | 2 +- docs/content/querying/filters.md | 2 + docs/content/querying/granularities.md | 3 +- docs/content/querying/groupbyquery.md | 1 + docs/content/querying/having.md | 2 + docs/content/querying/joins.md | 1 + docs/content/querying/limitspec.md | 2 + docs/content/querying/lookups.md | 2 +- .../querying/multi-value-dimensions.md | 1 + docs/content/querying/multitenancy.md | 1 + docs/content/querying/post-aggregations.md | 2 + docs/content/querying/query-context.md | 5 +- docs/content/querying/querying.md | 2 +- docs/content/querying/scan-query.md | 3 +- docs/content/querying/searchquery.md | 2 + docs/content/querying/searchqueryspec.md | 2 + docs/content/querying/segmentmetadataquery.md | 2 + docs/content/querying/select-query.md | 1 + docs/content/querying/sorting-orders.md | 2 + docs/content/querying/sql.md | 1 + docs/content/querying/timeboundaryquery.md | 2 + docs/content/querying/timeseriesquery.md | 4 +- docs/content/querying/topnmetricspec.md | 4 +- docs/content/querying/topnquery.md | 4 +- docs/content/querying/virtual-columns.md | 2 +- docs/content/tutorials/cluster.md | 2 +- docs/content/tutorials/index.md | 6 +-- .../tutorials/tutorial-batch-hadoop.md | 2 +- docs/content/tutorials/tutorial-batch.md | 2 +- docs/content/tutorials/tutorial-compaction.md | 2 +- .../content/tutorials/tutorial-delete-data.md | 2 +- .../tutorials/tutorial-ingestion-spec.md | 2 +- docs/content/tutorials/tutorial-kafka.md | 2 +- docs/content/tutorials/tutorial-query.md | 2 +- docs/content/tutorials/tutorial-retention.md | 2 +- docs/content/tutorials/tutorial-rollup.md | 2 +- .../content/tutorials/tutorial-tranquility.md | 2 +- .../tutorials/tutorial-transform-spec.md | 2 +- 
.../content/tutorials/tutorial-update-data.md | 2 +- 166 files changed, 312 insertions(+), 271 deletions(-) diff --git a/docs/content/comparisons/druid-vs-elasticsearch.md b/docs/content/comparisons/druid-vs-elasticsearch.md index d0782bd10f5..015200eaded 100644 --- a/docs/content/comparisons/druid-vs-elasticsearch.md +++ b/docs/content/comparisons/druid-vs-elasticsearch.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Druid vs Elasticsearch" --- - -Druid vs Elasticsearch -====================== +# Druid vs Elasticsearch We are not experts on search systems, if anything is incorrect about our portrayal, please let us know on the mailing list or via some other means. diff --git a/docs/content/comparisons/druid-vs-key-value.md b/docs/content/comparisons/druid-vs-key-value.md index 4e911014ff6..d8ccd504082 100644 --- a/docs/content/comparisons/druid-vs-key-value.md +++ b/docs/content/comparisons/druid-vs-key-value.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB)" --- - -Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB) -==================================================== +# Druid vs. Key/Value Stores (HBase/Cassandra/OpenTSDB) Druid is highly optimized for scans and aggregations, it supports arbitrarily deep drill downs into data sets. This same functionality is supported in key/value stores in 2 ways: diff --git a/docs/content/comparisons/druid-vs-kudu.md b/docs/content/comparisons/druid-vs-kudu.md index 8d00ae51102..7f8fc7368d4 100644 --- a/docs/content/comparisons/druid-vs-kudu.md +++ b/docs/content/comparisons/druid-vs-kudu.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Druid vs Kudu" --- - -Druid vs Kudu -============= +# Druid vs Kudu Kudu's storage format enables single row updates, whereas updates to existing Druid segments requires recreating the segment, so theoretically the process for updating old values should be higher latency in Druid. 
However, the requirements in Kudu for maintaining extra head space to store diff --git a/docs/content/comparisons/druid-vs-redshift.md b/docs/content/comparisons/druid-vs-redshift.md index 65951416aba..103ecc324cd 100644 --- a/docs/content/comparisons/druid-vs-redshift.md +++ b/docs/content/comparisons/druid-vs-redshift.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Druid vs Redshift" --- -Druid vs Redshift -================= - +# Druid vs Redshift ### How does Druid compare to Redshift? diff --git a/docs/content/comparisons/druid-vs-spark.md b/docs/content/comparisons/druid-vs-spark.md index 9723beff092..07f16fa1728 100644 --- a/docs/content/comparisons/druid-vs-spark.md +++ b/docs/content/comparisons/druid-vs-spark.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Druid vs Spark" --- - -Druid vs Spark -============== +# Druid vs Spark Druid and Spark are complementary solutions as Druid can be used to accelerate OLAP queries in Spark. diff --git a/docs/content/comparisons/druid-vs-sql-on-hadoop.md b/docs/content/comparisons/druid-vs-sql-on-hadoop.md index 3bf2c3f248a..f867c247b0a 100644 --- a/docs/content/comparisons/druid-vs-sql-on-hadoop.md +++ b/docs/content/comparisons/druid-vs-sql-on-hadoop.md @@ -19,17 +19,16 @@ --- layout: doc_page +title: "Druid vs SQL-on-Hadoop" --- +# Druid vs SQL-on-Hadoop (Impala/Drill/Spark SQL/Presto) -Druid vs SQL-on-Hadoop (Impala/Drill/Spark SQL/Presto) -=========================================================== - -SQL-on-Hadoop engines provide an -execution engine for various data formats and data stores, and +SQL-on-Hadoop engines provide an +execution engine for various data formats and data stores, and many can be made to push down computations down to Druid, while providing a SQL interface to Druid. -For a direct comparison between the technologies and when to only use one or the other, things basically comes down to your -product requirements and what the systems were designed to do. 
+For a direct comparison between the technologies and when to only use one or the other, things basically come down to your +product requirements and what the systems were designed to do. Druid was designed to @@ -37,7 +36,7 @@ Druid was designed to 1. ingest data in real-time 1. handle slice-n-dice style ad-hoc queries -SQL-on-Hadoop engines generally sidestep Map/Reduce, instead querying data directly from HDFS or, in some cases, other storage systems. +SQL-on-Hadoop engines generally sidestep Map/Reduce, instead querying data directly from HDFS or, in some cases, other storage systems. Some of these engines (including Impala and Presto) can be colocated with HDFS data nodes and coordinate with them to achieve data locality for queries. What does this mean? We can talk about it in terms of three general areas @@ -47,37 +46,37 @@ What does this mean? We can talk about it in terms of three general areas ### Queries -Druid segments stores data in a custom column format. Segments are scanned directly as part of queries and each Druid server -calculates a set of results that are eventually merged at the Broker level. This means the data that is transferred between servers +Druid segments store data in a custom column format. Segments are scanned directly as part of queries and each Druid server +calculates a set of results that are eventually merged at the Broker level. This means the data that is transferred between servers are queries and results, and all computation is done internally as part of the Druid servers. -Most SQL-on-Hadoop engines are responsible for query planning and execution for underlying storage layers and storage formats. -They are processes that stay on even if there is no query running (eliminating the JVM startup costs from Hadoop MapReduce). -Some (Impala/Presto) SQL-on-Hadoop engines have daemon processes that can be run where the data is stored, virtually eliminating network transfer costs. There is still -some latency overhead (e.g.
serde time) associated with pulling data from the underlying storage layer into the computation layer. We are unaware of exactly +Most SQL-on-Hadoop engines are responsible for query planning and execution for underlying storage layers and storage formats. +They are processes that stay on even if there is no query running (eliminating the JVM startup costs from Hadoop MapReduce). +Some (Impala/Presto) SQL-on-Hadoop engines have daemon processes that can be run where the data is stored, virtually eliminating network transfer costs. There is still +some latency overhead (e.g. serde time) associated with pulling data from the underlying storage layer into the computation layer. We are unaware of exactly how much of a performance impact this makes. ### Data Ingestion -Druid is built to allow for real-time ingestion of data. You can ingest data and query it immediately upon ingestion, +Druid is built to allow for real-time ingestion of data. You can ingest data and query it immediately upon ingestion, the latency between how quickly the event is reflected in the data is dominated by how long it takes to deliver the event to Druid. -SQL-on-Hadoop, being based on data in HDFS or some other backing store, are limited in their data ingestion rates by the -rate at which that backing store can make data available. Generally, the backing store is the biggest bottleneck for +SQL-on-Hadoop, being based on data in HDFS or some other backing store, are limited in their data ingestion rates by the +rate at which that backing store can make data available. Generally, the backing store is the biggest bottleneck for how quickly data can become available. ### Query Flexibility -Druid's query language is fairly low level and maps to how Druid operates internally. 
Although Druid can be combined with a high level query -planner such as [Plywood](https://github.com/implydata/plywood) to support most SQL queries and analytic SQL queries (minus joins among large tables), +Druid's query language is fairly low level and maps to how Druid operates internally. Although Druid can be combined with a high level query +planner such as [Plywood](https://github.com/implydata/plywood) to support most SQL queries and analytic SQL queries (minus joins among large tables), base Druid is less flexible than SQL-on-Hadoop solutions for generic processing. SQL-on-Hadoop support SQL style queries with full joins. ## Druid vs Parquet -Parquet is a column storage format that is designed to work with SQL-on-Hadoop engines. Parquet doesn't have a query execution engine, and instead +Parquet is a column storage format that is designed to work with SQL-on-Hadoop engines. Parquet doesn't have a query execution engine, and instead relies on external sources to pull data out of it. -Druid's storage format is highly optimized for linear scans. Although Druid has support for nested data, Parquet's storage format is much +Druid's storage format is highly optimized for linear scans. Although Druid has support for nested data, Parquet's storage format is much more hierachical, and is more designed for binary chunking. In theory, this should lead to faster scans in Druid. diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index e2b9f7a7ad9..9219ac0db52 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Configuration Reference" --- - # Configuration Reference This page documents all of the configuration properties for each Druid service type. 
diff --git a/docs/content/configuration/logging.md b/docs/content/configuration/logging.md index 640eea4616e..10a6769cd10 100644 --- a/docs/content/configuration/logging.md +++ b/docs/content/configuration/logging.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Logging" --- -Logging -========================== +# Logging Druid nodes will emit logs that are useful for debugging to the console. Druid nodes also emit periodic metrics about their state. For more about metrics, see [Configuration](../configuration/index.html#enabling-metrics). Metric logs are printed to the console by default, and can be disabled with `-Ddruid.emitter.logging.logLevel=debug`. diff --git a/docs/content/configuration/realtime.md b/docs/content/configuration/realtime.md index 42343fdf1ad..e2d06611bb4 100644 --- a/docs/content/configuration/realtime.md +++ b/docs/content/configuration/realtime.md @@ -19,10 +19,10 @@ --- layout: doc_page +title: "Realtime Node Configuration" --- +# Realtime Node Configuration -Realtime Node Configuration -============================== For general Realtime Node information, see [here](../design/realtime.html). Runtime Configuration diff --git a/docs/content/dependencies/cassandra-deep-storage.md b/docs/content/dependencies/cassandra-deep-storage.md index 3b0e3f1950f..3ba1791bdf1 100644 --- a/docs/content/dependencies/cassandra-deep-storage.md +++ b/docs/content/dependencies/cassandra-deep-storage.md @@ -19,15 +19,18 @@ --- layout: doc_page +title: "Cassandra Deep Storage" --- +# Cassandra Deep Storage ## Introduction + Druid can use Cassandra as a deep storage mechanism. Segments and their metadata are stored in Cassandra in two tables: -`index_storage` and `descriptor_storage`. Underneath the hood, the Cassandra integration leverages Astyanax. The +`index_storage` and `descriptor_storage`. Underneath the hood, the Cassandra integration leverages Astyanax. 
The index storage table is a [Chunked Object](https://github.com/Netflix/astyanax/wiki/Chunked-Object-Store) repository. It contains compressed segments for distribution to historical nodes. Since segments can be large, the Chunked Object storage allows the integration to multi-thread -the write to Cassandra, and spreads the data across all the nodes in a cluster. The descriptor storage table is a normal C* table that -stores the segment metadatak. +the write to Cassandra, and spreads the data across all the nodes in a cluster. The descriptor storage table is a normal C* table that +stores the segment metadata. ## Schema Below are the create statements for each: diff --git a/docs/content/dependencies/deep-storage.md b/docs/content/dependencies/deep-storage.md index 02a97d30e26..b75d1be90dd 100644 --- a/docs/content/dependencies/deep-storage.md +++ b/docs/content/dependencies/deep-storage.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Deep Storage" --- - # Deep Storage Deep storage is where segments are stored. It is a storage mechanism that Druid does not provide. This deep storage infrastructure defines the level of durability of your data, as long as Druid nodes can see this storage infrastructure and get at the segments stored on it, you will not lose data no matter how many Druid nodes you lose. If segments disappear from this storage layer, then you will lose whatever data those segments represented. diff --git a/docs/content/dependencies/metadata-storage.md b/docs/content/dependencies/metadata-storage.md index 7f45f02085f..11fad816688 100644 --- a/docs/content/dependencies/metadata-storage.md +++ b/docs/content/dependencies/metadata-storage.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Metadata Storage" --- - # Metadata Storage The Metadata Storage is an external dependency of Druid.
Druid uses it to store diff --git a/docs/content/dependencies/zookeeper.md b/docs/content/dependencies/zookeeper.md index e9d60941881..c1b6bb1e6a2 100644 --- a/docs/content/dependencies/zookeeper.md +++ b/docs/content/dependencies/zookeeper.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "ZooKeeper" --- # ZooKeeper + Druid uses [ZooKeeper](http://zookeeper.apache.org/) (ZK) for management of current cluster state. The operations that happen over ZK are 1. [Coordinator](../design/coordinator.html) leader election diff --git a/docs/content/design/auth.md b/docs/content/design/auth.md index 92f0c628533..62406b8f760 100644 --- a/docs/content/design/auth.md +++ b/docs/content/design/auth.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Authentication and Authorization" --- - # Authentication and Authorization |Property|Type|Description|Default|Required| diff --git a/docs/content/design/broker.md b/docs/content/design/broker.md index 529da809b65..7a35cd4dce4 100644 --- a/docs/content/design/broker.md +++ b/docs/content/design/broker.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Broker" --- -Broker -====== +# Broker ### Configuration diff --git a/docs/content/design/coordinator.md b/docs/content/design/coordinator.md index 4a16e9391e7..e75935764eb 100644 --- a/docs/content/design/coordinator.md +++ b/docs/content/design/coordinator.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Coordinator Node" --- -Coordinator Node -================ +# Coordinator Node ### Configuration diff --git a/docs/content/design/historical.md b/docs/content/design/historical.md index 9398e522f59..06f8f20291f 100644 --- a/docs/content/design/historical.md +++ b/docs/content/design/historical.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Historical Node" --- -Historical Node -=============== +# Historical Node ### Configuration diff --git a/docs/content/design/index.md b/docs/content/design/index.md index 7cb96b83c1a..4bcb3bb5998 100644 --- a/docs/content/design/index.md +++ 
b/docs/content/design/index.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Design" --- # What is Druid? @@ -159,7 +160,7 @@ queries: - Bitmap compression for bitmap indexes - Type-aware compression for all columns -Periodically, segments are committed and published. At this point, they are written to [deep storage](#deep-storage), +Periodically, segments are committed and published. At this point, they are written to [deep storage](#deep-storage), become immutable, and move from MiddleManagers to the Historical processes (see [Architecture](#architecture) above for details). An entry about the segment is also written to the [metadata store](#metadata-storage). This entry is a self-describing bit of metadata about the segment, including things like the schema of the segment, its size, and its diff --git a/docs/content/design/indexing-service.md b/docs/content/design/indexing-service.md index 4a8edef294e..b23f9c63482 100644 --- a/docs/content/design/indexing-service.md +++ b/docs/content/design/indexing-service.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Indexing Service" --- -Indexing Service -================ +# Indexing Service The indexing service is a highly-available, distributed service that runs indexing related tasks. 
diff --git a/docs/content/design/middlemanager.md b/docs/content/design/middlemanager.md index 9779cdb336a..b3e5d842d04 100644 --- a/docs/content/design/middlemanager.md +++ b/docs/content/design/middlemanager.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "MiddleManager Node" --- - -Middle Manager Node ------------------- +# MiddleManager Node ### Configuration diff --git a/docs/content/design/overlord.md b/docs/content/design/overlord.md index eaad38e6947..92c394b4331 100644 --- a/docs/content/design/overlord.md +++ b/docs/content/design/overlord.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Overlord Node" --- - -Overlord Node -------------- +# Overlord Node ### Configuration diff --git a/docs/content/design/peons.md b/docs/content/design/peons.md index c33a3d4b237..16fdff12712 100644 --- a/docs/content/design/peons.md +++ b/docs/content/design/peons.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Peons" --- - -Peons ------ +# Peons ### Configuration diff --git a/docs/content/design/plumber.md b/docs/content/design/plumber.md index ffbdabae122..e3c1cfa7dec 100644 --- a/docs/content/design/plumber.md +++ b/docs/content/design/plumber.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Druid Plumbers" --- - # Druid Plumbers The plumber handles generated segments both while they are being generated and when they are "done". This is also technically a pluggable interface and there are multiple implementations. However, plumbers handle numerous complex details, and therefore an advanced understanding of Druid is recommended before implementing your own. diff --git a/docs/content/design/realtime.md b/docs/content/design/realtime.md index 815aa68571e..02e4a69d944 100644 --- a/docs/content/design/realtime.md +++ b/docs/content/design/realtime.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Real-time Node" --- - -Real-time Node -============== +# Real-time Node
NOTE: Realtime nodes are deprecated. Please use the Kafka Indexing Service for stream pull use cases instead. diff --git a/docs/content/design/segments.md b/docs/content/design/segments.md index 50f45bd8bb2..d1b8e65f342 100644 --- a/docs/content/design/segments.md +++ b/docs/content/design/segments.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Segments" --- -Segments -======== +# Segments Druid stores its index in *segment files*, which are partitioned by time. In a basic setup, one segment file is created for each time diff --git a/docs/content/development/build.md b/docs/content/development/build.md index 9b9f6bea5ee..f0bd6e8d735 100644 --- a/docs/content/development/build.md +++ b/docs/content/development/build.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Build from Source" --- - -### Build from Source +# Build from Source You can build Druid directly from source. Please note that these instructions are for building the latest stable version of Druid. For building the latest code in master, follow the instructions [here](https://github.com/apache/incubator-druid/blob/master/docs/content/development/build.md). diff --git a/docs/content/development/experimental.md b/docs/content/development/experimental.md index fecaff1071c..760ea0d64b4 100644 --- a/docs/content/development/experimental.md +++ b/docs/content/development/experimental.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Experimental Features" --- - -# About Experimental Features +# Experimental Features Experimental features are features we have developed but have not fully tested in a production environment. If you choose to try them out, there will likely be edge cases that we have not covered. We would love feedback on any of these features, whether they are bug reports, suggestions for improvement, or letting us know they work as intended. 
diff --git a/docs/content/development/extensions-contrib/ambari-metrics-emitter.md b/docs/content/development/extensions-contrib/ambari-metrics-emitter.md index fde22799a8e..6357ca3618b 100644 --- a/docs/content/development/extensions-contrib/ambari-metrics-emitter.md +++ b/docs/content/development/extensions-contrib/ambari-metrics-emitter.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Ambari Metrics Emitter" --- - # Ambari Metrics Emitter To use this extension, make sure to [include](../../operations/including-extensions.html) `ambari-metrics-emitter` extension. diff --git a/docs/content/development/extensions-contrib/azure.md b/docs/content/development/extensions-contrib/azure.md index 035fe27c65d..bea6b71df1d 100644 --- a/docs/content/development/extensions-contrib/azure.md +++ b/docs/content/development/extensions-contrib/azure.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Microsoft Azure" --- - # Microsoft Azure To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-azure-extensions` extension. diff --git a/docs/content/development/extensions-contrib/cassandra.md b/docs/content/development/extensions-contrib/cassandra.md index 7a70f69d4a6..b1e3c9ed3bd 100644 --- a/docs/content/development/extensions-contrib/cassandra.md +++ b/docs/content/development/extensions-contrib/cassandra.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Apache Cassandra" --- - # Apache Cassandra To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-cassandra-storage` extension. 
diff --git a/docs/content/development/extensions-contrib/cloudfiles.md b/docs/content/development/extensions-contrib/cloudfiles.md index ad7acee4c57..363507d4ea0 100644 --- a/docs/content/development/extensions-contrib/cloudfiles.md +++ b/docs/content/development/extensions-contrib/cloudfiles.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Rackspace Cloud Files" --- - # Rackspace Cloud Files ## Deep Storage diff --git a/docs/content/development/extensions-contrib/distinctcount.md b/docs/content/development/extensions-contrib/distinctcount.md index 0bc4d3f9b84..77e6c39483c 100644 --- a/docs/content/development/extensions-contrib/distinctcount.md +++ b/docs/content/development/extensions-contrib/distinctcount.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "DistinctCount Aggregator" --- - -# DistinctCount aggregator +# DistinctCount Aggregator To use this extension, make sure to [include](../../operations/including-extensions.html) the `druid-distinctcount` extension. diff --git a/docs/content/development/extensions-contrib/google.md b/docs/content/development/extensions-contrib/google.md index 4a1c26c6f13..4d587eccf52 100644 --- a/docs/content/development/extensions-contrib/google.md +++ b/docs/content/development/extensions-contrib/google.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Google Cloud Storage" --- - # Google Cloud Storage To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-google-extensions` extension. diff --git a/docs/content/development/extensions-contrib/graphite.md b/docs/content/development/extensions-contrib/graphite.md index a70910d0d67..a50706a8fa1 100644 --- a/docs/content/development/extensions-contrib/graphite.md +++ b/docs/content/development/extensions-contrib/graphite.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Graphite Emitter" --- - # Graphite Emitter To use this extension, make sure to [include](../../operations/including-extensions.html) `graphite-emitter` extension. 
diff --git a/docs/content/development/extensions-contrib/influx.md b/docs/content/development/extensions-contrib/influx.md index b2f61f4ad73..3446b48f972 100644 --- a/docs/content/development/extensions-contrib/influx.md +++ b/docs/content/development/extensions-contrib/influx.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "InfluxDB Line Protocol Parser" --- - # InfluxDB Line Protocol Parser To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-influx-extensions`. diff --git a/docs/content/development/extensions-contrib/kafka-emitter.md b/docs/content/development/extensions-contrib/kafka-emitter.md index a2df861bab1..2ad94295fc3 100644 --- a/docs/content/development/extensions-contrib/kafka-emitter.md +++ b/docs/content/development/extensions-contrib/kafka-emitter.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Kafka Emitter" --- - # Kafka Emitter To use this extension, make sure to [include](../../operations/including-extensions.html) `kafka-emitter` extension. diff --git a/docs/content/development/extensions-contrib/kafka-simple.md b/docs/content/development/extensions-contrib/kafka-simple.md index bb811eee283..1aeeea8068d 100644 --- a/docs/content/development/extensions-contrib/kafka-simple.md +++ b/docs/content/development/extensions-contrib/kafka-simple.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Kafka Simple Consumer" --- - # Kafka Simple Consumer To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-kafka-eight-simpleConsumer` extension. 
diff --git a/docs/content/development/extensions-contrib/materialized-view.md b/docs/content/development/extensions-contrib/materialized-view.md index 8c92480e408..67fa1540dc9 100644 --- a/docs/content/development/extensions-contrib/materialized-view.md +++ b/docs/content/development/extensions-contrib/materialized-view.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Materialized View" --- - # Materialized View To use this feature, make sure to only load materialized-view-selection on broker and load materialized-view-maintenance on overlord. In addtion, this feature currently requires a hadoop cluster. diff --git a/docs/content/development/extensions-contrib/opentsdb-emitter.md b/docs/content/development/extensions-contrib/opentsdb-emitter.md index 27e9069b08e..17a3f637536 100644 --- a/docs/content/development/extensions-contrib/opentsdb-emitter.md +++ b/docs/content/development/extensions-contrib/opentsdb-emitter.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "OpenTSDB Emitter" --- - -# Opentsdb Emitter +# OpenTSDB Emitter To use this extension, make sure to [include](../../operations/including-extensions.html) `opentsdb-emitter` extension. @@ -57,5 +57,5 @@ e.g. "type" ] ``` - + For most use-cases, the default configuration is sufficient. diff --git a/docs/content/development/extensions-contrib/orc.md b/docs/content/development/extensions-contrib/orc.md index 8d65d0bf8d3..3674ec7dbb4 100644 --- a/docs/content/development/extensions-contrib/orc.md +++ b/docs/content/development/extensions-contrib/orc.md @@ -19,15 +19,15 @@ --- layout: doc_page +title: "ORC" --- - -# Orc +# ORC To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-orc-extensions`. -This extension enables Druid to ingest and understand the Apache Orc data format offline. +This extension enables Druid to ingest and understand the Apache ORC data format offline. 
-## Orc Hadoop Parser +## ORC Hadoop Parser This is for batch ingestion using the HadoopDruidIndexer. The inputFormat of inputSpec in ioConfig must be set to `"org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat"`. @@ -35,7 +35,7 @@ This is for batch ingestion using the HadoopDruidIndexer. The inputFormat of inp |----------|-------------|----------------------------------------------------------------------------------------|---------| |type | String | This should say `orc` | yes| |parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Any parse spec that extends ParseSpec is possible but only their TimestampSpec and DimensionsSpec are used. | yes| -|typeString| String | String representation of Orc struct type info. If not specified, auto constructed from parseSpec but all metric columns are dropped | no| +|typeString| String | String representation of ORC struct type info. If not specified, auto constructed from parseSpec but all metric columns are dropped | no| |mapFieldNameFormat| String | String format for resolving the flatten map fields. Default is `_`. | no | For example of `typeString`, string column col1 and array of string column col2 is represented by `"struct>"`. diff --git a/docs/content/development/extensions-contrib/rabbitmq.md b/docs/content/development/extensions-contrib/rabbitmq.md index 055496362d8..a1bce5011e2 100644 --- a/docs/content/development/extensions-contrib/rabbitmq.md +++ b/docs/content/development/extensions-contrib/rabbitmq.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "RabbitMQ" --- - # RabbitMQ To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-rabbitmq` extension. 
diff --git a/docs/content/development/extensions-contrib/redis-cache.md b/docs/content/development/extensions-contrib/redis-cache.md index c3c6c6cbb04..a446b4fe8f1 100644 --- a/docs/content/development/extensions-contrib/redis-cache.md +++ b/docs/content/development/extensions-contrib/redis-cache.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Druid Redis Cache" --- - -Druid Redis Cache --------------------- +# Druid Redis Cache A cache implementation for Druid based on [Redis](https://github.com/antirez/redis). diff --git a/docs/content/development/extensions-contrib/rocketmq.md b/docs/content/development/extensions-contrib/rocketmq.md index 3ec025b444f..c9c2e00e1a7 100644 --- a/docs/content/development/extensions-contrib/rocketmq.md +++ b/docs/content/development/extensions-contrib/rocketmq.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "RocketMQ" --- - # RocketMQ To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-rocketmq` extension. diff --git a/docs/content/development/extensions-contrib/sqlserver.md b/docs/content/development/extensions-contrib/sqlserver.md index 78873d0782e..99a9fac67fd 100644 --- a/docs/content/development/extensions-contrib/sqlserver.md +++ b/docs/content/development/extensions-contrib/sqlserver.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Microsoft SQLServer" --- - # Microsoft SQLServer Make sure to [include](../../operations/including-extensions.html) `sqlserver-metadata-storage` as an extension. diff --git a/docs/content/development/extensions-contrib/statsd.md b/docs/content/development/extensions-contrib/statsd.md index 5a150bf9f15..2f371096b75 100644 --- a/docs/content/development/extensions-contrib/statsd.md +++ b/docs/content/development/extensions-contrib/statsd.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "StatsD Emitter" --- - # StatsD Emitter To use this extension, make sure to [include](../../operations/including-extensions.html) `statsd-emitter` extension. 
diff --git a/docs/content/development/extensions-contrib/thrift.md b/docs/content/development/extensions-contrib/thrift.md index 3a1d197346d..284879b0093 100644 --- a/docs/content/development/extensions-contrib/thrift.md +++ b/docs/content/development/extensions-contrib/thrift.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Thrift" --- - # Thrift To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-thrift-extensions`. diff --git a/docs/content/development/extensions-contrib/time-min-max.md b/docs/content/development/extensions-contrib/time-min-max.md index b4eff51491c..6782042b004 100644 --- a/docs/content/development/extensions-contrib/time-min-max.md +++ b/docs/content/development/extensions-contrib/time-min-max.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Timestamp Min/Max aggregators" --- - # Timestamp Min/Max aggregators To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-time-min-max`. diff --git a/docs/content/development/extensions-core/approximate-histograms.md b/docs/content/development/extensions-core/approximate-histograms.md index 895ca198a0f..ae96d15e792 100644 --- a/docs/content/development/extensions-core/approximate-histograms.md +++ b/docs/content/development/extensions-core/approximate-histograms.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Approximate Histogram aggregator" --- - # Approximate Histogram aggregator Make sure to [include](../../operations/including-extensions.html) `druid-histogram` as an extension. diff --git a/docs/content/development/extensions-core/avro.md b/docs/content/development/extensions-core/avro.md index 9e50cbe6ebc..c8ba667bcb9 100644 --- a/docs/content/development/extensions-core/avro.md +++ b/docs/content/development/extensions-core/avro.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Avro" --- - # Avro This extension enables Druid to ingest and understand the Apache Avro data format. 
Make sure to [include](../../operations/including-extensions.html) `druid-avro-extensions` as an extension. diff --git a/docs/content/development/extensions-core/bloom-filter.md b/docs/content/development/extensions-core/bloom-filter.md index 3dd2cca20fb..5cd895628e1 100644 --- a/docs/content/development/extensions-core/bloom-filter.md +++ b/docs/content/development/extensions-core/bloom-filter.md @@ -19,25 +19,26 @@ --- layout: doc_page +title: "Bloom Filter" --- - -# Druid Bloom Filter +# Bloom Filter Make sure to [include](../../operations/including-extensions.html) `druid-bloom-filter` as an extension. -BloomFilter is a probabilistic data structure for set membership check. -Following are some characterstics of BloomFilter +BloomFilter is a probabilistic data structure for set membership check. +Following are some characteristics of BloomFilter - BloomFilters are highly space efficient when compared to using a HashSet. - Because of the probabilistic nature of bloom filter false positive (element not present in bloom filter but test() says true) are possible -- false negatives are not possible (if element is present then test() will never say false). -- The false positive probability is configurable (default: 5%) depending on which storage requirement may increase or decrease. +- false negatives are not possible (if element is present then test() will never say false). +- The false positive probability is configurable (default: 5%) depending on which storage requirement may increase or decrease. - Lower the false positive probability greater is the space requirement. - Bloom filters are sensitive to number of elements that will be inserted in the bloom filter. - During the creation of bloom filter expected number of entries must be specified.If the number of insertions exceed the specified initial number of entries then false positive probability will increase accordingly.
Internally, this implementation of bloom filter uses Murmur3 fast non-cryptographic hash algorithm. -### Json Representation of Bloom Filter +### JSON Representation of Bloom Filter + ```json { "type" : "bloom", @@ -60,7 +61,7 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp - 1 byte for the number of hash functions. - 1 big endian int(That is how OutputStream works) for the number of longs in the bitset - big endian longs in the BloomKFilter bitset - + Note: `org.apache.hive.common.util.BloomKFilter` provides a serialize method which can be used to serialize bloom filters to outputStream. ### SQL Queries diff --git a/docs/content/development/extensions-core/datasketches-extension.md b/docs/content/development/extensions-core/datasketches-extension.md index 781cf1a58c2..aec599c27c9 100644 --- a/docs/content/development/extensions-core/datasketches-extension.md +++ b/docs/content/development/extensions-core/datasketches-extension.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "DataSketches extension" --- - -## DataSketches extension +# DataSketches extension Druid aggregators based on [datasketches](http://datasketches.github.io/) library. Sketches are data structures implementing approximate streaming mergeable algorithms. Sketches can be ingested from the outside of Druid or built from raw data at ingestion time. Sketches can be stored in Druid segments as additive metrics. 
diff --git a/docs/content/development/extensions-core/datasketches-hll.md b/docs/content/development/extensions-core/datasketches-hll.md index 0af0a3187a5..783af1f57ab 100644 --- a/docs/content/development/extensions-core/datasketches-hll.md +++ b/docs/content/development/extensions-core/datasketches-hll.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "DataSketches HLL Sketch module" --- - -## DataSketches HLL Sketch module +# DataSketches HLL Sketch module This module provides Druid aggregators for distinct counting based on HLL sketch from [datasketches](http://datasketches.github.io/) library. At ingestion time, this aggregator creates the HLL sketch objects to be stored in Druid segments. At query time, sketches are read and merged together. In the end, by default, you receive the estimate of the number of distinct values presented to the sketch. Also, you can use post aggregator to produce a union of sketch columns in the same row. You can use the HLL sketch aggregator on columns of any identifiers. It will return estimated cardinality of the column. diff --git a/docs/content/development/extensions-core/datasketches-quantiles.md b/docs/content/development/extensions-core/datasketches-quantiles.md index 83bd9278a3c..4b5fe83d32f 100644 --- a/docs/content/development/extensions-core/datasketches-quantiles.md +++ b/docs/content/development/extensions-core/datasketches-quantiles.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "DataSketches Quantiles Sketch module" --- - -## DataSketches Quantiles Sketch module +# DataSketches Quantiles Sketch module This module provides Druid aggregators based on numeric quantiles DoublesSketch from [datasketches](http://datasketches.github.io/) library. 
Quantiles sketch is a mergeable streaming algorithm to estimate the distribution of values, and approximately answer queries about the rank of a value, probability mass function of the distribution (PMF) or histogram, cummulative distribution function (CDF), and quantiles (median, min, max, 95th percentile and such). See [Quantiles Sketch Overview](https://datasketches.github.io/docs/Quantiles/QuantilesOverview.html). diff --git a/docs/content/development/extensions-core/datasketches-theta.md b/docs/content/development/extensions-core/datasketches-theta.md index 46893dcd30d..8eca141d2dc 100644 --- a/docs/content/development/extensions-core/datasketches-theta.md +++ b/docs/content/development/extensions-core/datasketches-theta.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "DataSketches Theta Sketch module" --- - -## DataSketches Theta Sketch module +# DataSketches Theta Sketch module This module provides Druid aggregators based on Theta sketch from [datasketches](http://datasketches.github.io/) library. Note that sketch algorithms are approximate; see details in the "Accuracy" section of the datasketches doc. At ingestion time, this aggregator creates the Theta sketch objects which get stored in Druid segments. Logically speaking, a Theta sketch object can be thought of as a Set data structure. At query time, sketches are read and aggregated (set unioned) together. In the end, by default, you receive the estimate of the number of unique entries in the sketch object. Also, you can use post aggregators to do union, intersection or difference on sketch columns in the same row. 
diff --git a/docs/content/development/extensions-core/datasketches-tuple.md b/docs/content/development/extensions-core/datasketches-tuple.md index f92567e81c1..4cfa5a9c4df 100644 --- a/docs/content/development/extensions-core/datasketches-tuple.md +++ b/docs/content/development/extensions-core/datasketches-tuple.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "DataSketches Tuple Sketch module" --- - -## DataSketches Tuple Sketch module +# DataSketches Tuple Sketch module This module provides Druid aggregators based on Tuple sketch from [datasketches](http://datasketches.github.io/) library. ArrayOfDoublesSketch sketches extend the functionality of the count-distinct Theta sketches by adding arrays of double values associated with unique keys. diff --git a/docs/content/development/extensions-core/druid-basic-security.md b/docs/content/development/extensions-core/druid-basic-security.md index 59d74c13b89..6b80862ee4f 100644 --- a/docs/content/development/extensions-core/druid-basic-security.md +++ b/docs/content/development/extensions-core/druid-basic-security.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Basic Security" --- - # Druid Basic Security This extension adds: @@ -58,7 +58,7 @@ druid.auth.authenticator.MyBasicAuthenticator.initialInternalClientPassword=pass druid.auth.authenticator.MyBasicAuthenticator.authorizerName=MyBasicAuthorizer ``` -To use the Basic authenticator, add an authenticator with type `basic` to the authenticatorChain. +To use the Basic authenticator, add an authenticator with type `basic` to the authenticatorChain. Configuration of the named authenticator is assigned through properties with the form: @@ -208,14 +208,14 @@ Set the permissions of {roleName}. 
This replaces the previous set of permissions Content: List of JSON Resource-Action objects, e.g.: ``` [ -{ +{ "resource": { "name": "wiki.*", "type": "DATASOURCE" }, "action": "READ" }, -{ +{ "resource": { "name": "wikiticker", "type": "DATASOURCE" @@ -225,7 +225,7 @@ Content: List of JSON Resource-Action objects, e.g.: ] ``` -The "name" field for resources in the permission definitions are regexes used to match resource names during authorization checks. +The "name" field for resources in the permission definitions are regexes used to match resource names during authorization checks. Please see [Defining permissions](#defining-permissions) for more details. @@ -238,7 +238,7 @@ Return the current load status of the local caches of the authorization database ### Authenticator If `druid.auth.authenticator..initialAdminPassword` is set, a default admin user named "admin" will be created, with the specified initial password. If this configuration is omitted, the "admin" user will not be created. -If `druid.auth.authenticator..initialInternalClientPassword` is set, a default internal system user named "druid_system" will be created, with the specified initial password. If this configuration is omitted, the "druid_system" user will not be created. +If `druid.auth.authenticator..initialInternalClientPassword` is set, a default internal system user named "druid_system" will be created, with the specified initial password. If this configuration is omitted, the "druid_system" user will not be created. 
### Authorizer diff --git a/docs/content/development/extensions-core/druid-kerberos.md b/docs/content/development/extensions-core/druid-kerberos.md index 71ea60ae7b5..c74ab06dd86 100644 --- a/docs/content/development/extensions-core/druid-kerberos.md +++ b/docs/content/development/extensions-core/druid-kerberos.md @@ -19,12 +19,12 @@ --- layout: doc_page +title: "Kerberos" --- - -# Druid-Kerberos +# Kerberos Druid Extension to enable Authentication for Druid Nodes using Kerberos. -This extension adds an Authenticator which is used to protect HTTP Endpoints using the simple and protected GSSAPI negotiation mechanism [SPNEGO](https://en.wikipedia.org/wiki/SPNEGO). +This extension adds an Authenticator which is used to protect HTTP Endpoints using the simple and protected GSSAPI negotiation mechanism [SPNEGO](https://en.wikipedia.org/wiki/SPNEGO). Make sure to [include](../../operations/including-extensions.html) `druid-kerberos` as an extension. @@ -57,23 +57,23 @@ The configuration examples in the rest of this document will use "kerberos" as t |`druid.auth.authenticator.kerberos.cookieSignatureSecret`|`secretString`| Secret used to sign authentication cookies. It is advisable to explicitly set it, if you have multiple druid ndoes running on same machine with different ports as the Cookie Specification does not guarantee isolation by port.||No| |`druid.auth.authenticator.kerberos.authorizerName`|Depends on available authorizers|Authorizer that requests should be directed to|Empty|Yes| -As a note, it is required that the SPNego principal in use by the druid nodes must start with HTTP (This specified by [RFC-4559](https://tools.ietf.org/html/rfc4559)) and must be of the form "HTTP/_HOST@REALM". +As a note, it is required that the SPNego principal in use by the druid nodes must start with HTTP (This is specified by [RFC-4559](https://tools.ietf.org/html/rfc4559)) and must be of the form "HTTP/_HOST@REALM".
The special string _HOST will be replaced automatically with the value of config `druid.host` ### Auth to Local Syntax `druid.auth.authenticator.kerberos.authToLocal` allows you to set a general rules for mapping principal names to local user names. The syntax for mapping rules is `RULE:\[n:string](regexp)s/pattern/replacement/g`. The integer n indicates how many components the target principal should have. If this matches, then a string will be formed from string, substituting the realm of the principal for $0 and the n‘th component of the principal for $n. e.g. if the principal was druid/admin then `\[2:$2$1suffix]` would result in the string `admindruidsuffix`. If this string matches regexp, then the s//\[g] substitution command will be run over the string. The optional g will cause the substitution to be global over the string, instead of replacing only the first match in the string. -If required, multiple rules can be be joined by newline character and specified as a String. +If required, multiple rules can be joined by newline character and specified as a String. ### Increasing HTTP Header size for large SPNEGO negotiate header In Active Directory environment, SPNEGO token in the Authorization header includes PAC (Privilege Access Certificate) information, which includes all security groups for the user. In some cases when the user belongs to many security groups the header to grow beyond what druid can handle by default. In such cases, max request header size that druid can handle can be increased by setting `druid.server.http.maxRequestHeaderSize` (default 8Kb) and `druid.router.http.maxRequestBufferSize` (default 8Kb). -## Configuring Kerberos Escalated Client +## Configuring Kerberos Escalated Client -Druid internal nodes communicate with each other using an escalated http Client. +Druid internal nodes communicate with each other using an escalated http Client.
A Kerberos enabled escalated HTTP Client can be configured by following properties - |Property|Example Values|Description|Default|required| @@ -83,15 +83,15 @@ Druid internal nodes communicate with each other using an escalated http Client. |`druid.escalator.internalClientKeytab`|`/etc/security/keytabs/druid.keytab`|Path to keytab file used for internal node communication|n/a|Yes| |`druid.escalator.authorizerName`|`MyBasicAuthorizer`|Authorizer that requests should be directed to.|n/a|Yes| -## Accessing Druid HTTP end points when kerberos security is enabled -1. To access druid HTTP endpoints via curl user will need to first login using `kinit` command as follows - +## Accessing Druid HTTP end points when kerberos security is enabled +1. To access druid HTTP endpoints via curl user will need to first login using `kinit` command as follows - ``` kinit -k -t user@REALM.COM ``` 2. Once the login is successful verify that login is successful using `klist` command -3. Now you can access druid HTTP endpoints using curl command as follows - +3. Now you can access druid HTTP endpoints using curl command as follows - ``` curl --negotiate -u:anyUser -b ~/cookies.txt -c ~/cookies.txt -X POST -H'Content-Type: application/json' @@ -105,13 +105,13 @@ Druid internal nodes communicate with each other using an escalated http Client. Note: Above command will authenticate the user first time using SPNego negotiate mechanism and store the authentication cookie in file. For subsequent requests the cookie will be used for authentication. ## Accessing coordinator or overlord console from web browser -To access Coordinator/Overlord console from browser you will need to configure your browser for SPNego authentication as follows - +To access Coordinator/Overlord console from browser you will need to configure your browser for SPNego authentication as follows - 1. Safari - No configurations required. -2. Firefox - Open firefox and follow these steps - +2. 
Firefox - Open firefox and follow these steps - 1. Go to `about:config` and search for `network.negotiate-auth.trusted-uris`. 2. Double-click and add the following values: `"http://druid-coordinator-hostname:ui-port"` and `"http://druid-overlord-hostname:port"` -3. Google Chrome - From the command line run following commands - +3. Google Chrome - From the command line run following commands - 1. `google-chrome --auth-server-whitelist="druid-coordinator-hostname" --auth-negotiate-delegate-whitelist="druid-coordinator-hostname"` 2. `google-chrome --auth-server-whitelist="druid-overlord-hostname" --auth-negotiate-delegate-whitelist="druid-overlord-hostname"` 4. Internet Explorer - @@ -119,4 +119,4 @@ To access Coordinator/Overlord console from browser you will need to configure y 2. Allow negotiation for the UI website. ## Sending Queries programmatically -Many HTTP client libraries, such as Apache Commons [HttpComponents](https://hc.apache.org/), already have support for performing SPNEGO authentication. You can use any of the available HTTP client library to communicate with druid cluster. +Many HTTP client libraries, such as Apache Commons [HttpComponents](https://hc.apache.org/), already have support for performing SPNEGO authentication. You can use any of the available HTTP client library to communicate with druid cluster. 
diff --git a/docs/content/development/extensions-core/druid-lookups.md b/docs/content/development/extensions-core/druid-lookups.md index 101acf58665..473109dcccc 100644 --- a/docs/content/development/extensions-core/druid-lookups.md +++ b/docs/content/development/extensions-core/druid-lookups.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Cached Lookup Module" --- # Cached Lookup Module diff --git a/docs/content/development/extensions-core/examples.md b/docs/content/development/extensions-core/examples.md index 7199d52a0f9..02b22e6c3bd 100644 --- a/docs/content/development/extensions-core/examples.md +++ b/docs/content/development/extensions-core/examples.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Extension Examples" --- - -# Druid examples +# Extension Examples ## TwitterSpritzerFirehose diff --git a/docs/content/development/extensions-core/hdfs.md b/docs/content/development/extensions-core/hdfs.md index da6127f64c7..e2fe62c2317 100644 --- a/docs/content/development/extensions-core/hdfs.md +++ b/docs/content/development/extensions-core/hdfs.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "HDFS" --- - # HDFS Make sure to [include](../../operations/including-extensions.html) `druid-hdfs-storage` as an extension. diff --git a/docs/content/development/extensions-core/kafka-eight-firehose.md b/docs/content/development/extensions-core/kafka-eight-firehose.md index c32e725ab4a..2ab4122f124 100644 --- a/docs/content/development/extensions-core/kafka-eight-firehose.md +++ b/docs/content/development/extensions-core/kafka-eight-firehose.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Kafka Eight Firehose" --- - # Kafka Eight Firehose Make sure to [include](../../operations/including-extensions.html) `druid-kafka-eight` as an extension. 
diff --git a/docs/content/development/extensions-core/kafka-extraction-namespace.md b/docs/content/development/extensions-core/kafka-extraction-namespace.md index 93437ed4ac6..6d9ea1676eb 100644 --- a/docs/content/development/extensions-core/kafka-extraction-namespace.md +++ b/docs/content/development/extensions-core/kafka-extraction-namespace.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Kafka Lookups" --- - # Kafka Lookups
diff --git a/docs/content/development/extensions-core/kafka-ingestion.md b/docs/content/development/extensions-core/kafka-ingestion.md index ebecacf15e9..f67adfc4c54 100644 --- a/docs/content/development/extensions-core/kafka-ingestion.md +++ b/docs/content/development/extensions-core/kafka-ingestion.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Kafka Indexing Service" --- - # Kafka Indexing Service The Kafka indexing service enables the configuration of *supervisors* on the Overlord, which facilitate ingestion from diff --git a/docs/content/development/extensions-core/lookups-cached-global.md b/docs/content/development/extensions-core/lookups-cached-global.md index ef7b2ad560f..c5a89cbd04a 100644 --- a/docs/content/development/extensions-core/lookups-cached-global.md +++ b/docs/content/development/extensions-core/lookups-cached-global.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Globally Cached Lookups" --- - # Globally Cached Lookups
diff --git a/docs/content/development/extensions-core/mysql.md b/docs/content/development/extensions-core/mysql.md index c054bda841e..105bd0bce2e 100644 --- a/docs/content/development/extensions-core/mysql.md +++ b/docs/content/development/extensions-core/mysql.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "MySQL Metadata Store" --- - # MySQL Metadata Store Make sure to [include](../../operations/including-extensions.html) `mysql-metadata-storage` as an extension. diff --git a/docs/content/development/extensions-core/parquet.md b/docs/content/development/extensions-core/parquet.md index a31a072f3b5..81f1b5882b0 100644 --- a/docs/content/development/extensions-core/parquet.md +++ b/docs/content/development/extensions-core/parquet.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Druid Parquet Extension" --- - # Druid Parquet Extension This module extends [Druid Hadoop based indexing](../../ingestion/hadoop.html) to ingest data directly from offline diff --git a/docs/content/development/extensions-core/postgresql.md b/docs/content/development/extensions-core/postgresql.md index 0a121265c81..cc54cdf8b9f 100644 --- a/docs/content/development/extensions-core/postgresql.md +++ b/docs/content/development/extensions-core/postgresql.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "PostgreSQL Metadata Store" --- - # PostgreSQL Metadata Store Make sure to [include](../../operations/including-extensions.html) `postgresql-metadata-storage` as an extension. diff --git a/docs/content/development/extensions-core/protobuf.md b/docs/content/development/extensions-core/protobuf.md index 000e72a250a..b8f31257f76 100644 --- a/docs/content/development/extensions-core/protobuf.md +++ b/docs/content/development/extensions-core/protobuf.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Protobuf" --- - # Protobuf This extension enables Druid to ingest and understand the Protobuf data format. 
Make sure to [include](../../operations/including-extensions.html) `druid-protobuf-extensions` as an extension. diff --git a/docs/content/development/extensions-core/s3.md b/docs/content/development/extensions-core/s3.md index dcd81cd3b33..df8d745bff6 100644 --- a/docs/content/development/extensions-core/s3.md +++ b/docs/content/development/extensions-core/s3.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "S3-compatible" --- - # S3-compatible Make sure to [include](../../operations/including-extensions.html) `druid-s3-extensions` as an extension. diff --git a/docs/content/development/extensions-core/simple-client-sslcontext.md b/docs/content/development/extensions-core/simple-client-sslcontext.md index f5694e89c14..23036e9a2a4 100644 --- a/docs/content/development/extensions-core/simple-client-sslcontext.md +++ b/docs/content/development/extensions-core/simple-client-sslcontext.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Simple SSLContext Provider Module" --- - -## Simple SSLContext Provider Module +# Simple SSLContext Provider Module This module contains a simple implementation of [SSLContext](http://docs.oracle.com/javase/8/docs/api/javax/net/ssl/SSLContext.html) that will be injected to be used with HttpClient that Druid nodes use internally to communicate with each other. To learn more about diff --git a/docs/content/development/extensions-core/stats.md b/docs/content/development/extensions-core/stats.md index 0e66d319b35..31117c7c4fa 100644 --- a/docs/content/development/extensions-core/stats.md +++ b/docs/content/development/extensions-core/stats.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Stats aggregator" --- - # Stats aggregator Includes stat-related aggregators, including variance and standard deviations, etc. Make sure to [include](../../operations/including-extensions.html) `druid-stats` as an extension. 
diff --git a/docs/content/development/extensions-core/test-stats.md b/docs/content/development/extensions-core/test-stats.md index 2e61641f8a7..9e175aef43f 100644 --- a/docs/content/development/extensions-core/test-stats.md +++ b/docs/content/development/extensions-core/test-stats.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Test Stats Aggregators" --- - # Test Stats Aggregators Incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for math background and details. diff --git a/docs/content/development/extensions.md b/docs/content/development/extensions.md index 998f3dcd778..aabc515481e 100644 --- a/docs/content/development/extensions.md +++ b/docs/content/development/extensions.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Druid extensions" --- - # Druid extensions Druid implements an extension system that allows for adding functionality at runtime. Extensions diff --git a/docs/content/development/geo.md b/docs/content/development/geo.md index ac8db41ac18..7f9befad322 100644 --- a/docs/content/development/geo.md +++ b/docs/content/development/geo.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Geographic Queries" --- # Geographic Queries + Druid supports filtering specially spatially indexed columns based on an origin and a bound. 
# Spatial Indexing diff --git a/docs/content/development/integrating-druid-with-other-technologies.md b/docs/content/development/integrating-druid-with-other-technologies.md index 5862bfae5cb..16c6bde9a44 100644 --- a/docs/content/development/integrating-druid-with-other-technologies.md +++ b/docs/content/development/integrating-druid-with-other-technologies.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Integrating Druid With Other Technologies" --- # Integrating Druid With Other Technologies diff --git a/docs/content/development/javascript.md b/docs/content/development/javascript.md index 53c93f48cca..a90a08c21c9 100644 --- a/docs/content/development/javascript.md +++ b/docs/content/development/javascript.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "JavaScript Programming Guide" --- # JavaScript Programming Guide diff --git a/docs/content/development/modules.md b/docs/content/development/modules.md index df67783d91b..899d4ba91c4 100644 --- a/docs/content/development/modules.md +++ b/docs/content/development/modules.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Extending Druid With Custom Modules" --- - # Extending Druid With Custom Modules Druid uses a module system that allows for the addition of extensions at runtime. diff --git a/docs/content/development/overview.md b/docs/content/development/overview.md index 361049dd074..d900b4e29db 100644 --- a/docs/content/development/overview.md +++ b/docs/content/development/overview.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Developing on Druid" --- - # Developing on Druid Druid's codebase consists of several major components. 
For developers interested in learning the code, this document provides diff --git a/docs/content/development/router.md b/docs/content/development/router.md index b6225308959..3e2d78daa6a 100644 --- a/docs/content/development/router.md +++ b/docs/content/development/router.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Router Node" --- - -Router Node -=========== +# Router Node You should only ever need the router node if you have a Druid cluster well into the terabyte range. The router node can be used to route queries to different broker nodes. By default, the broker routes queries based on how [Rules](../operations/rule-configuration.html) are set up. For example, if 1 month of recent data is loaded into a `hot` cluster, queries that fall within the recent month can be routed to a dedicated set of brokers. Queries outside this range are routed to another set of brokers. This set up provides query isolation such that queries for more important data are not impacted by queries for less important data. diff --git a/docs/content/development/versioning.md b/docs/content/development/versioning.md index dfd04a0f14d..4b1577ffc39 100644 --- a/docs/content/development/versioning.md +++ b/docs/content/development/versioning.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Versioning Druid" --- # Versioning Druid + This page discusses how we do versioning and provides information on our stable releases. Versioning Strategy diff --git a/docs/content/ingestion/batch-ingestion.md b/docs/content/ingestion/batch-ingestion.md index dfa90076286..db394c63447 100644 --- a/docs/content/ingestion/batch-ingestion.md +++ b/docs/content/ingestion/batch-ingestion.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Batch Data Ingestion" --- - # Batch Data Ingestion Druid can load data from static files through a variety of methods described here. 
diff --git a/docs/content/ingestion/command-line-hadoop-indexer.md b/docs/content/ingestion/command-line-hadoop-indexer.md index 162499ab1e2..3068783474c 100644 --- a/docs/content/ingestion/command-line-hadoop-indexer.md +++ b/docs/content/ingestion/command-line-hadoop-indexer.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Command Line Hadoop Indexer" --- - # Command Line Hadoop Indexer To run: diff --git a/docs/content/ingestion/compaction.md b/docs/content/ingestion/compaction.md index 2c46e094514..956c3470b5d 100644 --- a/docs/content/ingestion/compaction.md +++ b/docs/content/ingestion/compaction.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Compaction Task" --- - # Compaction Task Compaction tasks merge all segments of the given interval. The syntax is: diff --git a/docs/content/ingestion/data-formats.md b/docs/content/ingestion/data-formats.md index bdb7fb105a5..bfd796256d6 100644 --- a/docs/content/ingestion/data-formats.md +++ b/docs/content/ingestion/data-formats.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Data Formats for Ingestion" --- -Data Formats for Ingestion -========================== +# Data Formats for Ingestion Druid can ingest denormalized data in JSON, CSV, or a delimited form such as TSV, or any custom format. While most examples in the documentation use data in JSON format, it is not difficult to configure Druid to ingest any other delimited data. We welcome any contributions to new formats. 
diff --git a/docs/content/ingestion/delete-data.md b/docs/content/ingestion/delete-data.md index cd0c2a0e839..6f5e9668a41 100644 --- a/docs/content/ingestion/delete-data.md +++ b/docs/content/ingestion/delete-data.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Deleting Data" --- - # Deleting Data Permanent deletion of a Druid segment has two steps: diff --git a/docs/content/ingestion/faq.md b/docs/content/ingestion/faq.md index a5bbe6d82b4..9ed403eccf6 100644 --- a/docs/content/ingestion/faq.md +++ b/docs/content/ingestion/faq.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "My Data isn't being loaded" --- - -## My Data isn't being loaded +# My Data isn't being loaded ### Realtime Ingestion diff --git a/docs/content/ingestion/firehose.md b/docs/content/ingestion/firehose.md index c11a73febfe..8aab73988a4 100644 --- a/docs/content/ingestion/firehose.md +++ b/docs/content/ingestion/firehose.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Druid Firehoses" --- - # Druid Firehoses Firehoses are used in [native batch ingestion tasks](../ingestion/native_tasks.html), stream push tasks automatically created by [Tranquility](../ingestion/stream-push.html), and the [stream-pull (deprecated)](../ingestion/stream-pull.html) ingestion model. 
diff --git a/docs/content/ingestion/flatten-json.md b/docs/content/ingestion/flatten-json.md index d9f31b3e600..bcaf6c86002 100644 --- a/docs/content/ingestion/flatten-json.md +++ b/docs/content/ingestion/flatten-json.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "JSON Flatten Spec" --- - # JSON Flatten Spec | Field | Type | Description | Required | diff --git a/docs/content/ingestion/hadoop.md b/docs/content/ingestion/hadoop.md index c0335d1fac4..a79bf715ba9 100644 --- a/docs/content/ingestion/hadoop.md +++ b/docs/content/ingestion/hadoop.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Hadoop-based Batch Ingestion" --- - # Hadoop-based Batch Ingestion Hadoop-based batch ingestion in Druid is supported via a Hadoop-ingestion task. These tasks can be posted to a running diff --git a/docs/content/ingestion/index.md b/docs/content/ingestion/index.md index 069d350aefe..80ba3f7ec4d 100644 --- a/docs/content/ingestion/index.md +++ b/docs/content/ingestion/index.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Ingestion" --- - # Ingestion ## Overview diff --git a/docs/content/ingestion/ingestion-spec.md b/docs/content/ingestion/ingestion-spec.md index d463d577886..82888ce6570 100644 --- a/docs/content/ingestion/ingestion-spec.md +++ b/docs/content/ingestion/ingestion-spec.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Ingestion Spec" --- - # Ingestion Spec A Druid ingestion spec consists of 3 components: diff --git a/docs/content/ingestion/locking-and-priority.md b/docs/content/ingestion/locking-and-priority.md index d343e97f708..d2a857980c9 100644 --- a/docs/content/ingestion/locking-and-priority.md +++ b/docs/content/ingestion/locking-and-priority.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Task Locking & Priority" --- - # Task Locking & Priority ## Locking diff --git a/docs/content/ingestion/misc-tasks.md b/docs/content/ingestion/misc-tasks.md index e309bbb76f0..fe119dc789a 100644 --- a/docs/content/ingestion/misc-tasks.md +++ 
b/docs/content/ingestion/misc-tasks.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Miscellaneous Tasks" --- - # Miscellaneous Tasks ## Noop Task diff --git a/docs/content/ingestion/native_tasks.md b/docs/content/ingestion/native_tasks.md index 4857255981e..f7c251404db 100644 --- a/docs/content/ingestion/native_tasks.md +++ b/docs/content/ingestion/native_tasks.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Native Index Tasks" --- # Native Index Tasks diff --git a/docs/content/ingestion/reports.md b/docs/content/ingestion/reports.md index 20b1836fa95..2f3031734dd 100644 --- a/docs/content/ingestion/reports.md +++ b/docs/content/ingestion/reports.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Ingestion Reports" --- # Ingestion Reports diff --git a/docs/content/ingestion/schema-changes.md b/docs/content/ingestion/schema-changes.md index a8d72a099ac..5f091f1bee3 100644 --- a/docs/content/ingestion/schema-changes.md +++ b/docs/content/ingestion/schema-changes.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Schema Changes" --- # Schema Changes diff --git a/docs/content/ingestion/schema-design.md b/docs/content/ingestion/schema-design.md index e86cdebb675..6858f146974 100644 --- a/docs/content/ingestion/schema-design.md +++ b/docs/content/ingestion/schema-design.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Schema Design" --- - # Schema Design This page is meant to assist users in designing a schema for data to be ingested in Druid. 
Druid intakes denormalized data diff --git a/docs/content/ingestion/stream-ingestion.md b/docs/content/ingestion/stream-ingestion.md index 292e0744277..dd2221854eb 100644 --- a/docs/content/ingestion/stream-ingestion.md +++ b/docs/content/ingestion/stream-ingestion.md @@ -19,22 +19,22 @@ --- layout: doc_page +title: "Loading Streams" --- +# Loading Streams -# Loading streams - -Streams can be ingested in Druid using either [Tranquility](https://github.com/druid-io/tranquility) (a Druid-aware +Streams can be ingested in Druid using either [Tranquility](https://github.com/druid-io/tranquility) (a Druid-aware client) or the [Kafka Indexing Service](../development/extensions-core/kafka-ingestion.html). ## Tranquility (Stream Push) -If you have a program that generates a stream, then you can push that stream directly into Druid in -real-time. With this approach, Tranquility is embedded in your data-producing application. -Tranquility comes with bindings for the -Storm and Samza stream processors. It also has a direct API that can be used from any JVM-based +If you have a program that generates a stream, then you can push that stream directly into Druid in +real-time. With this approach, Tranquility is embedded in your data-producing application. +Tranquility comes with bindings for the +Storm and Samza stream processors. It also has a direct API that can be used from any JVM-based program, such as Spark Streaming or a Kafka consumer. -Tranquility handles partitioning, replication, service discovery, and schema rollover for you, +Tranquility handles partitioning, replication, service discovery, and schema rollover for you, seamlessly and without downtime. You only have to define your Druid schema. For examples and more information, please see the [Tranquility README](https://github.com/druid-io/tranquility). 
diff --git a/docs/content/ingestion/stream-pull.md b/docs/content/ingestion/stream-pull.md index 08f0293988f..1075325d1bf 100644 --- a/docs/content/ingestion/stream-pull.md +++ b/docs/content/ingestion/stream-pull.md @@ -19,14 +19,14 @@ --- layout: doc_page +title: "Stream Pull Ingestion" ---
-NOTE: Realtime nodes are deprecated. Please use the Kafka Indexing Service for stream pull use cases instead. +NOTE: Realtime nodes are deprecated. Please use the Kafka Indexing Service for stream pull use cases instead.
-Stream Pull Ingestion -===================== +# Stream Pull Ingestion If you have an external service that you want to pull data from, you have two options. The simplest option is to set up a "copying" service that reads from the data source and writes to Druid using @@ -34,7 +34,7 @@ the [stream push method](stream-push.html). Another option is *stream pull*. With this approach, a Druid Realtime Node ingests data from a [Firehose](../ingestion/firehose.html) connected to the data you want to -read. The Druid quickstart and tutorials do not include information about how to set up standalone realtime nodes, but +read. The Druid quickstart and tutorials do not include information about how to set up standalone realtime nodes, but they can be used in place for Tranquility server and the indexing service. Please note that Realtime nodes have different properties and roles than the indexing service. ## Realtime Node Ingestion @@ -182,7 +182,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |dedupColumn|String|the column to judge whether this row is already in this segment, if so, throw away this row. If it is String type column, to reduce heap cost, use long type hashcode of this column's value to judge whether this row is already ingested, so there maybe very small chance to throw away a row that is not ingested before.|no (default == null)| |indexSpec|Object|Tune how data is indexed. See below for more information.|no| -Before enabling thread priority settings, users are highly encouraged to read the [original pull request](https://github.com/apache/incubator-druid/pull/984) and other documentation about proper use of `-XX:+UseThreadPriorities`. +Before enabling thread priority settings, users are highly encouraged to read the [original pull request](https://github.com/apache/incubator-druid/pull/984) and other documentation about proper use of `-XX:+UseThreadPriorities`. 
#### Rejection Policy @@ -254,7 +254,7 @@ Configure `linear` under `schema`: "partitionNum": 0 } ``` - + ##### Numbered @@ -269,7 +269,7 @@ Configure `numbered` under `schema`: "partitions": 2 } ``` - + ##### Scale and Redundancy @@ -283,7 +283,7 @@ For example, if RealTimeNode1 has: "partitionNum": 0 } ``` - + and RealTimeNode2 has: ```json @@ -329,48 +329,48 @@ The normal, expected use cases have the following overall constraints: `intermed Standalone realtime nodes use the Kafka high level consumer, which imposes a few restrictions. -Druid replicates segment such that logically equivalent data segments are concurrently hosted on N nodes. If N–1 nodes go down, -the data will still be available for querying. On real-time nodes, this process depends on maintaining logically equivalent -data segments on each of the N nodes, which is not possible with standard Kafka consumer groups if your Kafka topic requires more than one consumer +Druid replicates segment such that logically equivalent data segments are concurrently hosted on N nodes. If N–1 nodes go down, +the data will still be available for querying. On real-time nodes, this process depends on maintaining logically equivalent +data segments on each of the N nodes, which is not possible with standard Kafka consumer groups if your Kafka topic requires more than one consumer (because consumers in different consumer groups will split up the data differently). -For example, let's say your topic is split across Kafka partitions 1, 2, & 3 and you have 2 real-time nodes with linear shard specs 1 & 2. -Both of the real-time nodes are in the same consumer group. Real-time node 1 may consume data from partitions 1 & 3, and real-time node 2 may consume data from partition 2. +For example, let's say your topic is split across Kafka partitions 1, 2, & 3 and you have 2 real-time nodes with linear shard specs 1 & 2. +Both of the real-time nodes are in the same consumer group. 
Real-time node 1 may consume data from partitions 1 & 3, and real-time node 2 may consume data from partition 2. Querying for your data through the broker will yield correct results. -The problem arises if you want to replicate your data by creating real-time nodes 3 & 4. These new real-time nodes also -have linear shard specs 1 & 2, and they will consume data from Kafka using a different consumer group. In this case, -real-time node 3 may consume data from partitions 1 & 2, and real-time node 4 may consume data from partition 2. -From Druid's perspective, the segments hosted by real-time nodes 1 and 3 are the same, and the data hosted by real-time nodes -2 and 4 are the same, although they are reading from different Kafka partitions. Querying for the data will yield inconsistent +The problem arises if you want to replicate your data by creating real-time nodes 3 & 4. These new real-time nodes also +have linear shard specs 1 & 2, and they will consume data from Kafka using a different consumer group. In this case, +real-time node 3 may consume data from partitions 1 & 2, and real-time node 4 may consume data from partition 2. +From Druid's perspective, the segments hosted by real-time nodes 1 and 3 are the same, and the data hosted by real-time nodes +2 and 4 are the same, although they are reading from different Kafka partitions. Querying for the data will yield inconsistent results. -Is this always a problem? No. If your data is small enough to fit on a single Kafka partition, you can replicate without issues. +Is this always a problem? No. If your data is small enough to fit on a single Kafka partition, you can replicate without issues. Otherwise, you can run real-time nodes without replication. Please note that druid will skip over event that failed its checksum and it is corrupt. ### Locking -Using stream pull ingestion with Realtime nodes together batch ingestion may introduce data override issues. 
For example, if you -are generating hourly segments for the current day, and run a daily batch job for the current day's data, the segments created by -the batch job will have a more recent version than most of the segments generated by realtime ingestion. If your batch job is indexing -data that isn't yet complete for the day, the daily segment created by the batch job can override recent segments created by +Using stream pull ingestion with Realtime nodes together batch ingestion may introduce data override issues. For example, if you +are generating hourly segments for the current day, and run a daily batch job for the current day's data, the segments created by +the batch job will have a more recent version than most of the segments generated by realtime ingestion. If your batch job is indexing +data that isn't yet complete for the day, the daily segment created by the batch job can override recent segments created by realtime nodes. A portion of data will appear to be lost in this case. ### Schema changes -Standalone realtime nodes require stopping a node to update a schema, and starting it up again for the schema to take effect. +Standalone realtime nodes require stopping a node to update a schema, and starting it up again for the schema to take effect. This can be difficult to manage at scale, especially with multiple partitions. ### Log management -Each standalone realtime node has its own set of logs. Diagnosing errors across many partitions across many servers may be +Each standalone realtime node has its own set of logs. Diagnosing errors across many partitions across many servers may be difficult to manage and track at scale. ## Deployment Notes Stream ingestion may generate a large number of small segments because it's difficult to optimize the segment size at -ingestion time. The number of segments will increase over time, and this might cause the query performance issue. +ingestion time. 
The number of segments will increase over time, and this might cause the query performance issue. Details on how to optimize the segment size can be found on [Segment size optimization](../operations/segment-optimization.html). diff --git a/docs/content/ingestion/stream-push.md b/docs/content/ingestion/stream-push.md index 7a5a9dec0db..c6e79adad1d 100644 --- a/docs/content/ingestion/stream-push.md +++ b/docs/content/ingestion/stream-push.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Stream Push" --- - -## Stream Push +# Stream Push Druid can connect to any streaming data source through [Tranquility](https://github.com/druid-io/tranquility/blob/master/README.md), a package for pushing diff --git a/docs/content/ingestion/tasks.md b/docs/content/ingestion/tasks.md index 27f446aebc6..44ffc3b0897 100644 --- a/docs/content/ingestion/tasks.md +++ b/docs/content/ingestion/tasks.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Tasks Overview" --- # Tasks Overview diff --git a/docs/content/ingestion/transform-spec.md b/docs/content/ingestion/transform-spec.md index 84c00d3ef72..4e7c66e3b95 100644 --- a/docs/content/ingestion/transform-spec.md +++ b/docs/content/ingestion/transform-spec.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Transform Specs" --- - # Transform Specs Transform specs allow Druid to filter and transform input data during ingestion. 
diff --git a/docs/content/ingestion/update-existing-data.md b/docs/content/ingestion/update-existing-data.md index 3fdf557ef6e..da8ab31ff93 100644 --- a/docs/content/ingestion/update-existing-data.md +++ b/docs/content/ingestion/update-existing-data.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Updating Existing Data" --- # Updating Existing Data diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index a798d0b1703..c4a572403d2 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Druid Expressions" --- - # Druid Expressions
diff --git a/docs/content/misc/papers-and-talks.md b/docs/content/misc/papers-and-talks.md index a265ef15e17..f97c3d32528 100644 --- a/docs/content/misc/papers-and-talks.md +++ b/docs/content/misc/papers-and-talks.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Papers" --- - # Papers * [Druid: A Real-time Analytical Data Store](http://static.druid.io/docs/druid.pdf) - Discusses the Druid architecture in detail. diff --git a/docs/content/operations/alerts.md b/docs/content/operations/alerts.md index 7faa29656f7..239c330f858 100644 --- a/docs/content/operations/alerts.md +++ b/docs/content/operations/alerts.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Druid Alerts" --- # Druid Alerts diff --git a/docs/content/operations/api-reference.md b/docs/content/operations/api-reference.md index 5d3fda34458..21ad403a0ef 100644 --- a/docs/content/operations/api-reference.md +++ b/docs/content/operations/api-reference.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "API Reference" --- - # API Reference This page documents all of the API endpoints for each Druid service type. 
diff --git a/docs/content/operations/dump-segment.md b/docs/content/operations/dump-segment.md index b881e118955..1f93dfd7a6d 100644 --- a/docs/content/operations/dump-segment.md +++ b/docs/content/operations/dump-segment.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "DumpSegment tool" --- # DumpSegment tool diff --git a/docs/content/operations/http-compression.md b/docs/content/operations/http-compression.md index 4bbcd50982e..5ba9c0db535 100644 --- a/docs/content/operations/http-compression.md +++ b/docs/content/operations/http-compression.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "HTTP Compression" --- # HTTP Compression diff --git a/docs/content/operations/including-extensions.md b/docs/content/operations/including-extensions.md index 2de6b7fa0ae..d8cb69e6817 100644 --- a/docs/content/operations/including-extensions.md +++ b/docs/content/operations/including-extensions.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Loading extensions" --- - # Loading extensions ## Loading core extensions diff --git a/docs/content/operations/insert-segment-to-db.md b/docs/content/operations/insert-segment-to-db.md index 3c4306e5044..8f9aed60a1f 100644 --- a/docs/content/operations/insert-segment-to-db.md +++ b/docs/content/operations/insert-segment-to-db.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "insert-segment-to-db Tool" --- # insert-segment-to-db Tool diff --git a/docs/content/operations/metrics.md b/docs/content/operations/metrics.md index 75c38e2c47e..aac824ab07f 100644 --- a/docs/content/operations/metrics.md +++ b/docs/content/operations/metrics.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Druid Metrics" --- # Druid Metrics diff --git a/docs/content/operations/other-hadoop.md b/docs/content/operations/other-hadoop.md index cc35ef67ab0..3cf1a1642c1 100644 --- a/docs/content/operations/other-hadoop.md +++ b/docs/content/operations/other-hadoop.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Working with different versions of Hadoop" --- # 
Working with different versions of Hadoop diff --git a/docs/content/operations/password-provider.md b/docs/content/operations/password-provider.md index 9a89990eec9..7ed0e5a9165 100644 --- a/docs/content/operations/password-provider.md +++ b/docs/content/operations/password-provider.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Password Provider" --- - # Password Provider Druid needs some passwords for accessing various secured systems like metadata store, Key Store containing server certificates etc. diff --git a/docs/content/operations/performance-faq.md b/docs/content/operations/performance-faq.md index b18ec1ad22e..6b703ab1253 100644 --- a/docs/content/operations/performance-faq.md +++ b/docs/content/operations/performance-faq.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Performance FAQ" --- - # Performance FAQ ## I can't match your benchmarked results diff --git a/docs/content/operations/pull-deps.md b/docs/content/operations/pull-deps.md index d9abf575d7a..6721f7faa85 100644 --- a/docs/content/operations/pull-deps.md +++ b/docs/content/operations/pull-deps.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "pull-deps Tool" --- - # pull-deps Tool `pull-deps` is a tool that can pull down dependencies to the local repository and lay dependencies out into the extension directory as needed. 
diff --git a/docs/content/operations/recommendations.md b/docs/content/operations/recommendations.md index aa365e0768e..2672ea30f41 100644 --- a/docs/content/operations/recommendations.md +++ b/docs/content/operations/recommendations.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Recommendations" --- - -Recommendations -=============== +# Recommendations # Some General guidelines diff --git a/docs/content/operations/reset-cluster.md b/docs/content/operations/reset-cluster.md index b16baa781cf..f33667ed12f 100644 --- a/docs/content/operations/reset-cluster.md +++ b/docs/content/operations/reset-cluster.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "ResetCluster tool" --- # ResetCluster tool diff --git a/docs/content/operations/rolling-updates.md b/docs/content/operations/rolling-updates.md index 72acfe01b45..df948429d01 100644 --- a/docs/content/operations/rolling-updates.md +++ b/docs/content/operations/rolling-updates.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Rolling Updates" --- - -Rolling Updates -=============== +# Rolling Updates For rolling Druid cluster updates with no downtime, we recommend updating Druid nodes in the following order: diff --git a/docs/content/operations/rule-configuration.md b/docs/content/operations/rule-configuration.md index ca42a4711ec..b05e0cd7a90 100644 --- a/docs/content/operations/rule-configuration.md +++ b/docs/content/operations/rule-configuration.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Retaining or Automatically Dropping Data" --- # Retaining or Automatically Dropping Data diff --git a/docs/content/operations/segment-optimization.md b/docs/content/operations/segment-optimization.md index a3d8aa67c88..e539d9d949e 100644 --- a/docs/content/operations/segment-optimization.md +++ b/docs/content/operations/segment-optimization.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Segment size optimization" --- - # Segment size optimization In Druid, it's important to optimize the segment size because diff 
--git a/docs/content/operations/tls-support.md b/docs/content/operations/tls-support.md index 079ffd1411c..86223d66d77 100644 --- a/docs/content/operations/tls-support.md +++ b/docs/content/operations/tls-support.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "TLS Support" --- - -TLS Support -=============== +# TLS Support # General Configuration diff --git a/docs/content/operations/use_sbt_to_build_fat_jar.md b/docs/content/operations/use_sbt_to_build_fat_jar.md index eeae5af9b14..79f5a0fee54 100644 --- a/docs/content/operations/use_sbt_to_build_fat_jar.md +++ b/docs/content/operations/use_sbt_to_build_fat_jar.md @@ -19,10 +19,10 @@ --- layout: doc_page +title: "Content for build.sbt" --- +# Content for build.sbt -Content for build.sbt ---------------------- ```scala libraryDependencies ++= Seq( "com.amazonaws" % "aws-java-sdk" % "1.9.23" exclude("common-logging", "common-logging"), diff --git a/docs/content/querying/aggregations.md b/docs/content/querying/aggregations.md index 8f65140c5f5..ddd804e22f1 100644 --- a/docs/content/querying/aggregations.md +++ b/docs/content/querying/aggregations.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Aggregations" --- # Aggregations diff --git a/docs/content/querying/caching.md b/docs/content/querying/caching.md index f5da776f61b..68b7daaeea6 100644 --- a/docs/content/querying/caching.md +++ b/docs/content/querying/caching.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Query Caching" --- # Query Caching diff --git a/docs/content/querying/datasource.md b/docs/content/querying/datasource.md index a966fbad948..7dee075ff0f 100644 --- a/docs/content/querying/datasource.md +++ b/docs/content/querying/datasource.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Datasources" --- - -## Datasources +# Datasources A data source is the Druid equivalent of a database table. However, a query can also masquerade as a data source, providing subquery-like functionality. 
Query data sources are currently supported only by [GroupBy](../querying/groupbyquery.html) queries. diff --git a/docs/content/querying/datasourcemetadataquery.md b/docs/content/querying/datasourcemetadataquery.md index 2f102226c52..f7d2da12314 100644 --- a/docs/content/querying/datasourcemetadataquery.md +++ b/docs/content/querying/datasourcemetadataquery.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Data Source Metadata Queries" --- # Data Source Metadata Queries + Data Source Metadata queries return metadata information for a dataSource. These queries return information about: * The timestamp of latest ingested event for the dataSource. This is the ingested event without any consideration of rollup. diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index a35b0297e76..823ea07b279 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Transforming Dimension Values" --- - # Transforming Dimension Values The following JSON fields can be used in a query to operate on dimension values. diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 73bebbb96be..cd22132bb70 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Query Filters" --- # Query Filters + A filter is a JSON object indicating which rows of data should be included in the computation for a query. It’s essentially the equivalent of the WHERE clause in SQL. Druid supports the following types of filters. 
### Selector filter diff --git a/docs/content/querying/granularities.md b/docs/content/querying/granularities.md index 677a4767866..c8a1a47e925 100644 --- a/docs/content/querying/granularities.md +++ b/docs/content/querying/granularities.md @@ -19,9 +19,10 @@ --- layout: doc_page +title: "Aggregation Granularity" --- - # Aggregation Granularity + The granularity field determines how data gets bucketed across the time dimension, or how it gets aggregated by hour, day, minute, etc. It can be specified either as a string for simple granularities or as an object for arbitrary granularities. diff --git a/docs/content/querying/groupbyquery.md b/docs/content/querying/groupbyquery.md index 4a90908edf5..e4e39a3dd48 100644 --- a/docs/content/querying/groupbyquery.md +++ b/docs/content/querying/groupbyquery.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "groupBy Queries" --- # groupBy Queries diff --git a/docs/content/querying/having.md b/docs/content/querying/having.md index da37d157100..aba8acff7b2 100644 --- a/docs/content/querying/having.md +++ b/docs/content/querying/having.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Filter groupBy Query Results" --- # Filter groupBy Query Results + A having clause is a JSON object identifying which rows from a groupBy query should be returned, by specifying conditions on aggregated values. It is essentially the equivalent of the HAVING clause in SQL. 
diff --git a/docs/content/querying/joins.md b/docs/content/querying/joins.md index 6286c5698ad..1c8c5fd9fa1 100644 --- a/docs/content/querying/joins.md +++ b/docs/content/querying/joins.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Joins" --- # Joins diff --git a/docs/content/querying/limitspec.md b/docs/content/querying/limitspec.md index cfd715d890c..cc57ab34e84 100644 --- a/docs/content/querying/limitspec.md +++ b/docs/content/querying/limitspec.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Sort groupBy Query Results" --- # Sort groupBy Query Results + The limitSpec field provides the functionality to sort and limit the set of results from a groupBy query. If you group by a single dimension and are ordering by a single metric, we highly recommend using [TopN Queries](../querying/topnquery.html) instead. The performance will be substantially better. Available options are: ### DefaultLimitSpec diff --git a/docs/content/querying/lookups.md b/docs/content/querying/lookups.md index c5bafab5848..b86501cdd77 100644 --- a/docs/content/querying/lookups.md +++ b/docs/content/querying/lookups.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Lookups" --- - # Lookups
diff --git a/docs/content/querying/multi-value-dimensions.md b/docs/content/querying/multi-value-dimensions.md index ef20032baa8..532538e2089 100644 --- a/docs/content/querying/multi-value-dimensions.md +++ b/docs/content/querying/multi-value-dimensions.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Multi-value dimensions" --- # Multi-value dimensions diff --git a/docs/content/querying/multitenancy.md b/docs/content/querying/multitenancy.md index 4e2b345cbe8..7ab468e9a83 100644 --- a/docs/content/querying/multitenancy.md +++ b/docs/content/querying/multitenancy.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Multitenancy Considerations" --- # Multitenancy Considerations diff --git a/docs/content/querying/post-aggregations.md b/docs/content/querying/post-aggregations.md index 9b7d7760738..15f8d80349a 100644 --- a/docs/content/querying/post-aggregations.md +++ b/docs/content/querying/post-aggregations.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Post-Aggregations" --- # Post-Aggregations + Post-aggregations are specifications of processing that should happen on aggregated values as they come out of Druid. If you include a post aggregation as part of a query, make sure to include all aggregators the post-aggregator requires. There are several post-aggregators available. diff --git a/docs/content/querying/query-context.md b/docs/content/querying/query-context.md index 0612a67e4ea..b608511c071 100644 --- a/docs/content/querying/query-context.md +++ b/docs/content/querying/query-context.md @@ -19,10 +19,9 @@ --- layout: doc_page +title: "Query Context" --- - -Query Context -============= +# Query Context The query context is used for various query configuration parameters. The following parameters apply to all queries. 
diff --git a/docs/content/querying/querying.md b/docs/content/querying/querying.md index 59de93f21f6..a022cad296b 100644 --- a/docs/content/querying/querying.md +++ b/docs/content/querying/querying.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Querying" --- - # Querying Queries are made using an HTTP REST style request to queryable nodes ([Broker](../design/broker.html), diff --git a/docs/content/querying/scan-query.md b/docs/content/querying/scan-query.md index 4636aa485e9..3571727f15e 100644 --- a/docs/content/querying/scan-query.md +++ b/docs/content/querying/scan-query.md @@ -19,9 +19,10 @@ --- layout: doc_page +title: "Scan query" --- - # Scan query + Scan query returns raw Druid rows in streaming mode. ```json diff --git a/docs/content/querying/searchquery.md b/docs/content/querying/searchquery.md index d621f86ef34..e26746b74f9 100644 --- a/docs/content/querying/searchquery.md +++ b/docs/content/querying/searchquery.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Search Queries" --- # Search Queries + A search query returns dimension values that match the search specification. ```json diff --git a/docs/content/querying/searchqueryspec.md b/docs/content/querying/searchqueryspec.md index 0a22448a168..6e157576932 100644 --- a/docs/content/querying/searchqueryspec.md +++ b/docs/content/querying/searchqueryspec.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Refining Search Queries" --- # Refining Search Queries + Search query specs define how a "match" is defined between a search value and a dimension value. 
The available search query specs are: InsensitiveContainsSearchQuerySpec diff --git a/docs/content/querying/segmentmetadataquery.md b/docs/content/querying/segmentmetadataquery.md index e979f5826ac..ae3477fa3a5 100644 --- a/docs/content/querying/segmentmetadataquery.md +++ b/docs/content/querying/segmentmetadataquery.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Segment Metadata Queries" --- # Segment Metadata Queries + Segment metadata queries return per-segment information about: * Cardinality of all columns in the segment diff --git a/docs/content/querying/select-query.md b/docs/content/querying/select-query.md index ac44e499ae9..7454ba5e48f 100644 --- a/docs/content/querying/select-query.md +++ b/docs/content/querying/select-query.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "Select Queries" --- # Select Queries diff --git a/docs/content/querying/sorting-orders.md b/docs/content/querying/sorting-orders.md index a50617e5b9a..4ba336eeeb0 100644 --- a/docs/content/querying/sorting-orders.md +++ b/docs/content/querying/sorting-orders.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Sorting Orders" --- # Sorting Orders + These sorting orders are used by the [TopNMetricSpec](./topnmetricspec.html), [SearchQuery](./searchquery.html), GroupByQuery's [LimitSpec](./limitspec.html), and [BoundFilter](./filters.html#bound-filter). 
## Lexicographic diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md index f996c627ea1..41095b38a04 100644 --- a/docs/content/querying/sql.md +++ b/docs/content/querying/sql.md @@ -19,6 +19,7 @@ --- layout: doc_page +title: "SQL" --- # SQL diff --git a/docs/content/querying/timeboundaryquery.md b/docs/content/querying/timeboundaryquery.md index 5aa95811527..971f733e1a8 100644 --- a/docs/content/querying/timeboundaryquery.md +++ b/docs/content/querying/timeboundaryquery.md @@ -19,8 +19,10 @@ --- layout: doc_page +title: "Time Boundary Queries" --- # Time Boundary Queries + Time boundary queries return the earliest and latest data points of a data set. The grammar is: ```json diff --git a/docs/content/querying/timeseriesquery.md b/docs/content/querying/timeseriesquery.md index d71632994a4..1af681521bd 100644 --- a/docs/content/querying/timeseriesquery.md +++ b/docs/content/querying/timeseriesquery.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "Timeseries queries" --- -Timeseries queries -================== +# Timeseries queries These types of queries take a timeseries query object and return an array of JSON objects where each object represents a value asked for by the timeseries query. diff --git a/docs/content/querying/topnmetricspec.md b/docs/content/querying/topnmetricspec.md index 4cf035c2728..f3b195b501a 100644 --- a/docs/content/querying/topnmetricspec.md +++ b/docs/content/querying/topnmetricspec.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "TopNMetricSpec" --- -TopNMetricSpec -================== +# TopNMetricSpec The topN metric spec specifies how topN values should be sorted. 
diff --git a/docs/content/querying/topnquery.md b/docs/content/querying/topnquery.md index bca0bdc5891..b9adf80fbb7 100644 --- a/docs/content/querying/topnquery.md +++ b/docs/content/querying/topnquery.md @@ -19,9 +19,9 @@ --- layout: doc_page +title: "TopN queries" --- -TopN queries -================== +# TopN queries TopN queries return a sorted set of results for the values in a given dimension according to some criteria. Conceptually, they can be thought of as an approximate [GroupByQuery](../querying/groupbyquery.html) over a single dimension with an [Ordering](../querying/limitspec.html) spec. TopNs are much faster and resource efficient than GroupBys for this use case. These types of queries take a topN query object and return an array of JSON objects where each object represents a value asked for by the topN query. diff --git a/docs/content/querying/virtual-columns.md b/docs/content/querying/virtual-columns.md index 30a94e42c7f..1a9779cc270 100644 --- a/docs/content/querying/virtual-columns.md +++ b/docs/content/querying/virtual-columns.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Virtual Columns" --- - # Virtual Columns Virtual columns are queryable column "views" created from a set of columns during a query. diff --git a/docs/content/tutorials/cluster.md b/docs/content/tutorials/cluster.md index 65d7f177f6a..f9b2cee1a18 100644 --- a/docs/content/tutorials/cluster.md +++ b/docs/content/tutorials/cluster.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Clustering" --- - # Clustering Druid is designed to be deployed as a scalable, fault-tolerant cluster. diff --git a/docs/content/tutorials/index.md b/docs/content/tutorials/index.md index e68aaa7d65b..18633652193 100644 --- a/docs/content/tutorials/index.md +++ b/docs/content/tutorials/index.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Quickstart" --- - # Druid Quickstart In this quickstart, we will download Druid and set it up on a single machine. 
The cluster will be ready to load data @@ -106,7 +106,7 @@ bin/supervise -c quickstart/tutorial/conf/tutorial-cluster.conf All persistent state such as the cluster metadata store and segments for the services will be kept in the `var` directory under the apache-druid-#{DRUIDVERSION} package root. Logs for the services are located at `var/sv`. -Later on, if you'd like to stop the services, CTRL-C to exit the `bin/supervise` script, which will terminate the Druid processes. +Later on, if you'd like to stop the services, CTRL-C to exit the `bin/supervise` script, which will terminate the Druid processes. ### Resetting cluster state @@ -153,7 +153,7 @@ The sample data has the following columns, and an example event is shown below: * regionIsoCode * regionName * user - + ```json { "timestamp":"2015-09-12T20:03:45.018Z", diff --git a/docs/content/tutorials/tutorial-batch-hadoop.md b/docs/content/tutorials/tutorial-batch-hadoop.md index 921972f4660..0afd25b9784 100644 --- a/docs/content/tutorials/tutorial-batch-hadoop.md +++ b/docs/content/tutorials/tutorial-batch-hadoop.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Load batch data using Hadoop" --- - # Tutorial: Load batch data using Hadoop This tutorial shows you how to load data files into Druid using a remote Hadoop cluster. 
diff --git a/docs/content/tutorials/tutorial-batch.md b/docs/content/tutorials/tutorial-batch.md index d7842ba654f..1cf8b2fe267 100644 --- a/docs/content/tutorials/tutorial-batch.md +++ b/docs/content/tutorials/tutorial-batch.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Loading a file" --- - # Tutorial: Loading a file ## Getting started diff --git a/docs/content/tutorials/tutorial-compaction.md b/docs/content/tutorials/tutorial-compaction.md index 697ce6f017d..3080eb898ac 100644 --- a/docs/content/tutorials/tutorial-compaction.md +++ b/docs/content/tutorials/tutorial-compaction.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Compacting segments" --- - # Tutorial: Compacting segments This tutorial demonstrates how to compact existing segments into fewer but larger segments. diff --git a/docs/content/tutorials/tutorial-delete-data.md b/docs/content/tutorials/tutorial-delete-data.md index 877abb957b3..49b9d148abf 100644 --- a/docs/content/tutorials/tutorial-delete-data.md +++ b/docs/content/tutorials/tutorial-delete-data.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Deleting data" --- - # Tutorial: Deleting data This tutorial demonstrates how to delete existing data. diff --git a/docs/content/tutorials/tutorial-ingestion-spec.md b/docs/content/tutorials/tutorial-ingestion-spec.md index 1c3d34cacb8..34ee3abc2b6 100644 --- a/docs/content/tutorials/tutorial-ingestion-spec.md +++ b/docs/content/tutorials/tutorial-ingestion-spec.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Writing an ingestion spec" --- - # Tutorial: Writing an ingestion spec This tutorial will guide the reader through the process of defining an ingestion spec, pointing out key considerations and guidelines. 
diff --git a/docs/content/tutorials/tutorial-kafka.md b/docs/content/tutorials/tutorial-kafka.md index 279533136c0..2cfc05fed3d 100644 --- a/docs/content/tutorials/tutorial-kafka.md +++ b/docs/content/tutorials/tutorial-kafka.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Load streaming data from Kafka" --- - # Tutorial: Load streaming data from Kafka ## Getting started diff --git a/docs/content/tutorials/tutorial-query.md b/docs/content/tutorials/tutorial-query.md index 23de38c9853..fbf75e059c6 100644 --- a/docs/content/tutorials/tutorial-query.md +++ b/docs/content/tutorials/tutorial-query.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Querying data" --- - # Tutorial: Querying data This tutorial will demonstrate how to query data in Druid, with examples for Druid's native query format and Druid SQL. diff --git a/docs/content/tutorials/tutorial-retention.md b/docs/content/tutorials/tutorial-retention.md index b5acc41a706..8c894f346ae 100644 --- a/docs/content/tutorials/tutorial-retention.md +++ b/docs/content/tutorials/tutorial-retention.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Configuring data retention" --- - # Tutorial: Configuring data retention This tutorial demonstrates how to configure retention rules on a datasource to set the time intervals of data that will be retained or dropped. diff --git a/docs/content/tutorials/tutorial-rollup.md b/docs/content/tutorials/tutorial-rollup.md index f9451629fed..dd57085f97a 100644 --- a/docs/content/tutorials/tutorial-rollup.md +++ b/docs/content/tutorials/tutorial-rollup.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Roll-up" --- - # Tutorial: Roll-up Druid can summarize raw data at ingestion time using a process we refer to as "roll-up". Roll-up is a first-level aggregation operation over a selected set of columns that reduces the size of stored segments. 
diff --git a/docs/content/tutorials/tutorial-tranquility.md b/docs/content/tutorials/tutorial-tranquility.md index 2b14c3af2df..21fbe5a46cf 100644 --- a/docs/content/tutorials/tutorial-tranquility.md +++ b/docs/content/tutorials/tutorial-tranquility.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Load streaming data with HTTP push" --- - # Tutorial: Load streaming data with HTTP push ## Getting started diff --git a/docs/content/tutorials/tutorial-transform-spec.md b/docs/content/tutorials/tutorial-transform-spec.md index 677206acf07..f1e13c42156 100644 --- a/docs/content/tutorials/tutorial-transform-spec.md +++ b/docs/content/tutorials/tutorial-transform-spec.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Transforming input data" --- - # Tutorial: Transforming input data This tutorial will demonstrate how to use transform specs to filter and transform input data during ingestion. diff --git a/docs/content/tutorials/tutorial-update-data.md b/docs/content/tutorials/tutorial-update-data.md index 4a2b7258a03..f24e55964b8 100644 --- a/docs/content/tutorials/tutorial-update-data.md +++ b/docs/content/tutorials/tutorial-update-data.md @@ -19,8 +19,8 @@ --- layout: doc_page +title: "Tutorial: Updating existing data" --- - # Tutorial: Updating existing data This tutorial demonstrates how to update existing data, showing both overwrites and appends.