2019-09-17 15:47:30 -04:00
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
# markdown-spellcheck spelling configuration file
|
|
|
|
|
# Format - lines beginning # are comments
|
|
|
|
|
# global dictionary is at the start, file overrides afterwards
|
|
|
|
|
# one word per line, to define a file override use ' - filename'
|
|
|
|
|
# where filename is relative to this configuration file
|
|
|
|
|
32-bit
|
2020-08-21 12:43:58 -04:00
|
|
|
|
500MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
64-bit
|
|
|
|
|
ACL
|
2021-05-08 23:56:19 -04:00
|
|
|
|
ACLs
|
2019-09-17 15:47:30 -04:00
|
|
|
|
APIs
|
2020-02-11 00:53:11 -05:00
|
|
|
|
AvroStorage
|
2021-06-09 06:32:35 -04:00
|
|
|
|
ARN
|
2019-09-17 15:47:30 -04:00
|
|
|
|
AWS
|
2019-10-12 12:12:14 -04:00
|
|
|
|
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI
|
|
|
|
|
AWS_CONTAINER_CREDENTIALS_FULL_URI
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Actian
|
|
|
|
|
Authorizer
|
|
|
|
|
Avatica
|
|
|
|
|
Avro
|
|
|
|
|
Azul
|
|
|
|
|
BCP
|
|
|
|
|
Base64
|
|
|
|
|
Base64-encoded
|
|
|
|
|
ByteBuffer
|
2021-12-09 21:53:23 -05:00
|
|
|
|
concat
|
2019-09-17 15:47:30 -04:00
|
|
|
|
CIDR
|
|
|
|
|
CORS
|
2021-03-29 16:57:58 -04:00
|
|
|
|
CNF
|
2019-09-17 15:47:30 -04:00
|
|
|
|
CPUs
|
|
|
|
|
CSVs
|
|
|
|
|
Ceph
|
2021-03-29 16:57:58 -04:00
|
|
|
|
CloudWatch
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ColumnDescriptor
|
|
|
|
|
Corretto
|
|
|
|
|
DDL
|
|
|
|
|
DML
|
|
|
|
|
DNS
|
|
|
|
|
DRUIDVERSION
|
|
|
|
|
DataSketches
|
|
|
|
|
DateTime
|
|
|
|
|
DateType
|
2022-05-03 19:22:25 -04:00
|
|
|
|
dimensionsSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DimensionSpec
|
|
|
|
|
DimensionSpecs
|
|
|
|
|
Dockerfile
|
|
|
|
|
DogStatsD
|
|
|
|
|
Double.NEGATIVE_INFINITY
|
|
|
|
|
Double.NEGATIVE_INFINITY.
|
|
|
|
|
Double.POSITIVE_INFINITY
|
|
|
|
|
Double.POSITIVE_INFINITY.
|
2019-10-01 17:59:30 -04:00
|
|
|
|
Dropwizard
|
|
|
|
|
dropwizard
|
2020-01-17 18:52:05 -05:00
|
|
|
|
DruidInputSource
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DruidSQL
|
2020-12-10 11:24:33 -05:00
|
|
|
|
DynamicConfigProvider
|
2019-09-17 15:47:30 -04:00
|
|
|
|
EC2
|
2019-10-12 12:12:14 -04:00
|
|
|
|
EC2ContainerCredentialsProviderWrapper
|
|
|
|
|
ECS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
EMR
|
|
|
|
|
EMRFS
|
|
|
|
|
ETL
|
|
|
|
|
Elasticsearch
|
2021-07-07 01:05:41 -04:00
|
|
|
|
Enums
|
2019-09-17 15:47:30 -04:00
|
|
|
|
FirehoseFactory
|
2020-01-17 18:52:05 -05:00
|
|
|
|
FlattenSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Float.NEGATIVE_INFINITY
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Float.NEGATIVE_INFINITY.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Float.POSITIVE_INFINITY
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Float.POSITIVE_INFINITY.
|
2019-12-12 20:00:08 -05:00
|
|
|
|
ForwardedRequestCustomizer
|
2019-09-17 15:47:30 -04:00
|
|
|
|
GC
|
|
|
|
|
GPG
|
|
|
|
|
GSSAPI
|
|
|
|
|
GUIs
|
|
|
|
|
GroupBy
|
|
|
|
|
Guice
|
|
|
|
|
HDFS
|
2020-01-17 18:52:05 -05:00
|
|
|
|
HDFSFirehose
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HLL
|
|
|
|
|
HashSet
|
|
|
|
|
Homebrew
|
|
|
|
|
HyperLogLog
|
2021-01-07 00:15:29 -05:00
|
|
|
|
IAM
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IANA
|
|
|
|
|
IETF
|
2019-09-25 14:25:03 -04:00
|
|
|
|
IP
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IPv4
|
2020-06-30 00:08:13 -04:00
|
|
|
|
IS_BROADCAST
|
|
|
|
|
IS_JOINABLE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IS0
|
|
|
|
|
ISO-8601
|
|
|
|
|
ISO8601
|
|
|
|
|
IndexSpec
|
|
|
|
|
IndexTask
|
|
|
|
|
InfluxDB
|
2019-11-22 13:49:16 -05:00
|
|
|
|
InputFormat
|
2020-01-17 18:52:05 -05:00
|
|
|
|
InputSource
|
2020-06-09 15:55:20 -04:00
|
|
|
|
InputSources
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Integer.MAX_VALUE
|
2022-05-03 19:22:25 -04:00
|
|
|
|
ioConfig
|
2019-10-09 02:43:58 -04:00
|
|
|
|
JBOD
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JDBC
|
|
|
|
|
JDK
|
|
|
|
|
JDK7
|
|
|
|
|
JDK8
|
|
|
|
|
JKS
|
2019-10-01 17:59:30 -04:00
|
|
|
|
JMX
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JRE
|
|
|
|
|
JS
|
|
|
|
|
JSON
|
2020-01-17 18:52:05 -05:00
|
|
|
|
JsonPath
|
2022-08-19 20:12:19 -04:00
|
|
|
|
JSONPath
|
2020-11-19 18:24:58 -05:00
|
|
|
|
JSSE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JVM
|
|
|
|
|
JVMs
|
|
|
|
|
Joda
|
|
|
|
|
JsonProperty
|
2021-08-24 11:49:29 -04:00
|
|
|
|
Jupyter
|
2019-09-17 15:47:30 -04:00
|
|
|
|
KMS
|
|
|
|
|
Kerberized
|
|
|
|
|
Kerberos
|
2021-04-14 11:58:17 -04:00
|
|
|
|
KeyStores
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Kinesis
|
2019-11-06 15:56:21 -05:00
|
|
|
|
Kubernetes
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LRU
|
|
|
|
|
LZ4
|
|
|
|
|
LZO
|
|
|
|
|
LimitSpec
|
|
|
|
|
Long.MAX_VALUE
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Long.MAX_VALUE.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Long.MIN_VALUE
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Long.MIN_VALUE.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Lucene
|
|
|
|
|
MapBD
|
|
|
|
|
MapDB
|
2021-08-19 04:52:26 -04:00
|
|
|
|
MariaDB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
MiddleManager
|
|
|
|
|
MiddleManagers
|
|
|
|
|
Montréal
|
|
|
|
|
Murmur3
|
2021-08-09 10:48:29 -04:00
|
|
|
|
MVCC
|
2019-09-17 15:47:30 -04:00
|
|
|
|
NFS
|
2020-05-16 17:09:12 -04:00
|
|
|
|
OCF
|
2019-09-17 15:47:30 -04:00
|
|
|
|
OLAP
|
|
|
|
|
OOMs
|
|
|
|
|
OpenJDK
|
2020-11-19 18:24:58 -05:00
|
|
|
|
OpenLDAP
|
2019-09-17 15:47:30 -04:00
|
|
|
|
OpenTSDB
|
|
|
|
|
OutputStream
|
|
|
|
|
ParAccel
|
|
|
|
|
ParseSpec
|
|
|
|
|
ParseSpecs
|
|
|
|
|
Protobuf
|
2021-08-09 20:27:35 -04:00
|
|
|
|
pull-deps
|
2019-09-17 15:47:30 -04:00
|
|
|
|
RDBMS
|
|
|
|
|
RDDs
|
2021-01-07 00:15:29 -05:00
|
|
|
|
RDS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Rackspace
|
|
|
|
|
Redis
|
|
|
|
|
S3
|
|
|
|
|
SDK
|
|
|
|
|
SIGAR
|
|
|
|
|
SPNEGO
|
2020-06-09 15:55:20 -04:00
|
|
|
|
SqlInputSource
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SQLServer
|
|
|
|
|
SSD
|
|
|
|
|
SSDs
|
|
|
|
|
SSL
|
|
|
|
|
Samza
|
|
|
|
|
Splunk
|
|
|
|
|
SqlFirehose
|
2020-02-19 16:09:20 -05:00
|
|
|
|
SqlParameter
|
2020-11-19 18:24:58 -05:00
|
|
|
|
SslContextFactory
|
2019-09-17 15:47:30 -04:00
|
|
|
|
StatsD
|
2021-09-21 16:28:26 -04:00
|
|
|
|
SYSTEM_TABLE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
TCP
|
|
|
|
|
TGT
|
|
|
|
|
TLS
|
|
|
|
|
TopN
|
|
|
|
|
TopNs
|
|
|
|
|
UI
|
|
|
|
|
UIs
|
|
|
|
|
URI
|
|
|
|
|
URIs
|
|
|
|
|
UTF-16
|
|
|
|
|
UTF-8
|
|
|
|
|
UTF8
|
|
|
|
|
XMLs
|
|
|
|
|
ZK
|
2020-08-26 15:39:48 -04:00
|
|
|
|
ZSTD
|
2019-09-17 15:47:30 -04:00
|
|
|
|
accessor
|
|
|
|
|
ad-hoc
|
|
|
|
|
aggregator
|
|
|
|
|
aggregators
|
|
|
|
|
ambari
|
|
|
|
|
analytics
|
2022-08-19 20:12:19 -04:00
|
|
|
|
arrayElement
|
2021-06-09 06:32:35 -04:00
|
|
|
|
assumeRoleArn
|
|
|
|
|
assumeRoleExternalId
|
parallel broker merges on fork join pool (#8578)
* sketch of broker parallel merges done in small batches on fork join pool
* fix non-terminating sequences, auto compute parallelism
* adjust benches
* adjust benchmarks
* now hella more faster, fixed dumb
* fix
* remove comments
* log.info for debug
* javadoc
* safer block for sequence to yielder conversion
* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool
* smooth yield rate adjustment, more logs to help tune
* cleanup, less logs
* error handling, bug fixes, on by default, more parallel, more tests
* remove unused var
* comments
* timeboundary mergeFn
* simplify, more javadoc
* formatting
* pushdown config
* use nanos consistently, move logs back to debug level, bit more javadoc
* static terminal result batch
* javadoc for nullability of createMergeFn
* cleanup
* oops
* fix race, add docs
* spelling, remove todo, add unhandled exception log
* cleanup, revert unintended change
* another unintended change
* review stuff
* add ParallelMergeCombiningSequenceBenchmark, fixes
* hyper-threading is the enemy
* fix initial start delay, lol
* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2
* fix those important style issues with the benchmarks code
* lazy sequence creation for benchmarks
* more benchmark comments
* stable sequence generation time
* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs
* add jmh thread based benchmarks, cleanup some stuff
* oops
* style
* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose
* retool benchmark to allow modeling more typical heterogenous heavy workloads
* spelling
* fix
* refactor benchmarks
* formatting
* docs
* add maxThreadStartDelay parameter to threaded benchmark
* why does catch need to be on its own line but else doesnt
2019-11-07 14:58:46 -05:00
|
|
|
|
async
|
2019-09-17 15:47:30 -04:00
|
|
|
|
authorizer
|
|
|
|
|
authorizers
|
|
|
|
|
autocomplete
|
|
|
|
|
autodiscovery
|
|
|
|
|
autoscaler
|
|
|
|
|
autoscaling
|
|
|
|
|
averager
|
|
|
|
|
averagers
|
|
|
|
|
backend
|
|
|
|
|
backfills
|
|
|
|
|
backpressure
|
|
|
|
|
base64
|
|
|
|
|
big-endian
|
2022-08-19 20:12:19 -04:00
|
|
|
|
bigint
|
2019-09-17 15:47:30 -04:00
|
|
|
|
blobstore
|
|
|
|
|
boolean
|
|
|
|
|
breakpoint
|
|
|
|
|
broadcasted
|
|
|
|
|
checksums
|
|
|
|
|
classpath
|
|
|
|
|
clickstream
|
2022-07-15 14:03:34 -04:00
|
|
|
|
clientConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
codebase
|
|
|
|
|
codec
|
|
|
|
|
colocated
|
|
|
|
|
colocation
|
|
|
|
|
compactable
|
2021-03-24 14:41:44 -04:00
|
|
|
|
compactionTask
|
2019-09-17 15:47:30 -04:00
|
|
|
|
config
|
|
|
|
|
configs
|
2020-12-10 11:24:33 -05:00
|
|
|
|
consumerProperties
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cron
|
|
|
|
|
csv
|
|
|
|
|
customizable
|
|
|
|
|
dataset
|
|
|
|
|
datasets
|
|
|
|
|
datasketches
|
|
|
|
|
datasource
|
|
|
|
|
datasources
|
|
|
|
|
dbcp
|
2021-08-13 16:40:25 -04:00
|
|
|
|
deepstore
|
2019-09-17 15:47:30 -04:00
|
|
|
|
denormalization
|
|
|
|
|
denormalize
|
|
|
|
|
denormalized
|
2020-03-13 04:41:54 -04:00
|
|
|
|
deprioritization
|
|
|
|
|
deprioritizes
|
2019-09-17 15:47:30 -04:00
|
|
|
|
dequeued
|
|
|
|
|
deserialization
|
|
|
|
|
deserialize
|
|
|
|
|
deserialized
|
2022-08-19 20:12:19 -04:00
|
|
|
|
deserializes
|
2019-09-17 15:47:30 -04:00
|
|
|
|
downtimes
|
2020-03-23 21:15:45 -04:00
|
|
|
|
druid
|
2020-12-15 00:10:31 -05:00
|
|
|
|
druid–kubernetes-extensions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
e.g.
|
|
|
|
|
encodings
|
|
|
|
|
endian
|
2022-07-15 14:03:34 -04:00
|
|
|
|
endpointConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
enum
|
2022-08-19 20:12:19 -04:00
|
|
|
|
expectedType
|
2020-01-17 18:52:05 -05:00
|
|
|
|
expr
|
2019-09-17 15:47:30 -04:00
|
|
|
|
failover
|
2020-01-17 18:52:05 -05:00
|
|
|
|
featureSpec
|
|
|
|
|
findColumnsFromHeader
|
2019-09-17 15:47:30 -04:00
|
|
|
|
filenames
|
|
|
|
|
filesystem
|
|
|
|
|
firefox
|
|
|
|
|
firehose
|
|
|
|
|
firehoses
|
2020-02-11 00:53:11 -05:00
|
|
|
|
fromPigAvroStorage
|
2019-09-17 15:47:30 -04:00
|
|
|
|
frontends
|
|
|
|
|
granularities
|
2021-03-24 14:41:44 -04:00
|
|
|
|
granularitySpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
gzip
|
|
|
|
|
gzipped
|
|
|
|
|
hadoop
|
|
|
|
|
hasher
|
|
|
|
|
hashtable
|
2022-05-16 04:12:00 -04:00
|
|
|
|
high-QPS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
historicals
|
|
|
|
|
hostname
|
|
|
|
|
hostnames
|
|
|
|
|
http
|
|
|
|
|
https
|
2020-07-08 02:12:39 -04:00
|
|
|
|
idempotency
|
2019-09-17 15:47:30 -04:00
|
|
|
|
i.e.
|
|
|
|
|
influxdb
|
2020-02-25 23:59:53 -05:00
|
|
|
|
ingestionSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
injective
|
|
|
|
|
inlined
|
2022-03-22 09:03:57 -04:00
|
|
|
|
inSubQueryThreshold
|
2019-09-17 15:47:30 -04:00
|
|
|
|
interruptible
|
2022-08-19 20:12:19 -04:00
|
|
|
|
isAllowList
|
2020-01-17 18:52:05 -05:00
|
|
|
|
jackson-jq
|
2019-09-17 15:47:30 -04:00
|
|
|
|
javadoc
|
2020-06-30 00:08:13 -04:00
|
|
|
|
joinable
|
2022-08-19 20:12:19 -04:00
|
|
|
|
json_keys
|
|
|
|
|
json_object
|
|
|
|
|
json_paths
|
|
|
|
|
json_query
|
|
|
|
|
json_value
|
2019-09-17 15:47:30 -04:00
|
|
|
|
kerberos
|
|
|
|
|
keystore
|
2021-04-14 11:58:17 -04:00
|
|
|
|
keytool
|
2019-09-17 15:47:30 -04:00
|
|
|
|
keytab
|
2020-12-15 00:10:31 -05:00
|
|
|
|
kubernetes
|
2020-03-10 05:57:16 -04:00
|
|
|
|
laning
|
2019-09-17 15:47:30 -04:00
|
|
|
|
lifecycle
|
|
|
|
|
localhost
|
|
|
|
|
log4j
|
|
|
|
|
log4j2
|
|
|
|
|
log4j2.xml
|
|
|
|
|
lookback
|
|
|
|
|
lookups
|
|
|
|
|
mapreduce
|
|
|
|
|
masse
|
2022-02-15 23:45:07 -05:00
|
|
|
|
maxNumericInFilters
|
2020-08-21 12:43:58 -04:00
|
|
|
|
maxNumFiles
|
|
|
|
|
maxNumSegments
|
2019-10-09 02:43:58 -04:00
|
|
|
|
max_map_count
|
2019-09-17 15:47:30 -04:00
|
|
|
|
memcached
|
|
|
|
|
mergeable
|
|
|
|
|
metadata
|
|
|
|
|
millis
|
|
|
|
|
misconfiguration
|
2021-03-24 14:41:44 -04:00
|
|
|
|
misconfigured
|
2020-01-23 16:42:03 -05:00
|
|
|
|
mostAvailableSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
multitenancy
|
|
|
|
|
multitenant
|
|
|
|
|
mysql
|
|
|
|
|
namespace
|
|
|
|
|
namespaced
|
|
|
|
|
namespaces
|
|
|
|
|
natively
|
|
|
|
|
netflow
|
|
|
|
|
non-nullable
|
|
|
|
|
noop
|
|
|
|
|
numerics
|
2021-03-24 14:41:44 -04:00
|
|
|
|
numShards
|
2019-09-17 15:47:30 -04:00
|
|
|
|
parameterized
|
2022-08-19 20:12:19 -04:00
|
|
|
|
parse_json
|
2019-09-17 15:47:30 -04:00
|
|
|
|
parseable
|
|
|
|
|
partitioner
|
2020-09-24 19:32:56 -04:00
|
|
|
|
partitionFunction
|
2020-09-15 14:28:09 -04:00
|
|
|
|
partitionsSpec
|
2022-08-19 20:12:19 -04:00
|
|
|
|
pathParts
|
2019-09-17 15:47:30 -04:00
|
|
|
|
performant
|
|
|
|
|
plaintext
|
|
|
|
|
pluggable
|
|
|
|
|
postgres
|
|
|
|
|
postgresql
|
|
|
|
|
pre-aggregated
|
|
|
|
|
pre-aggregates
|
|
|
|
|
pre-aggregating
|
|
|
|
|
pre-aggregation
|
|
|
|
|
pre-computation
|
|
|
|
|
pre-compute
|
|
|
|
|
pre-computing
|
|
|
|
|
pre-configured
|
2020-04-17 01:12:20 -04:00
|
|
|
|
pre-filtered
|
|
|
|
|
pre-filtering
|
2019-09-17 15:47:30 -04:00
|
|
|
|
pre-generated
|
|
|
|
|
pre-made
|
|
|
|
|
pre-processing
|
|
|
|
|
preemptible
|
|
|
|
|
prefetch
|
|
|
|
|
prefetched
|
|
|
|
|
prefetching
|
|
|
|
|
prepend
|
|
|
|
|
prepended
|
|
|
|
|
prepending
|
|
|
|
|
prepends
|
2020-04-30 15:07:28 -04:00
|
|
|
|
prepopulated
|
2019-09-17 15:47:30 -04:00
|
|
|
|
preprocessing
|
|
|
|
|
priori
|
2020-07-08 22:47:09 -04:00
|
|
|
|
procs
|
2022-08-19 20:12:19 -04:00
|
|
|
|
processFromRaw
|
2019-09-17 15:47:30 -04:00
|
|
|
|
programmatically
|
|
|
|
|
proto
|
|
|
|
|
proxied
|
2022-07-15 14:03:34 -04:00
|
|
|
|
proxyConfig
|
2022-05-16 04:12:00 -04:00
|
|
|
|
QPS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
quantile
|
|
|
|
|
quantiles
|
|
|
|
|
queryable
|
|
|
|
|
quickstart
|
|
|
|
|
realtime
|
|
|
|
|
rebalance
|
|
|
|
|
redis
|
|
|
|
|
regexes
|
|
|
|
|
reimported
|
|
|
|
|
reindex
|
|
|
|
|
reindexing
|
|
|
|
|
reingest
|
|
|
|
|
reingesting
|
|
|
|
|
reingestion
|
|
|
|
|
repo
|
2021-04-01 20:30:47 -04:00
|
|
|
|
requireSSL
|
2019-09-17 15:47:30 -04:00
|
|
|
|
rollup
|
|
|
|
|
rollups
|
|
|
|
|
rsync
|
|
|
|
|
runtime
|
|
|
|
|
schemas
|
2022-04-05 12:15:42 -04:00
|
|
|
|
schemaless
|
2019-09-17 15:47:30 -04:00
|
|
|
|
searchable
|
2020-09-24 19:32:56 -04:00
|
|
|
|
secondaryPartitionPruning
|
2020-07-08 02:12:39 -04:00
|
|
|
|
seekable-stream
|
2019-12-12 20:00:08 -05:00
|
|
|
|
servlet
|
2022-05-16 04:12:00 -04:00
|
|
|
|
setProcessingThreadNames
|
2020-04-10 21:01:59 -04:00
|
|
|
|
simple-client-sslcontext
|
2019-09-17 15:47:30 -04:00
|
|
|
|
sharded
|
|
|
|
|
sharding
|
2020-01-17 18:52:05 -05:00
|
|
|
|
skipHeaderRows
|
2022-07-20 18:37:57 -04:00
|
|
|
|
Smoosh
|
|
|
|
|
smoosh
|
2019-09-17 15:47:30 -04:00
|
|
|
|
smooshed
|
|
|
|
|
splittable
|
2021-04-01 20:30:47 -04:00
|
|
|
|
ssl
|
|
|
|
|
sslmode
|
2019-09-17 15:47:30 -04:00
|
|
|
|
stdout
|
|
|
|
|
storages
|
|
|
|
|
stringified
|
|
|
|
|
subarray
|
|
|
|
|
subnet
|
|
|
|
|
subqueries
|
|
|
|
|
subquery
|
|
|
|
|
subsecond
|
|
|
|
|
substring
|
2020-08-21 12:43:58 -04:00
|
|
|
|
subtask
|
2020-07-08 02:12:39 -04:00
|
|
|
|
subtasks
|
2020-10-10 22:35:17 -04:00
|
|
|
|
supervisorTaskId
|
2019-09-17 15:47:30 -04:00
|
|
|
|
symlink
|
2022-08-19 20:12:19 -04:00
|
|
|
|
syntaxes
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tiering
|
|
|
|
|
timeseries
|
|
|
|
|
timestamp
|
|
|
|
|
timestamps
|
2022-08-19 20:12:19 -04:00
|
|
|
|
to_json_string
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tradeoffs
|
2021-11-24 13:56:38 -05:00
|
|
|
|
transformSpec
|
2022-08-19 20:12:19 -04:00
|
|
|
|
try_parse_json
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tsv
|
2019-10-09 02:43:58 -04:00
|
|
|
|
ulimit
|
2019-09-17 15:47:30 -04:00
|
|
|
|
unannounce
|
|
|
|
|
unannouncements
|
|
|
|
|
unary
|
|
|
|
|
unassign
|
|
|
|
|
uncomment
|
|
|
|
|
underutilization
|
|
|
|
|
unintuitive
|
|
|
|
|
unioned
|
|
|
|
|
unmergeable
|
|
|
|
|
unmerged
|
2021-08-13 16:40:25 -04:00
|
|
|
|
UNNEST
|
2019-09-17 15:47:30 -04:00
|
|
|
|
unparseable
|
|
|
|
|
unparsed
|
2020-04-30 15:07:28 -04:00
|
|
|
|
unsetting
|
2020-11-19 18:24:58 -05:00
|
|
|
|
untrusted
|
2020-04-17 01:12:20 -04:00
|
|
|
|
useFilterCNF
|
2022-08-19 20:12:19 -04:00
|
|
|
|
useJqSyntax
|
2021-04-01 20:30:47 -04:00
|
|
|
|
useSSL
|
2019-09-17 15:47:30 -04:00
|
|
|
|
uptime
|
2019-11-19 22:49:43 -05:00
|
|
|
|
uris
|
2021-02-27 17:25:35 -05:00
|
|
|
|
urls
|
2020-01-17 18:52:05 -05:00
|
|
|
|
useFieldDiscovery
|
2019-09-17 15:47:30 -04:00
|
|
|
|
v1
|
|
|
|
|
v2
|
|
|
|
|
vCPUs
|
|
|
|
|
validator
|
2022-08-19 20:12:19 -04:00
|
|
|
|
varchar
|
2019-09-17 15:47:30 -04:00
|
|
|
|
vectorizable
|
|
|
|
|
vectorize
|
2020-09-28 21:48:34 -04:00
|
|
|
|
vectorizeVirtualColumns
|
2019-09-17 15:47:30 -04:00
|
|
|
|
versioning
|
2022-08-18 23:49:23 -04:00
|
|
|
|
virtualColumns
|
2019-09-17 15:47:30 -04:00
|
|
|
|
w.r.t.
|
|
|
|
|
whitelist
|
|
|
|
|
whitelisted
|
|
|
|
|
whitespace
|
|
|
|
|
wildcard
|
2019-10-28 11:07:38 -04:00
|
|
|
|
wildcards
|
2019-09-17 15:47:30 -04:00
|
|
|
|
xml
|
|
|
|
|
znode
|
|
|
|
|
znodes
|
2022-02-11 17:43:30 -05:00
|
|
|
|
APPROX_COUNT_DISTINCT
|
|
|
|
|
APPROX_QUANTILE
|
|
|
|
|
ARRAY_AGG
|
|
|
|
|
BIGINT
|
|
|
|
|
CATALOG_NAME
|
|
|
|
|
CHARACTER_MAXIMUM_LENGTH
|
|
|
|
|
CHARACTER_OCTET_LENGTH
|
|
|
|
|
CHARACTER_SET_NAME
|
|
|
|
|
COLLATION_NAME
|
|
|
|
|
COLUMN_DEFAULT
|
|
|
|
|
COLUMN_NAME
|
|
|
|
|
Concats
|
|
|
|
|
DATA_TYPE
|
|
|
|
|
DATETIME_PRECISION
|
|
|
|
|
DEFAULT_CHARACTER_SET_CATALOG
|
|
|
|
|
DEFAULT_CHARACTER_SET_NAME
|
|
|
|
|
DEFAULT_CHARACTER_SET_SCHEMA
|
|
|
|
|
ISODOW
|
|
|
|
|
ISOYEAR
|
|
|
|
|
IS_NULLABLE
|
|
|
|
|
JDBC_TYPE
|
|
|
|
|
MIDDLE_MANAGER
|
2022-06-21 16:05:37 -04:00
|
|
|
|
MILLIS_TO_TIMESTAMP
|
2022-02-11 17:43:30 -05:00
|
|
|
|
NULLable
|
|
|
|
|
NUMERIC_PRECISION
|
|
|
|
|
NUMERIC_PRECISION_RADIX
|
|
|
|
|
NUMERIC_SCALE
|
|
|
|
|
ORDINAL_POSITION
|
2022-06-21 16:05:37 -04:00
|
|
|
|
POSIX
|
2022-02-11 17:43:30 -05:00
|
|
|
|
PT1M
|
|
|
|
|
PT5M
|
|
|
|
|
SCHEMA_NAME
|
|
|
|
|
SCHEMA_OWNER
|
|
|
|
|
SERVER_SEGMENTS
|
|
|
|
|
SMALLINT
|
|
|
|
|
SQL_PATH
|
|
|
|
|
STRING_AGG
|
|
|
|
|
SYSTEM_TABLE
|
|
|
|
|
TABLE_CATALOG
|
|
|
|
|
TABLE_NAME
|
|
|
|
|
TABLE_SCHEMA
|
|
|
|
|
TABLE_TYPE
|
|
|
|
|
TIME_PARSE
|
|
|
|
|
TIME_SHIFT
|
|
|
|
|
TINYINT
|
|
|
|
|
VARCHAR
|
|
|
|
|
avg_num_rows
|
|
|
|
|
avg_size
|
|
|
|
|
created_time
|
|
|
|
|
current_size
|
|
|
|
|
detailed_state
|
|
|
|
|
druid.server.maxSize
|
|
|
|
|
druid.server.tier
|
|
|
|
|
druid.sql.planner.maxSemiJoinRowsInMemory
|
|
|
|
|
druid.sql.planner.sqlTimeZone
|
|
|
|
|
druid.sql.planner.useApproximateCountDistinct
|
|
|
|
|
druid.sql.planner.useApproximateTopN
|
2022-08-22 21:47:40 -04:00
|
|
|
|
druid.sql.planner.useGroupingSetForExactDistinct
|
|
|
|
|
druid.sql.planner.useNativeQueryExplain
|
2022-02-11 17:43:30 -05:00
|
|
|
|
error_msg
|
|
|
|
|
exprs
|
|
|
|
|
group_id
|
|
|
|
|
interval_expr
|
2022-05-19 17:23:28 -04:00
|
|
|
|
is_active
|
2022-02-11 17:43:30 -05:00
|
|
|
|
is_available
|
|
|
|
|
is_leader
|
|
|
|
|
is_overshadowed
|
|
|
|
|
is_published
|
|
|
|
|
is_realtime
|
|
|
|
|
java.sql.Types
|
|
|
|
|
last_compaction_state
|
|
|
|
|
max_size
|
|
|
|
|
num_replicas
|
|
|
|
|
num_rows
|
|
|
|
|
num_segments
|
|
|
|
|
partition_num
|
|
|
|
|
plaintext_port
|
|
|
|
|
queue_insertion_time
|
|
|
|
|
runner_status
|
|
|
|
|
segment_id
|
|
|
|
|
server_type
|
|
|
|
|
shard_spec
|
|
|
|
|
sqlTimeZone
|
|
|
|
|
supervisor_id
|
|
|
|
|
sys
|
|
|
|
|
sys.segments
|
|
|
|
|
task_id
|
|
|
|
|
timestamp_expr
|
|
|
|
|
tls_port
|
|
|
|
|
total_size
|
|
|
|
|
useApproximateCountDistinct
|
|
|
|
|
useGroupingSetForExactDistinct
|
|
|
|
|
useApproximateTopN
|
|
|
|
|
wikipedia
|
2022-05-10 05:53:42 -04:00
|
|
|
|
enableTimeBoundaryPlanning
|
|
|
|
|
TimeBoundary
|
|
|
|
|
druid.query.default.context.enableTimeBoundaryPlanning
|
2022-02-11 17:43:30 -05:00
|
|
|
|
IEC
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/comparisons/druid-vs-elasticsearch.md
|
|
|
|
|
100x
|
|
|
|
|
- ../docs/configuration/logging.md
|
|
|
|
|
_common
|
2022-05-16 05:37:21 -04:00
|
|
|
|
appenders
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/dependencies/deep-storage.md
|
|
|
|
|
druid-hdfs-storage
|
|
|
|
|
druid-s3-extensions
|
2022-02-15 23:45:07 -05:00
|
|
|
|
druid.sql.planner.maxNumericInFilters
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/dependencies/metadata-storage.md
|
|
|
|
|
BasicDataSource
|
|
|
|
|
- ../docs/dependencies/zookeeper.md
|
2021-11-23 01:28:51 -05:00
|
|
|
|
LeaderLatch
|
2021-05-25 15:49:49 -04:00
|
|
|
|
3.5.x
|
|
|
|
|
3.4.x
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/design/auth.md
|
|
|
|
|
AllowAll
|
|
|
|
|
AuthenticationResult
|
|
|
|
|
AuthorizationLoadingLookupTest
|
|
|
|
|
HttpClient
|
|
|
|
|
allowAll
|
|
|
|
|
authenticatorChain
|
|
|
|
|
defaultUser
|
|
|
|
|
- ../docs/design/coordinator.md
|
|
|
|
|
inputSegmentSizeBytes
|
|
|
|
|
skipOffsetFromLatest
|
|
|
|
|
- ../docs/design/router.md
|
2021-07-27 11:26:05 -04:00
|
|
|
|
brokerService
|
2019-09-17 15:47:30 -04:00
|
|
|
|
c3.2xlarge
|
2021-07-27 11:26:05 -04:00
|
|
|
|
defaultManualBrokerService
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxPriority
|
|
|
|
|
minPriority
|
|
|
|
|
runtime.properties
|
|
|
|
|
timeBoundary
|
|
|
|
|
- ../docs/design/segments.md
|
|
|
|
|
0x0
|
|
|
|
|
0x9
|
|
|
|
|
2GB
|
|
|
|
|
300mb-700mb
|
|
|
|
|
Bieber
|
|
|
|
|
IndexTask-based
|
|
|
|
|
Ke
|
|
|
|
|
datasource_intervalStart_intervalEnd_version_partitionNum
|
|
|
|
|
partitionNum
|
|
|
|
|
v9
|
|
|
|
|
- ../docs/development/build.md
|
|
|
|
|
3.x
|
|
|
|
|
8u92
|
|
|
|
|
DskipTests
|
|
|
|
|
Papache-release
|
|
|
|
|
Pdist
|
2020-05-21 15:35:54 -04:00
|
|
|
|
Ddruid.console.skip
|
|
|
|
|
yaml
|
2021-10-30 13:16:24 -04:00
|
|
|
|
Phadoop3
|
|
|
|
|
dist-hadoop3
|
|
|
|
|
hadoop3
|
|
|
|
|
hadoop2
|
|
|
|
|
2.x.x
|
|
|
|
|
3.x.x
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/ambari-metrics-emitter.md
|
|
|
|
|
ambari-metrics
|
|
|
|
|
metricName
|
|
|
|
|
trustStore
|
2020-02-25 20:49:16 -05:00
|
|
|
|
- ../docs/development/extensions-core/azure.md
|
2019-09-17 15:47:30 -04:00
|
|
|
|
StaticAzureBlobStoreFirehose
|
|
|
|
|
StaticS3Firehose
|
|
|
|
|
fetchTimeout
|
|
|
|
|
gz
|
|
|
|
|
maxCacheCapacityBytes
|
|
|
|
|
maxFetchCapacityBytes
|
|
|
|
|
maxFetchRetry
|
|
|
|
|
prefetchTriggerBytes
|
|
|
|
|
shardSpecs
|
2022-02-22 07:57:43 -05:00
|
|
|
|
sharedAccessStorageToken
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/cloudfiles.md
|
|
|
|
|
StaticCloudFilesFirehose
|
|
|
|
|
cloudfiles
|
|
|
|
|
rackspace-cloudfiles-uk
|
|
|
|
|
rackspace-cloudfiles-us
|
2020-02-25 20:49:16 -05:00
|
|
|
|
StaticAzureBlobStoreFirehose
|
|
|
|
|
gz
|
|
|
|
|
shardSpecs
|
|
|
|
|
maxCacheCapacityBytes
|
|
|
|
|
maxFetchCapacityBytes
|
|
|
|
|
fetchTimeout
|
|
|
|
|
maxFetchRetry
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/distinctcount.md
|
|
|
|
|
distinctCount
|
|
|
|
|
groupBy
|
|
|
|
|
maxIntermediateRows
|
|
|
|
|
numValuesPerPass
|
|
|
|
|
queryGranularity
|
|
|
|
|
segmentGranularity
|
|
|
|
|
topN
|
|
|
|
|
visitor_id
|
|
|
|
|
- ../docs/development/extensions-contrib/influx.md
|
|
|
|
|
cpu
|
|
|
|
|
web_requests
|
|
|
|
|
- ../docs/development/extensions-contrib/influxdb-emitter.md
|
|
|
|
|
_
|
|
|
|
|
druid_
|
|
|
|
|
druid_cache_total
|
|
|
|
|
druid_hits
|
|
|
|
|
druid_query
|
|
|
|
|
historical001
|
|
|
|
|
- ../docs/development/extensions-contrib/materialized-view.md
|
|
|
|
|
HadoopTuningConfig
|
|
|
|
|
TuningConfig
|
|
|
|
|
base-dataSource's
|
|
|
|
|
baseDataSource
|
|
|
|
|
baseDataSource-hashCode
|
|
|
|
|
classpathPrefix
|
|
|
|
|
derivativeDataSource
|
|
|
|
|
druid.extensions.hadoopDependenciesDir
|
|
|
|
|
hadoopDependencyCoordinates
|
|
|
|
|
maxTaskCount
|
|
|
|
|
metricsSpec
|
|
|
|
|
queryType
|
|
|
|
|
tuningConfig
|
|
|
|
|
- ../docs/development/extensions-contrib/momentsketch-quantiles.md
|
|
|
|
|
arcsinh
|
|
|
|
|
fieldName
|
|
|
|
|
momentSketchMerge
|
|
|
|
|
momentsketch
|
|
|
|
|
- ../docs/development/extensions-contrib/moving-average-query.md
|
|
|
|
|
10-minutes
|
|
|
|
|
MeanNoNulls
|
|
|
|
|
P1D
|
|
|
|
|
cycleSize
|
|
|
|
|
doubleMax
|
2020-09-14 22:44:58 -04:00
|
|
|
|
doubleAny
|
2019-09-17 15:47:30 -04:00
|
|
|
|
doubleMean
|
|
|
|
|
doubleMeanNoNulls
|
|
|
|
|
doubleMin
|
2019-10-25 01:04:08 -04:00
|
|
|
|
doubleSum
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid.generic.useDefaultValueForNull
|
2022-03-29 17:31:36 -04:00
|
|
|
|
druid.generic.ignoreNullsForStringCardinality
|
2019-09-17 15:47:30 -04:00
|
|
|
|
limitSpec
|
|
|
|
|
longMax
|
2020-09-14 22:44:58 -04:00
|
|
|
|
longAny
|
2019-09-17 15:47:30 -04:00
|
|
|
|
longMean
|
|
|
|
|
longMeanNoNulls
|
|
|
|
|
longMin
|
2019-10-25 01:04:08 -04:00
|
|
|
|
longSum
|
2019-09-17 15:47:30 -04:00
|
|
|
|
movingAverage
|
|
|
|
|
postAggregations
|
|
|
|
|
postAveragers
|
2021-05-28 01:10:55 -04:00
|
|
|
|
pull-deps
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/opentsdb-emitter.md
|
|
|
|
|
defaultMetrics.json
|
Add config option for namespacePrefix (#9372)
* Add config option for namespacePrefix
opentsdb emitter sends metric names to opentsdb verbatim as what druid
names them, for example "query.count", this doesn't fit well with a
central opentsdb server which might have namespaced metrics, for example
"druid.query.count". This adds support for adding an optional prefix.
The prefix also gets a trailing dot (.), after it, so the metric name
becomes <namespacePrefix>.<metricname>
configureable as "druid.emitter.opentsdb.namespacePrefix", as
documented.
Co-authored-by: Martin Gerholm <martin.gerholm@deltaprojects.com>
Signed-off-by: Martin Gerholm <martin.gerholm@deltaprojects.com>
Signed-off-by: Björn Zettergren <bjorn.zettergren@deltaprojects.com>
* Spelling for PR #9372
Added "namespacePrefix" to .spelling exceptions, it's a variable name
used in documentation for opentsdb-emitter.
* fixing tests for PR #9372
changed naming of variables to be more descriptive
added test of prefix being an empty string: "".
added a conditional to buildNamespacePrefix to check for empty string
being fed if EventConverter called without OpentsdbEmitterConfig
instance.
* fixing checkstyle errors for PR #9372
used == to compare literal string, should be equals()
* cleaned up and updated PR #9372
Created a buildMetric function as suggested by clintropolis, and
removed redundant tests for empty strings as they're only used when
calling EventConverter directly without going through
OpentsdbEmitterConfig.
* consistent naming of tests PR #9372
Changed names of tests in files to match better with what it was
actually testing
changed check for Strings.isNullOrEmpty to just check for `null`, as
empty string valued `namespacePrefix` is handled in
OpentsdbEmitterConfig.
Co-authored-by: Martin Gerholm <inspector-martin@users.noreply.github.com>
2020-02-20 17:01:41 -05:00
|
|
|
|
namespacePrefix
|
2019-09-17 15:47:30 -04:00
|
|
|
|
src
|
|
|
|
|
- ../docs/development/extensions-contrib/redis-cache.md
|
|
|
|
|
loadList
|
2020-08-23 22:29:04 -04:00
|
|
|
|
pull-deps
|
|
|
|
|
PT2S
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/sqlserver.md
|
|
|
|
|
com.microsoft.sqlserver.jdbc.SQLServerDriver
|
|
|
|
|
sqljdbc
|
|
|
|
|
- ../docs/development/extensions-contrib/statsd.md
|
|
|
|
|
convertRange
|
2021-03-09 17:37:31 -05:00
|
|
|
|
- ../docs/development/extensions-contrib/prometheus.md
|
|
|
|
|
HTTPServer
|
|
|
|
|
conversionFactor
|
|
|
|
|
prometheus
|
|
|
|
|
Pushgateway
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/tdigestsketch-quantiles.md
|
|
|
|
|
postAggregator
|
|
|
|
|
quantileFromTDigestSketch
|
|
|
|
|
quantilesFromTDigestSketch
|
|
|
|
|
tDigestSketch
|
|
|
|
|
- ../docs/development/extensions-contrib/thrift.md
|
|
|
|
|
HadoopDruidIndexer
|
|
|
|
|
LzoThriftBlock
|
|
|
|
|
SequenceFile
|
|
|
|
|
classname
|
|
|
|
|
hadoop-lzo
|
|
|
|
|
inputFormat
|
|
|
|
|
inputSpec
|
|
|
|
|
ioConfig
|
|
|
|
|
parseSpec
|
|
|
|
|
thriftClass
|
|
|
|
|
thriftJar
|
|
|
|
|
- ../docs/development/extensions-contrib/time-min-max.md
|
|
|
|
|
timeMax
|
|
|
|
|
timeMin
|
2020-07-02 01:20:53 -04:00
|
|
|
|
- ../docs/development/extensions-contrib/aliyun-oss-extensions.md
|
2021-08-09 20:27:35 -04:00
|
|
|
|
Alibaba
|
2020-07-02 01:20:53 -04:00
|
|
|
|
Aliyun
|
2021-08-09 20:27:35 -04:00
|
|
|
|
aliyun-oss-extensions
|
2020-07-02 01:20:53 -04:00
|
|
|
|
AccessKey
|
2021-08-09 20:27:35 -04:00
|
|
|
|
accessKey
|
2020-07-02 01:20:53 -04:00
|
|
|
|
aliyun-oss
|
2021-08-09 20:27:35 -04:00
|
|
|
|
json
|
|
|
|
|
OSS
|
2020-07-02 01:20:53 -04:00
|
|
|
|
oss
|
2021-08-09 20:27:35 -04:00
|
|
|
|
secretKey
|
2020-07-02 01:20:53 -04:00
|
|
|
|
url
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-core/approximate-histograms.md
|
|
|
|
|
approxHistogram
|
|
|
|
|
approxHistogramFold
|
2020-09-09 16:56:33 -04:00
|
|
|
|
fixedBucketsHistogram
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bucketNum
|
|
|
|
|
lowerLimit
|
|
|
|
|
numBuckets
|
|
|
|
|
upperLimit
|
|
|
|
|
- ../docs/development/extensions-core/avro.md
|
|
|
|
|
AVRO-1124
|
|
|
|
|
Avro-1124
|
|
|
|
|
SchemaRepo
|
|
|
|
|
avro
|
|
|
|
|
avroBytesDecoder
|
2021-04-13 01:03:13 -04:00
|
|
|
|
protoBytesDecoder
|
2020-10-08 00:08:22 -04:00
|
|
|
|
flattenSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
jq
|
|
|
|
|
org.apache.druid.extensions
|
|
|
|
|
schemaRepository
|
|
|
|
|
schema_inline
|
|
|
|
|
subjectAndIdConverter
|
|
|
|
|
url
|
|
|
|
|
- ../docs/development/extensions-core/bloom-filter.md
|
|
|
|
|
BloomKFilter
|
|
|
|
|
bitset
|
|
|
|
|
outputStream
|
|
|
|
|
- ../docs/development/extensions-core/datasketches-hll.md
|
|
|
|
|
HLLSketchBuild
|
|
|
|
|
HLLSketchMerge
|
|
|
|
|
lgK
|
|
|
|
|
log2
|
|
|
|
|
tgtHllType
|
|
|
|
|
- ../docs/development/extensions-core/datasketches-quantiles.md
|
|
|
|
|
CDF
|
|
|
|
|
DoublesSketch
|
2021-08-31 17:56:37 -04:00
|
|
|
|
maxStreamLength
|
2019-09-17 15:47:30 -04:00
|
|
|
|
PMF
|
|
|
|
|
quantilesDoublesSketch
|
|
|
|
|
toString
|
|
|
|
|
- ../docs/development/extensions-core/datasketches-theta.md
|
|
|
|
|
isInputThetaSketch
|
|
|
|
|
thetaSketch
|
|
|
|
|
user_id
|
|
|
|
|
- ../docs/development/extensions-core/datasketches-tuple.md
|
|
|
|
|
ArrayOfDoublesSketch
|
|
|
|
|
arrayOfDoublesSketch
|
|
|
|
|
metricColumns
|
|
|
|
|
nominalEntries
|
|
|
|
|
numberOfValues
|
|
|
|
|
- ../docs/development/extensions-core/druid-basic-security.md
|
|
|
|
|
INFORMATION_SCHEMA
|
|
|
|
|
MyBasicAuthenticator
|
|
|
|
|
MyBasicAuthorizer
|
|
|
|
|
authenticatorName
|
|
|
|
|
authorizerName
|
|
|
|
|
druid_system
|
|
|
|
|
pollingPeriod
|
|
|
|
|
roleName
|
2019-10-08 20:08:27 -04:00
|
|
|
|
LDAP
|
|
|
|
|
ldap
|
|
|
|
|
MyBasicMetadataAuthenticator
|
|
|
|
|
MyBasicLDAPAuthenticator
|
|
|
|
|
MyBasicMetadataAuthorizer
|
|
|
|
|
MyBasicLDAPAuthorizer
|
|
|
|
|
credentialsValidator
|
|
|
|
|
sAMAccountName
|
|
|
|
|
objectClass
|
|
|
|
|
initialAdminRole
|
|
|
|
|
adminGroupMapping
|
|
|
|
|
groupMappingName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-core/druid-kerberos.md
|
2021-06-30 16:42:45 -04:00
|
|
|
|
8KiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HttpComponents
|
|
|
|
|
MyKerberosAuthenticator
|
|
|
|
|
RFC-4559
|
|
|
|
|
SPNego
|
|
|
|
|
_HOST
|
|
|
|
|
- ../docs/development/extensions-core/druid-lookups.md
|
|
|
|
|
cacheFactory
|
|
|
|
|
concurrencyLevel
|
|
|
|
|
dataFetcher
|
|
|
|
|
expireAfterAccess
|
|
|
|
|
expireAfterWrite
|
|
|
|
|
initialCapacity
|
|
|
|
|
loadingCacheSpec
|
|
|
|
|
maxEntriesSize
|
|
|
|
|
maxStoreSize
|
|
|
|
|
maximumSize
|
|
|
|
|
onHeapPolling
|
|
|
|
|
pollPeriod
|
|
|
|
|
reverseLoadingCacheSpec
|
2020-03-23 21:15:45 -04:00
|
|
|
|
- ../docs/development/extensions-core/druid-pac4j.md
|
|
|
|
|
OAuth
|
|
|
|
|
Okta
|
|
|
|
|
OpenID
|
|
|
|
|
pac4j
|
2020-12-15 00:10:31 -05:00
|
|
|
|
- ../docs/development/extensions-core/kubernetes.md
|
|
|
|
|
Env
|
|
|
|
|
POD_NAME
|
|
|
|
|
POD_NAMESPACE
|
|
|
|
|
ConfigMap
|
|
|
|
|
PT17S
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-core/google.md
|
|
|
|
|
GCS
|
|
|
|
|
StaticGoogleBlobStoreFirehose
|
|
|
|
|
- ../docs/development/extensions-core/hdfs.md
|
|
|
|
|
gcs-connector
|
|
|
|
|
hadoop2
|
|
|
|
|
hdfs
|
|
|
|
|
- ../docs/development/extensions-core/kafka-extraction-namespace.md
|
2022-08-09 06:44:22 -04:00
|
|
|
|
Aotearoa
|
|
|
|
|
Czechia
|
|
|
|
|
KTable
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LookupExtractorFactory
|
2022-08-09 06:44:22 -04:00
|
|
|
|
Zeelund
|
2019-09-17 15:47:30 -04:00
|
|
|
|
zookeeper.connect
|
|
|
|
|
- ../docs/development/extensions-core/kafka-ingestion.md
|
|
|
|
|
0.11.x.
|
|
|
|
|
00Z
|
|
|
|
|
2016-01-01T11
|
|
|
|
|
2016-01-01T12
|
|
|
|
|
2016-01-01T14
|
|
|
|
|
CONNECTING_TO_STREAM
|
|
|
|
|
CREATING_TASKS
|
|
|
|
|
DISCOVERING_INITIAL_TASKS
|
|
|
|
|
KafkaSupervisorIOConfig
|
|
|
|
|
KafkaSupervisorTuningConfig
|
|
|
|
|
LOST_CONTACT_WITH_STREAM
|
|
|
|
|
OffsetOutOfRangeException
|
|
|
|
|
P2147483647D
|
|
|
|
|
PT10M
|
|
|
|
|
PT10S
|
|
|
|
|
PT1H
|
|
|
|
|
PT30M
|
|
|
|
|
PT30S
|
|
|
|
|
PT5S
|
|
|
|
|
PT80S
|
2021-10-14 20:51:32 -04:00
|
|
|
|
SASL
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SegmentWriteOutMediumFactory
|
|
|
|
|
UNABLE_TO_CONNECT_TO_STREAM
|
|
|
|
|
UNHEALTHY_SUPERVISOR
|
|
|
|
|
UNHEALTHY_TASKS
|
|
|
|
|
dimensionCompression
|
|
|
|
|
earlyMessageRejectionPeriod
|
|
|
|
|
indexSpec
|
|
|
|
|
intermediateHandoffPeriod
|
|
|
|
|
longEncoding
|
|
|
|
|
maxBytesInMemory
|
|
|
|
|
maxPendingPersists
|
|
|
|
|
maxRowsInMemory
|
|
|
|
|
maxRowsPerSegment
|
|
|
|
|
maxSavedParseExceptions
|
|
|
|
|
maxTotalRows
|
|
|
|
|
metricCompression
|
|
|
|
|
numKafkaPartitions
|
|
|
|
|
taskCount
|
|
|
|
|
taskDuration
|
|
|
|
|
- ../docs/development/extensions-core/kinesis-ingestion.md
|
|
|
|
|
9.2dist
|
|
|
|
|
KinesisSupervisorIOConfig
|
|
|
|
|
KinesisSupervisorTuningConfig
|
2019-11-28 15:59:01 -05:00
|
|
|
|
Resharding
|
|
|
|
|
resharding
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LZ4LZFuncompressedLZ4LZ4LZFuncompressednoneLZ4autolongsautolongslongstypeconcisetyperoaringcompressRunOnSerializationtruetypestreamendpointreplicastaskCounttaskCount
|
|
|
|
|
deaggregate
|
|
|
|
|
druid-kinesis-indexing-service
|
|
|
|
|
maxRecordsPerPoll
|
|
|
|
|
maxRecordsPerPollrecordsPerFetchfetchDelayMillisreplicasfetchDelayMillisrecordsPerFetchfetchDelayMillismaxRecordsPerPollamazon-kinesis-client1
|
|
|
|
|
numKinesisShards
|
|
|
|
|
numProcessors
|
|
|
|
|
q.size
|
2019-10-16 02:19:17 -04:00
|
|
|
|
repartitionTransitionDuration
|
2019-09-17 15:47:30 -04:00
|
|
|
|
replicastaskCounttaskCount
|
|
|
|
|
resetuseEarliestSequenceNumberPOST
|
|
|
|
|
resumePOST
|
|
|
|
|
statusrecentErrorsdruid.supervisor.maxStoredExceptionEventsstatedetailedStatestatedetailedStatestatestatePENDINGRUNNINGSUSPENDEDSTOPPINGUNHEALTHY_SUPERVISORUNHEALTHY_TASKSdetailedStatestatedruid.supervisor.unhealthinessThresholddruid.supervisor.taskUnhealthinessThresholdtaskDurationtaskCountreplicasdetailedStatedetailedStateRUNNINGPOST
|
|
|
|
|
supervisorPOST
|
|
|
|
|
supervisorfetchThreadsfetchDelayMillisrecordsPerFetchmaxRecordsPerPollpoll
|
|
|
|
|
suspendPOST
|
|
|
|
|
taskCounttaskDurationreplicas
|
|
|
|
|
taskCounttaskDurationtaskDurationPOST
|
|
|
|
|
taskDurationstartDelayperioduseEarliestSequenceNumbercompletionTimeouttaskDurationlateMessageRejectionPeriodPT1HearlyMessageRejectionPeriodPT1HPT1HrecordsPerFetchfetchDelayMillisawsAssumedRoleArnawsExternalIddeaggregateGET
|
|
|
|
|
terminatePOST
|
|
|
|
|
terminatedruid.worker.capacitytaskDurationcompletionTimeoutreplicastaskCountreplicas
|
2019-10-16 02:19:17 -04:00
|
|
|
|
PT2M
|
|
|
|
|
kinesis.us
|
|
|
|
|
amazonaws.com
|
|
|
|
|
PT6H
|
|
|
|
|
GetRecords
|
|
|
|
|
KCL
|
|
|
|
|
signalled
|
|
|
|
|
ProvisionedThroughputExceededException
|
|
|
|
|
Deaggregation
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/development/extensions-core/lookups-cached-global.md
|
|
|
|
|
baz
|
|
|
|
|
customJson
|
|
|
|
|
lookupParseSpec
|
|
|
|
|
namespaceParseSpec
|
|
|
|
|
simpleJson
|
|
|
|
|
- ../docs/development/extensions-core/orc.md
|
|
|
|
|
dimensionSpec
|
|
|
|
|
flattenSpec
|
|
|
|
|
- ../docs/development/extensions-core/parquet.md
|
|
|
|
|
binaryAsString
|
|
|
|
|
- ../docs/development/extensions-core/postgresql.md
|
|
|
|
|
sslFactory's
|
|
|
|
|
sslMode
|
|
|
|
|
- ../docs/development/extensions-core/protobuf.md
|
|
|
|
|
Proto
|
|
|
|
|
metrics.desc
|
|
|
|
|
metrics.desc.
|
|
|
|
|
metrics.proto.
|
|
|
|
|
metrics_pb
|
|
|
|
|
protoMessageType
|
|
|
|
|
timeAndDims
|
|
|
|
|
tmp
|
|
|
|
|
- ../docs/development/extensions-core/s3.md
|
|
|
|
|
SigV4
|
|
|
|
|
jvm.config
|
|
|
|
|
kms
|
|
|
|
|
s3
|
|
|
|
|
s3a
|
|
|
|
|
s3n
|
|
|
|
|
uris
|
|
|
|
|
- ../docs/development/extensions-core/simple-client-sslcontext.md
|
|
|
|
|
KeyManager
|
|
|
|
|
SSLContext
|
|
|
|
|
TrustManager
|
|
|
|
|
- ../docs/development/extensions-core/stats.md
|
|
|
|
|
GenericUDAFVariance
|
|
|
|
|
Golub
|
|
|
|
|
J.L.
|
|
|
|
|
LeVeque
|
|
|
|
|
Numer
|
|
|
|
|
chunk1
|
|
|
|
|
chunk2
|
|
|
|
|
stddev
|
|
|
|
|
t1
|
|
|
|
|
t2
|
|
|
|
|
variance1
|
|
|
|
|
variance2
|
|
|
|
|
varianceFold
|
|
|
|
|
variance_pop
|
|
|
|
|
variance_sample
|
|
|
|
|
- ../docs/development/extensions-core/test-stats.md
|
|
|
|
|
Berry_statbook
|
|
|
|
|
Berry_statbook_chpt6.pdf
|
|
|
|
|
S.E.
|
|
|
|
|
engineering.com
|
|
|
|
|
jcb0773
|
|
|
|
|
n1
|
|
|
|
|
n2
|
|
|
|
|
p1
|
|
|
|
|
p2
|
|
|
|
|
pvalue2tailedZtest
|
|
|
|
|
sqrt
|
|
|
|
|
successCount1
|
|
|
|
|
successCount2
|
|
|
|
|
www.isixsigma.com
|
|
|
|
|
www.paypal
|
|
|
|
|
www.ucs.louisiana.edu
|
|
|
|
|
zscore
|
|
|
|
|
zscore2sample
|
|
|
|
|
ztests
|
|
|
|
|
- ../docs/development/extensions.md
|
|
|
|
|
DistinctCount
|
|
|
|
|
artifactId
|
|
|
|
|
com.example
|
|
|
|
|
common.runtime.properties
|
2021-01-07 00:15:29 -05:00
|
|
|
|
druid-aws-rds-extensions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid-cassandra-storage
|
|
|
|
|
druid-distinctcount
|
|
|
|
|
druid-ec2-extensions
|
|
|
|
|
druid-kafka-extraction-namespace
|
|
|
|
|
druid-kafka-indexing-service
|
|
|
|
|
druid-opentsdb-emitter
|
|
|
|
|
druid-protobuf-extensions
|
|
|
|
|
druid-tdigestsketch
|
|
|
|
|
druid.apache.org
|
|
|
|
|
groupId
|
|
|
|
|
jvm-global
|
|
|
|
|
kafka-emitter
|
|
|
|
|
org.apache.druid.extensions.contrib.
|
|
|
|
|
pull-deps
|
|
|
|
|
sqlserver-metadata-storage
|
|
|
|
|
statsd-emitter
|
|
|
|
|
- ../docs/development/geo.md
|
|
|
|
|
coords
|
|
|
|
|
dimName
|
|
|
|
|
maxCoords
|
2019-10-01 17:59:30 -04:00
|
|
|
|
Mb
|
2019-09-17 15:47:30 -04:00
|
|
|
|
minCoords
|
|
|
|
|
- ../docs/development/javascript.md
|
|
|
|
|
Metaspace
|
|
|
|
|
dev
|
|
|
|
|
- ../docs/development/modules.md
|
|
|
|
|
AggregatorFactory
|
|
|
|
|
ArchiveTask
|
|
|
|
|
ComplexMetrics
|
|
|
|
|
DataSegmentArchiver
|
|
|
|
|
DataSegmentKiller
|
|
|
|
|
DataSegmentMover
|
|
|
|
|
DataSegmentPuller
|
|
|
|
|
DataSegmentPusher
|
|
|
|
|
DruidModule
|
|
|
|
|
ExtractionFns
|
|
|
|
|
HdfsStorageDruidModule
|
|
|
|
|
JacksonInject
|
|
|
|
|
MapBinder
|
|
|
|
|
MoveTask
|
|
|
|
|
ObjectMapper
|
|
|
|
|
PasswordProvider
|
|
|
|
|
PostAggregators
|
|
|
|
|
QueryRunnerFactory
|
|
|
|
|
SegmentMetadataQuery
|
|
|
|
|
SegmentMetadataQueryQueryToolChest
|
|
|
|
|
StaticS3FirehoseFactory
|
|
|
|
|
loadSpec
|
|
|
|
|
multibind
|
|
|
|
|
pom.xml
|
|
|
|
|
- ../docs/ingestion/data-formats.md
|
|
|
|
|
0.6.x
|
|
|
|
|
0.7.x
|
|
|
|
|
0.7.x.
|
|
|
|
|
TimeAndDims
|
|
|
|
|
column2
|
|
|
|
|
column_1
|
|
|
|
|
column_n
|
|
|
|
|
com.opencsv
|
|
|
|
|
ctrl
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high-cardinality metrics into Druid across dimensions. These metrics are rolled up on a per-minute basis and are very useful when looking at metrics on a partition or client basis. Events are another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside the Kafka payload, but there is also some very useful metadata carried in Kafka headers that can serve as useful dimensions for aggregation and, in turn, bring better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collisions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieving rows, and merging the columns into one final row for Druid consumption. KafkaInputFormat should make sure to release the resources that get allocated as part of the reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
headerFormat
|
|
|
|
|
headerLabelPrefix
|
2019-09-17 15:47:30 -04:00
|
|
|
|
jsonLowercase
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high-cardinality metrics into Druid across dimensions. These metrics are rolled up on a per-minute basis and are very useful when looking at metrics on a partition or client basis. Events are another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside the Kafka payload, but there is also some very useful metadata carried in Kafka headers that can serve as useful dimensions for aggregation and, in turn, bring better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collisions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieving rows, and merging the columns into one final row for Druid consumption. KafkaInputFormat should make sure to release the resources that get allocated as part of the reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
kafka
|
|
|
|
|
KafkaStringHeaderFormat
|
|
|
|
|
kafka.header.
|
|
|
|
|
kafka.key
|
|
|
|
|
kafka.timestamp
|
|
|
|
|
keyColumnName
|
|
|
|
|
keyFormat
|
2019-09-17 15:47:30 -04:00
|
|
|
|
listDelimiter
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high-cardinality metrics into Druid across dimensions. These metrics are rolled up on a per-minute basis and are very useful when looking at metrics on a partition or client basis. Events are another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside the Kafka payload, but there is also some very useful metadata carried in Kafka headers that can serve as useful dimensions for aggregation and, in turn, bring better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collisions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieving rows, and merging the columns into one final row for Druid consumption. KafkaInputFormat should make sure to release the resources that get allocated as part of the reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
timestampColumnName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
timestampSpec
|
2021-02-27 17:25:35 -05:00
|
|
|
|
urls
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high-cardinality metrics into Druid across dimensions. These metrics are rolled up on a per-minute basis and are very useful when looking at metrics on a partition or client basis. Events are another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside the Kafka payload, but there is also some very useful metadata carried in Kafka headers that can serve as useful dimensions for aggregation and, in turn, bring better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collisions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieving rows, and merging the columns into one final row for Druid consumption. KafkaInputFormat should make sure to release the resources that get allocated as part of the reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
valueFormat
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/ingestion/data-management.md
|
|
|
|
|
1GB
|
2019-10-09 14:12:00 -04:00
|
|
|
|
IOConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
compactionTask
|
|
|
|
|
compactionTasks
|
|
|
|
|
ingestSegmentFirehose
|
|
|
|
|
numShards
|
|
|
|
|
- ../docs/ingestion/faq.md
|
|
|
|
|
IngestSegment
|
|
|
|
|
IngestSegmentFirehose
|
|
|
|
|
maxSizes
|
|
|
|
|
windowPeriod
|
|
|
|
|
- ../docs/ingestion/hadoop.md
|
|
|
|
|
2012-01-01T00
|
|
|
|
|
2012-01-03T00
|
|
|
|
|
2012-01-05T00
|
|
|
|
|
2012-01-07T00
|
|
|
|
|
500MB
|
|
|
|
|
CombineTextInputFormat
|
|
|
|
|
HadoopIndexTask
|
|
|
|
|
InputFormat
|
|
|
|
|
InputSplit
|
|
|
|
|
JobHistory
|
|
|
|
|
a.example.com
|
|
|
|
|
assumeGrouped
|
2021-04-09 00:03:00 -04:00
|
|
|
|
awaitSegmentAvailabilityTimeoutMillis
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cleanupOnFailure
|
|
|
|
|
combineText
|
|
|
|
|
connectURI
|
|
|
|
|
dataGranularity
|
|
|
|
|
datetime
|
|
|
|
|
f.example.com
|
|
|
|
|
filePattern
|
|
|
|
|
forceExtendableShardSpecs
|
|
|
|
|
ignoreInvalidRows
|
|
|
|
|
ignoreWhenNoSegments
|
|
|
|
|
indexSpecForIntermediatePersists
|
|
|
|
|
index_hadoop
|
|
|
|
|
inputPath
|
|
|
|
|
inputSpecs
|
|
|
|
|
interval1
|
|
|
|
|
interval2
|
|
|
|
|
jobProperties
|
|
|
|
|
leaveIntermediate
|
|
|
|
|
logParseExceptions
|
|
|
|
|
mapred.map.tasks
|
|
|
|
|
mapreduce.job.maps
|
|
|
|
|
maxParseExceptions
|
|
|
|
|
maxPartitionSize
|
|
|
|
|
maxSplitSize
|
|
|
|
|
metadataUpdateSpec
|
|
|
|
|
numBackgroundPersistThreads
|
|
|
|
|
overwriteFiles
|
|
|
|
|
partitionDimension
|
|
|
|
|
partitionDimensions
|
|
|
|
|
partitionSpec
|
|
|
|
|
pathFormat
|
|
|
|
|
segmentOutputPath
|
|
|
|
|
segmentTable
|
|
|
|
|
shardSpec
|
|
|
|
|
single_dim
|
|
|
|
|
targetPartitionSize
|
2019-09-20 16:59:18 -04:00
|
|
|
|
targetRowsPerSegment
|
2019-09-17 15:47:30 -04:00
|
|
|
|
useCombiner
|
|
|
|
|
useExplicitVersion
|
|
|
|
|
useNewAggs
|
|
|
|
|
useYarnRMJobStatusFallback
|
|
|
|
|
workingPath
|
|
|
|
|
z.example.com
|
|
|
|
|
- ../docs/ingestion/native-batch.md
|
|
|
|
|
150MB
|
|
|
|
|
CombiningFirehose
|
|
|
|
|
DataSchema
|
|
|
|
|
DefaultPassword
|
|
|
|
|
EnvironmentVariablePasswordProvider
|
|
|
|
|
HttpFirehose
|
|
|
|
|
IOConfig
|
|
|
|
|
InlineFirehose
|
|
|
|
|
LocalFirehose
|
|
|
|
|
PartitionsSpec
|
|
|
|
|
PasswordProviders
|
2019-10-18 16:24:14 -04:00
|
|
|
|
SegmentsSplitHintSpec
|
|
|
|
|
SplitHintSpec
|
2020-02-25 23:59:53 -05:00
|
|
|
|
accessKeyId
|
2019-09-17 15:47:30 -04:00
|
|
|
|
appendToExisting
|
|
|
|
|
baseDir
|
|
|
|
|
chatHandlerNumRetries
|
|
|
|
|
chatHandlerTimeout
|
2021-12-03 06:07:14 -05:00
|
|
|
|
cityName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
connectorConfig
|
DruidInputSource: Fix issues in column projection, timestamp handling. (#10267)
* DruidInputSource: Fix issues in column projection, timestamp handling.
DruidInputSource, DruidSegmentReader changes:
1) Remove "dimensions" and "metrics". They are not necessary, because we
can compute which columns we need to read based on what is going to
be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
the timestamp of the returned InputRows was set to the `__time` column
of the input datasource.
(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.
(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.
(1) and (3) are breaking changes.
Web console changes:
1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
compatibility with the new behavior.
Other changes:
1) Add ColumnsFilter, a new class that allows input readers to determine
which columns they need to read. Currently, it's only used by the
DruidInputSource, but it could be used by other columnar input sources
in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
ColumnsFilter for given timestamp, dimensions, transform, and metrics.
5) Add "getRequiredColumns" method to TransformSpec to support the above.
* Various fixups.
* Uncomment incorrectly commented lines.
* Move TransformSpecTest to the proper module.
* Add druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.
* Fix.
* Fix build.
* Checkstyle.
* Misc fixes.
* Fix test.
* Move config.
* Fix imports.
* Fixup.
* Fix ShuffleResourceTest.
* Add import.
* Smarter exclusions.
* Fixes based on tests.
Also, add TIME_COLUMN constant in the web console.
* Adjustments for tests.
* Reorder test data.
* Update docs.
* Update docs to say Druid 0.22.0 instead of 0.21.0.
* Fix test.
* Fix ITAutoCompactionTest.
* Changes from review & from merging.
2021-03-25 13:32:21 -04:00
|
|
|
|
countryName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
dataSchema's
|
2021-04-01 15:29:36 -04:00
|
|
|
|
dropExisting
|
2019-09-17 15:47:30 -04:00
|
|
|
|
foldCase
|
|
|
|
|
forceGuaranteedRollup
|
|
|
|
|
httpAuthenticationPassword
|
|
|
|
|
httpAuthenticationUsername
|
|
|
|
|
ingestSegment
|
2019-12-05 19:50:00 -05:00
|
|
|
|
InputSource
|
|
|
|
|
DruidInputSource
|
2021-01-06 01:19:09 -05:00
|
|
|
|
maxColumnsToMerge
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxInputSegmentBytesPerTask
|
|
|
|
|
maxNumConcurrentSubTasks
|
|
|
|
|
maxNumSegmentsToMerge
|
|
|
|
|
maxRetry
|
|
|
|
|
pushTimeout
|
|
|
|
|
reportParseExceptions
|
2020-02-25 23:59:53 -05:00
|
|
|
|
secretAccessKey
|
2019-09-17 15:47:30 -04:00
|
|
|
|
segmentWriteOutMediumFactory
|
|
|
|
|
sql
|
|
|
|
|
sqls
|
2019-10-18 16:24:14 -04:00
|
|
|
|
splitHintSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
taskStatusCheckPeriodMs
|
|
|
|
|
timeChunk
|
|
|
|
|
totalNumMergeTasks
|
2020-02-25 20:49:16 -05:00
|
|
|
|
StaticS3Firehose
|
|
|
|
|
prefetchTriggerBytes
|
2021-04-09 00:03:00 -04:00
|
|
|
|
awaitSegmentAvailabilityTimeoutMillis
|
2021-12-03 06:07:14 -05:00
|
|
|
|
- ../docs/ingestion/native-batch-firehose.md
|
|
|
|
|
LocalFirehose
|
|
|
|
|
baseDir
|
|
|
|
|
HttpFirehose
|
|
|
|
|
httpAuthenticationUsername
|
|
|
|
|
DefaultPassword
|
|
|
|
|
PasswordProviders
|
|
|
|
|
EnvironmentVariablePasswordProvider
|
|
|
|
|
ingestSegment
|
|
|
|
|
maxInputSegmentBytesPerTask
|
|
|
|
|
150MB
|
|
|
|
|
foldCase
|
|
|
|
|
sqls
|
|
|
|
|
connectorConfig
|
|
|
|
|
InlineFirehose
|
|
|
|
|
CombiningFirehose
|
|
|
|
|
httpAuthenticationPassword
|
|
|
|
|
- ../docs/ingestion/native-batch-input-source.md
|
|
|
|
|
accessKeyId
|
|
|
|
|
secretAccessKey
|
|
|
|
|
accessKeyId
|
|
|
|
|
httpAuthenticationPassword
|
|
|
|
|
countryName
|
|
|
|
|
- ../docs/ingestion/native-batch-simple-task.md
|
|
|
|
|
dataSchema's
|
|
|
|
|
appendToExisting
|
|
|
|
|
dropExisting
|
|
|
|
|
timeChunk
|
|
|
|
|
PartitionsSpec
|
|
|
|
|
forceGuaranteedRollup
|
|
|
|
|
reportParseExceptions
|
|
|
|
|
pushTimeout
|
|
|
|
|
segmentWriteOutMediumFactory
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/ingestion/schema-design.md
|
|
|
|
|
product_category
|
|
|
|
|
product_id
|
|
|
|
|
product_name
|
|
|
|
|
- ../docs/ingestion/tasks.md
|
|
|
|
|
BUILD_SEGMENTS
|
|
|
|
|
DETERMINE_PARTITIONS
|
|
|
|
|
forceTimeChunkLock
|
|
|
|
|
taskLockTimeout
|
|
|
|
|
- ../docs/misc/math-expr.md
|
|
|
|
|
DOUBLE_ARRAY
|
|
|
|
|
DOY
|
|
|
|
|
DateTimeFormat
|
|
|
|
|
LONG_ARRAY
|
|
|
|
|
Los_Angeles
|
|
|
|
|
P3M
|
|
|
|
|
PT12H
|
|
|
|
|
STRING_ARRAY
|
|
|
|
|
String.format
|
|
|
|
|
acos
|
|
|
|
|
args
|
|
|
|
|
arr1
|
|
|
|
|
arr2
|
|
|
|
|
array_append
|
|
|
|
|
array_concat
|
2021-04-22 21:30:16 -04:00
|
|
|
|
array_set_add
|
|
|
|
|
array_set_add_all
|
2019-09-17 15:47:30 -04:00
|
|
|
|
array_contains
|
|
|
|
|
array_length
|
|
|
|
|
array_offset
|
|
|
|
|
array_offset_of
|
|
|
|
|
array_ordinal
|
|
|
|
|
array_ordinal_of
|
|
|
|
|
array_overlap
|
|
|
|
|
array_prepend
|
|
|
|
|
array_slice
|
|
|
|
|
array_to_string
|
|
|
|
|
asin
|
|
|
|
|
atan
|
|
|
|
|
atan2
|
2021-01-28 14:16:53 -05:00
|
|
|
|
bitwise
|
|
|
|
|
bitwiseAnd
|
|
|
|
|
bitwiseComplement
|
|
|
|
|
bitwiseConvertDoubleToLongBits
|
|
|
|
|
bitwiseConvertLongBitsToDouble
|
|
|
|
|
bitwiseOr
|
|
|
|
|
bitwiseShiftLeft
|
|
|
|
|
bitwiseShiftRight
|
|
|
|
|
bitwiseXor
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bloom_filter_test
|
|
|
|
|
cartesian_fold
|
|
|
|
|
cartesian_map
|
|
|
|
|
case_searched
|
|
|
|
|
case_simple
|
|
|
|
|
cbrt
|
|
|
|
|
concat
|
|
|
|
|
copysign
|
|
|
|
|
expm1
|
|
|
|
|
expr
|
|
|
|
|
expr1
|
|
|
|
|
expr2
|
2022-08-19 20:12:19 -04:00
|
|
|
|
expr3
|
|
|
|
|
expr4
|
2019-09-17 15:47:30 -04:00
|
|
|
|
fromIndex
|
|
|
|
|
getExponent
|
|
|
|
|
hypot
|
|
|
|
|
ipv4_match
|
|
|
|
|
ipv4_parse
|
2021-12-02 19:40:23 -05:00
|
|
|
|
isnull
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ipv4_stringify
|
|
|
|
|
java.lang.Math
|
|
|
|
|
java.lang.String
|
|
|
|
|
log10
|
|
|
|
|
log1p
|
|
|
|
|
lpad
|
|
|
|
|
ltrim
|
|
|
|
|
nextUp
|
|
|
|
|
nextafter
|
2021-12-02 19:40:23 -05:00
|
|
|
|
notnull
|
2019-09-17 15:47:30 -04:00
|
|
|
|
nvl
|
|
|
|
|
parse_long
|
|
|
|
|
regexp_extract
|
2020-06-03 17:31:37 -04:00
|
|
|
|
regexp_like
|
2020-09-14 12:57:54 -04:00
|
|
|
|
contains_string
|
|
|
|
|
icontains_string
|
2019-09-17 15:47:30 -04:00
|
|
|
|
result1
|
|
|
|
|
result2
|
|
|
|
|
rint
|
|
|
|
|
rpad
|
|
|
|
|
rtrim
|
2021-11-17 11:22:41 -05:00
|
|
|
|
safe_divide
|
2019-09-17 15:47:30 -04:00
|
|
|
|
scalb
|
|
|
|
|
signum
|
|
|
|
|
str1
|
|
|
|
|
str2
|
|
|
|
|
string_to_array
|
2020-09-14 22:44:58 -04:00
|
|
|
|
stringAny
|
2019-09-17 15:47:30 -04:00
|
|
|
|
strlen
|
|
|
|
|
strpos
|
|
|
|
|
timestamp_ceil
|
|
|
|
|
timestamp_extract
|
|
|
|
|
timestamp_floor
|
|
|
|
|
timestamp_format
|
|
|
|
|
timestamp_parse
|
|
|
|
|
timestamp_shift
|
|
|
|
|
todegrees
|
|
|
|
|
toradians
|
|
|
|
|
ulp
|
|
|
|
|
unix_timestamp
|
|
|
|
|
value1
|
|
|
|
|
value2
|
|
|
|
|
valueOf
|
2021-08-13 13:27:49 -04:00
|
|
|
|
IEC
|
|
|
|
|
human_readable_binary_byte_format
|
|
|
|
|
human_readable_decimal_byte_format
|
|
|
|
|
human_readable_decimal_format
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/misc/papers-and-talks.md
|
|
|
|
|
RADStack
|
|
|
|
|
- ../docs/operations/api-reference.md
|
|
|
|
|
00.000Z
|
|
|
|
|
2015-09-12T03
|
|
|
|
|
2015-09-12T05
|
|
|
|
|
2016-06-27_2016-06-28
|
|
|
|
|
Param
|
|
|
|
|
SupervisorSpec
|
|
|
|
|
dropRule
|
|
|
|
|
druid.query.segmentMetadata.defaultHistory
|
|
|
|
|
isointerval
|
|
|
|
|
json
|
|
|
|
|
loadRule
|
|
|
|
|
maxTime
|
|
|
|
|
minTime
|
|
|
|
|
numCandidates
|
|
|
|
|
param
|
|
|
|
|
segmentId1
|
|
|
|
|
segmentId2
|
|
|
|
|
taskId
|
|
|
|
|
taskid
|
|
|
|
|
un
|
|
|
|
|
- ../docs/operations/basic-cluster-tuning.md
|
2021-06-30 16:42:45 -04:00
|
|
|
|
100MiB
|
|
|
|
|
128MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
15ms
|
2021-06-30 16:42:45 -04:00
|
|
|
|
2.5MiB
|
|
|
|
|
24GiB
|
|
|
|
|
256MiB
|
|
|
|
|
30GiB-60GiB
|
|
|
|
|
4GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
5MB
|
2021-06-30 16:42:45 -04:00
|
|
|
|
64KiB
|
|
|
|
|
8GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
G1GC
|
|
|
|
|
GroupBys
|
|
|
|
|
QoS-type
|
|
|
|
|
- ../docs/operations/dump-segment.md
|
|
|
|
|
DumpSegment
|
|
|
|
|
SegmentMetadata
|
|
|
|
|
__time
|
|
|
|
|
bitmapSerdeFactory
|
|
|
|
|
columnName
|
|
|
|
|
index.zip
|
|
|
|
|
time-iso8601
|
|
|
|
|
- ../docs/operations/export-metadata.md
|
|
|
|
|
hadoopStorageDirectory
|
|
|
|
|
- ../docs/operations/insert-segment-to-db.md
|
|
|
|
|
0.14.x
|
2022-08-04 02:16:05 -04:00
|
|
|
|
- ../docs/operations/java.md
|
|
|
|
|
G1
|
|
|
|
|
Temurin
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/operations/metrics.md
|
|
|
|
|
0.14.x
|
|
|
|
|
1s
|
|
|
|
|
Bufferpool
|
|
|
|
|
EventReceiverFirehose
|
|
|
|
|
EventReceiverFirehoseMonitor
|
2022-08-04 02:16:05 -04:00
|
|
|
|
Filesystem
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JVMMonitor
|
|
|
|
|
QueryCountStatsMonitor
|
|
|
|
|
RealtimeMetricsMonitor
|
|
|
|
|
Sys
|
|
|
|
|
SysMonitor
|
|
|
|
|
TaskCountStatsMonitor
|
2020-09-29 02:50:38 -04:00
|
|
|
|
TaskSlotCountStatsMonitor
|
2022-04-26 12:44:44 -04:00
|
|
|
|
WorkerTaskCountStatsMonitor
|
2022-07-14 01:09:03 -04:00
|
|
|
|
workerVersion
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bufferCapacity
|
2020-06-25 15:20:25 -04:00
|
|
|
|
bufferpoolName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cms
|
|
|
|
|
cpuName
|
|
|
|
|
cpuTime
|
2021-11-07 06:21:44 -05:00
|
|
|
|
druid.server.http.numThreads
|
|
|
|
|
druid.server.http.queueSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
fsDevName
|
|
|
|
|
fsDirName
|
|
|
|
|
fsOptions
|
|
|
|
|
fsSysTypeName
|
|
|
|
|
fsTypeName
|
|
|
|
|
g1
|
|
|
|
|
gcGen
|
|
|
|
|
gcName
|
|
|
|
|
handoffed
|
|
|
|
|
hasFilters
|
|
|
|
|
memKind
|
|
|
|
|
nativeQueryIds
|
|
|
|
|
netAddress
|
|
|
|
|
netHwaddr
|
|
|
|
|
netName
|
2021-07-30 10:59:26 -04:00
|
|
|
|
noticeType
|
2019-09-17 15:47:30 -04:00
|
|
|
|
numComplexMetrics
|
|
|
|
|
numDimensions
|
|
|
|
|
numMetrics
|
|
|
|
|
poolKind
|
|
|
|
|
poolName
|
|
|
|
|
remoteAddress
|
2021-12-10 12:40:52 -05:00
|
|
|
|
segmentAvailabilityConfirmed
|
2019-09-17 15:47:30 -04:00
|
|
|
|
serviceName
|
2022-05-23 15:32:47 -04:00
|
|
|
|
taskIngestionMode
|
2019-09-17 15:47:30 -04:00
|
|
|
|
taskStatus
|
|
|
|
|
taskType
|
2021-11-07 06:21:44 -05:00
|
|
|
|
threadPoolNumBusyThreads.
|
|
|
|
|
threadPoolNumIdleThreads
|
|
|
|
|
threadPoolNumTotalThreads.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/operations/other-hadoop.md
|
|
|
|
|
CDH
|
|
|
|
|
Classloader
|
|
|
|
|
assembly.sbt
|
|
|
|
|
build.sbt
|
|
|
|
|
classloader
|
|
|
|
|
druid_build
|
|
|
|
|
mapred-default
|
|
|
|
|
mapred-site
|
|
|
|
|
sbt
|
|
|
|
|
scala-2
|
|
|
|
|
- ../docs/operations/pull-deps.md
|
|
|
|
|
org.apache.hadoop
|
|
|
|
|
proxy.com.
|
|
|
|
|
remoteRepository
|
|
|
|
|
- ../docs/operations/recommendations.md
|
|
|
|
|
JBOD
|
|
|
|
|
druid.processing.buffer.sizeBytes.
|
|
|
|
|
druid.processing.numMergeBuffers
|
|
|
|
|
druid.processing.numThreads
|
|
|
|
|
tmpfs
|
|
|
|
|
- ../docs/operations/rule-configuration.md
|
|
|
|
|
broadcastByInterval
|
|
|
|
|
broadcastByPeriod
|
|
|
|
|
broadcastForever
|
|
|
|
|
colocatedDataSources
|
|
|
|
|
dropBeforeByPeriod
|
|
|
|
|
dropByInterval
|
|
|
|
|
dropByPeriod
|
|
|
|
|
dropForever
|
|
|
|
|
loadByInterval
|
|
|
|
|
loadByPeriod
|
|
|
|
|
loadForever
|
|
|
|
|
- ../docs/operations/segment-optimization.md
|
|
|
|
|
700MB
|
|
|
|
|
- ../docs/operations/single-server.md
|
2021-06-30 16:42:45 -04:00
|
|
|
|
128GiB
|
|
|
|
|
16GiB
|
|
|
|
|
256GiB
|
|
|
|
|
4GiB
|
|
|
|
|
512GiB
|
|
|
|
|
64GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Nano-Quickstart
|
|
|
|
|
i3
|
|
|
|
|
i3.16xlarge
|
|
|
|
|
i3.2xlarge
|
|
|
|
|
i3.4xlarge
|
|
|
|
|
i3.8xlarge
|
|
|
|
|
- ../docs/operations/tls-support.md
|
|
|
|
|
CN
|
|
|
|
|
subjectAltNames
|
|
|
|
|
- ../docs/querying/aggregations.md
|
|
|
|
|
HyperUnique
|
|
|
|
|
hyperUnique
|
|
|
|
|
longSum
|
|
|
|
|
- ../docs/querying/datasource.md
|
|
|
|
|
groupBys
|
|
|
|
|
- ../docs/querying/datasourcemetadataquery.md
|
|
|
|
|
dataSourceMetadata
|
|
|
|
|
- ../docs/querying/dimensionspecs.md
|
|
|
|
|
ExtractionDimensionSpec
|
|
|
|
|
SimpleDateFormat
|
|
|
|
|
bar_1
|
|
|
|
|
dimensionSpecs
|
|
|
|
|
isWhitelist
|
|
|
|
|
joda
|
|
|
|
|
nullHandling
|
|
|
|
|
product_1
|
|
|
|
|
product_3
|
|
|
|
|
registeredLookup
|
|
|
|
|
timeFormat
|
|
|
|
|
tz
|
|
|
|
|
v3
|
|
|
|
|
weekyears
|
|
|
|
|
- ../docs/querying/filters.md
|
|
|
|
|
___bar
|
|
|
|
|
caseSensitive
|
|
|
|
|
extractionFn
|
|
|
|
|
insensitive_contains
|
|
|
|
|
last_name
|
|
|
|
|
lowerStrict
|
|
|
|
|
upperStrict
|
|
|
|
|
- ../docs/querying/granularities.md
|
|
|
|
|
1970-01-01T00
|
|
|
|
|
P2W
|
|
|
|
|
PT0.750S
|
|
|
|
|
PT1H30M
|
|
|
|
|
TimeseriesQuery
|
|
|
|
|
- ../docs/querying/groupbyquery.md
|
|
|
|
|
D1
|
|
|
|
|
D2
|
|
|
|
|
D3
|
|
|
|
|
druid.query.groupBy.defaultStrategy
|
2022-03-08 16:13:11 -05:00
|
|
|
|
druid.query.groupBy.maxSelectorDictionarySize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid.query.groupBy.maxMergingDictionarySize
|
|
|
|
|
druid.query.groupBy.maxOnDiskStorage
|
|
|
|
|
druid.query.groupBy.maxResults.
|
|
|
|
|
groupByStrategy
|
|
|
|
|
maxOnDiskStorage
|
|
|
|
|
maxResults
|
|
|
|
|
orderby
|
|
|
|
|
orderbys
|
|
|
|
|
outputName
|
2022-03-08 16:13:11 -05:00
|
|
|
|
pre-existing
|
2019-09-17 15:47:30 -04:00
|
|
|
|
pushdown
|
|
|
|
|
row1
|
|
|
|
|
subtotalsSpec
|
2022-05-21 13:28:54 -04:00
|
|
|
|
tradeoff
|
2022-02-16 10:23:26 -05:00
|
|
|
|
unnested
|
|
|
|
|
unnesting
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/querying/having.md
|
|
|
|
|
HavingSpec
|
|
|
|
|
HavingSpecs
|
|
|
|
|
dimSelector
|
|
|
|
|
equalTo
|
|
|
|
|
greaterThan
|
|
|
|
|
lessThan
|
|
|
|
|
- ../docs/querying/hll-old.md
|
|
|
|
|
DefaultDimensionSpec
|
|
|
|
|
druid-hll
|
|
|
|
|
isInputHyperUnique
|
|
|
|
|
- ../docs/querying/joins.md
|
|
|
|
|
pre-join
|
|
|
|
|
- ../docs/querying/limitspec.md
|
|
|
|
|
DefaultLimitSpec
|
|
|
|
|
OrderByColumnSpec
|
|
|
|
|
OrderByColumnSpecs
|
|
|
|
|
dimensionOrder
|
|
|
|
|
- ../docs/querying/lookups.md
|
|
|
|
|
60_000
|
|
|
|
|
kafka-extraction-namespace
|
|
|
|
|
mins
|
|
|
|
|
tierName
|
|
|
|
|
- ../docs/querying/multi-value-dimensions.md
|
|
|
|
|
row2
|
|
|
|
|
row3
|
|
|
|
|
row4
|
|
|
|
|
t3
|
|
|
|
|
t4
|
|
|
|
|
t5
|
2022-02-16 10:23:26 -05:00
|
|
|
|
groupByEnableMultiValueUnnesting
|
|
|
|
|
unnesting
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/querying/multitenancy.md
|
|
|
|
|
500ms
|
|
|
|
|
tenant_id
|
|
|
|
|
- ../docs/querying/post-aggregations.md
|
|
|
|
|
fieldAccess
|
|
|
|
|
finalizingFieldAccess
|
|
|
|
|
hyperUniqueCardinality
|
|
|
|
|
- ../docs/querying/query-context.md
|
2021-07-27 11:26:05 -04:00
|
|
|
|
brokerService
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bySegment
|
|
|
|
|
doubleSum
|
|
|
|
|
druid.broker.cache.populateCache
|
|
|
|
|
druid.broker.cache.populateResultLevelCache
|
|
|
|
|
druid.broker.cache.useCache
|
|
|
|
|
druid.broker.cache.useResultLevelCache
|
|
|
|
|
druid.historical.cache.populateCache
|
|
|
|
|
druid.historical.cache.useCache
|
parallel broker merges on fork join pool (#8578)
* sketch of broker parallel merges done in small batches on fork join pool
* fix non-terminating sequences, auto compute parallelism
* adjust benches
* adjust benchmarks
* now hella more faster, fixed dumb
* fix
* remove comments
* log.info for debug
* javadoc
* safer block for sequence to yielder conversion
* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool
* smooth yield rate adjustment, more logs to help tune
* cleanup, less logs
* error handling, bug fixes, on by default, more parallel, more tests
* remove unused var
* comments
* timeboundary mergeFn
* simplify, more javadoc
* formatting
* pushdown config
* use nanos consistently, move logs back to debug level, bit more javadoc
* static terminal result batch
* javadoc for nullability of createMergeFn
* cleanup
* oops
* fix race, add docs
* spelling, remove todo, add unhandled exception log
* cleanup, revert unintended change
* another unintended change
* review stuff
* add ParallelMergeCombiningSequenceBenchmark, fixes
* hyper-threading is the enemy
* fix initial start delay, lol
* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2
* fix those important style issues with the benchmarks code
* lazy sequence creation for benchmarks
* more benchmark comments
* stable sequence generation time
* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs
* add jmh thread based benchmarks, cleanup some stuff
* oops
* style
* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose
* retool benchmark to allow modeling more typical heterogenous heavy workloads
* spelling
* fix
* refactor benchmarks
* formatting
* docs
* add maxThreadStartDelay parameter to threaded benchmark
* why does catch need to be on its own line but else doesnt
2019-11-07 14:58:46 -05:00
|
|
|
|
enableParallelMerge
|
2021-12-15 02:21:53 -05:00
|
|
|
|
enableJoinLeftTableScanDirect
|
|
|
|
|
enableJoinFilterPushDown
|
|
|
|
|
enableJoinFilterRewrite
|
2022-08-13 11:07:43 -04:00
|
|
|
|
enableRewriteJoinToFilter
|
2021-12-15 02:21:53 -05:00
|
|
|
|
enableJoinFilterRewriteValueColumnFilters
|
2019-09-17 15:47:30 -04:00
|
|
|
|
floatSum
|
2021-12-15 02:21:53 -05:00
|
|
|
|
joinFilterRewriteMaxSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxQueuedBytes
|
|
|
|
|
maxScatterGatherBytes
|
|
|
|
|
minTopNThreshold
|
parallel broker merges on fork join pool (#8578)
* sketch of broker parallel merges done in small batches on fork join pool
* fix non-terminating sequences, auto compute parallelism
* adjust benches
* adjust benchmarks
* now hella more faster, fixed dumb
* fix
* remove comments
* log.info for debug
* javadoc
* safer block for sequence to yielder conversion
* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool
* smooth yield rate adjustment, more logs to help tune
* cleanup, less logs
* error handling, bug fixes, on by default, more parallel, more tests
* remove unused var
* comments
* timeboundary mergeFn
* simplify, more javadoc
* formatting
* pushdown config
* use nanos consistently, move logs back to debug level, bit more javadoc
* static terminal result batch
* javadoc for nullability of createMergeFn
* cleanup
* oops
* fix race, add docs
* spelling, remove todo, add unhandled exception log
* cleanup, revert unintended change
* another unintended change
* review stuff
* add ParallelMergeCombiningSequenceBenchmark, fixes
* hyper-threading is the enemy
* fix initial start delay, lol
* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2
* fix those important style issues with the benchmarks code
* lazy sequence creation for benchmarks
* more benchmark comments
* stable sequence generation time
* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs
* add jmh thread based benchmarks, cleanup some stuff
* oops
* style
* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose
* retool benchmark to allow modeling more typical heterogenous heavy workloads
* spelling
* fix
* refactor benchmarks
* formatting
* docs
* add maxThreadStartDelay parameter to threaded benchmark
* why does catch need to be on its own line but else doesnt
2019-11-07 14:58:46 -05:00
|
|
|
|
parallelMergeInitialYieldRows
|
|
|
|
|
parallelMergeParallelism
|
|
|
|
|
parallelMergeSmallBatchRows
|
2019-09-17 15:47:30 -04:00
|
|
|
|
populateCache
|
|
|
|
|
populateResultLevelCache
|
|
|
|
|
queryId
|
|
|
|
|
row-matchers
|
|
|
|
|
serializeDateTimeAsLong
|
|
|
|
|
serializeDateTimeAsLongInner
|
|
|
|
|
skipEmptyBuckets
|
|
|
|
|
useCache
|
|
|
|
|
useResultLevelCache
|
|
|
|
|
vectorSize
|
2021-12-21 16:07:53 -05:00
|
|
|
|
enableJoinLeftTableScanDirect
|
|
|
|
|
enableJoinFilterPushDown
|
|
|
|
|
enableJoinFilterRewrite
|
|
|
|
|
enableJoinFilterRewriteValueColumnFilters
|
|
|
|
|
joinFilterRewriteMaxSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/querying/querying.md
|
2021-06-30 16:42:45 -04:00
|
|
|
|
7KiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DatasourceMetadata
|
|
|
|
|
TimeBoundary
|
|
|
|
|
errorClass
|
|
|
|
|
errorMessage
|
|
|
|
|
x-jackson-smile
|
|
|
|
|
- ../docs/querying/scan-query.md
|
|
|
|
|
batchSize
|
|
|
|
|
compactedList
|
|
|
|
|
druid.query.scan.legacy
|
|
|
|
|
druid.query.scan.maxRowsQueuedForOrdering
|
|
|
|
|
druid.query.scan.maxSegmentPartitionsOrderedInMemory
|
|
|
|
|
maxRowsQueuedForOrdering
|
|
|
|
|
maxSegmentPartitionsOrderedInMemory
|
|
|
|
|
resultFormat
|
|
|
|
|
valueVector
|
|
|
|
|
- ../docs/querying/searchquery.md
|
|
|
|
|
SearchQuerySpec
|
|
|
|
|
cursorOnly
|
|
|
|
|
druid.query.search.searchStrategy
|
|
|
|
|
queryableIndexSegment
|
|
|
|
|
searchDimensions
|
|
|
|
|
searchStrategy
|
|
|
|
|
useIndexes
|
|
|
|
|
- ../docs/querying/searchqueryspec.md
|
|
|
|
|
ContainsSearchQuerySpec
|
|
|
|
|
FragmentSearchQuerySpec
|
|
|
|
|
InsensitiveContainsSearchQuerySpec
|
|
|
|
|
RegexSearchQuerySpec
|
|
|
|
|
- ../docs/querying/segmentmetadataquery.md
|
|
|
|
|
analysisType
|
|
|
|
|
analysisTypes
|
|
|
|
|
lenientAggregatorMerge
|
|
|
|
|
minmax
|
|
|
|
|
segmentMetadata
|
|
|
|
|
toInclude
|
|
|
|
|
- ../docs/querying/select-query.md
|
|
|
|
|
PagingSpec
|
|
|
|
|
fromNext
|
|
|
|
|
pagingSpec
|
|
|
|
|
- ../docs/querying/sorting-orders.md
|
|
|
|
|
BoundFilter
|
|
|
|
|
GroupByQuery's
|
|
|
|
|
SearchQuery
|
|
|
|
|
TopNMetricSpec
|
|
|
|
|
compareTo
|
|
|
|
|
file12
|
|
|
|
|
file2
|
2022-02-11 17:43:30 -05:00
|
|
|
|
- ../docs/querying/sql-operators.md
|
|
|
|
|
_x_
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/querying/timeseriesquery.md
|
|
|
|
|
fieldName1
|
|
|
|
|
fieldName2
|
|
|
|
|
- ../docs/querying/topnmetricspec.md
|
|
|
|
|
DimensionTopNMetricSpec
|
|
|
|
|
metricSpec
|
|
|
|
|
previousStop
|
|
|
|
|
- ../docs/querying/topnquery.md
|
|
|
|
|
GroupByQuery
|
|
|
|
|
top500
|
|
|
|
|
- ../docs/querying/virtual-columns.md
|
|
|
|
|
outputType
|
|
|
|
|
- ../docs/tutorials/cluster.md
|
|
|
|
|
1.9TB
|
|
|
|
|
16CPU
|
|
|
|
|
WebUpd8
|
|
|
|
|
m5.2xlarge
|
|
|
|
|
metadata.storage.
|
2021-06-30 16:42:45 -04:00
|
|
|
|
256GiB
|
|
|
|
|
128GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/tutorials/tutorial-batch-hadoop.md
|
|
|
|
|
PATH_TO_DRUID
|
|
|
|
|
namenode
|
|
|
|
|
- ../docs/tutorials/tutorial-delete-data.md
|
|
|
|
|
segmentID
|
|
|
|
|
segmentIds
|
|
|
|
|
- ../docs/tutorials/tutorial-ingestion-spec.md
|
|
|
|
|
dstIP
|
|
|
|
|
dstPort
|
|
|
|
|
srcIP
|
|
|
|
|
srcPort
|
|
|
|
|
- ../docs/tutorials/tutorial-kerberos-hadoop.md
|
|
|
|
|
common_runtime_properties
|
|
|
|
|
druid.extensions.directory
|
|
|
|
|
druid.extensions.loadList
|
|
|
|
|
druid.hadoop.security.kerberos.keytab
|
|
|
|
|
druid.hadoop.security.kerberos.principal
|
|
|
|
|
druid.indexer.logs.directory
|
|
|
|
|
druid.indexer.logs.type
|
|
|
|
|
druid.storage.storageDirectory
|
|
|
|
|
druid.storage.type
|
|
|
|
|
hdfs.headless.keytab
|
|
|
|
|
indexing_log
|
|
|
|
|
keytabs
|
|
|
|
|
- ../docs/tutorials/tutorial-query.md
|
|
|
|
|
dsql
|
|
|
|
|
- ../docs/tutorials/tutorial-retention.md
|
|
|
|
|
2015-09-12T12
|
|
|
|
|
- ../docs/tutorials/tutorial-update-data.md
|
|
|
|
|
bear-111
|
|
|
|
|
- ../docs/configuration/index.md
|
2020-11-23 18:03:13 -05:00
|
|
|
|
10KiB
|
|
|
|
|
2GiB
|
|
|
|
|
512KiB
|
2020-07-30 21:58:48 -04:00
|
|
|
|
1GiB
|
|
|
|
|
KiB
|
|
|
|
|
GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
00.000Z
|
|
|
|
|
100ms
|
|
|
|
|
10ms
|
|
|
|
|
1GB
|
|
|
|
|
1_000_000
|
|
|
|
|
2012-01-01T00
|
|
|
|
|
2GB
|
|
|
|
|
30_000
|
|
|
|
|
524288000L
|
2021-06-30 16:42:45 -04:00
|
|
|
|
5MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
8u60
|
|
|
|
|
Autoscaler
|
2021-10-25 15:16:21 -04:00
|
|
|
|
APPROX_COUNT_DISTINCT_BUILTIN
|
2019-09-17 15:47:30 -04:00
|
|
|
|
AvaticaConnectionBalancer
|
|
|
|
|
EventReceiverFirehose
|
|
|
|
|
File.getFreeSpace
|
|
|
|
|
File.getTotalSpace
|
|
|
|
|
ForkJoinPool
|
2020-04-28 06:13:39 -04:00
|
|
|
|
GCE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HadoopIndexTasks
|
|
|
|
|
HttpEmitter
|
|
|
|
|
HttpPostEmitter
|
|
|
|
|
InetAddress.getLocalHost
|
2021-04-09 03:12:28 -04:00
|
|
|
|
IOConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JRE8u60
|
|
|
|
|
KeyManager
|
|
|
|
|
L1
|
|
|
|
|
L2
|
2020-04-28 06:13:39 -04:00
|
|
|
|
ListManagedInstances
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LoadSpec
|
|
|
|
|
LoggingEmitter
|
|
|
|
|
Los_Angeles
|
|
|
|
|
MDC
|
|
|
|
|
NoopServiceEmitter
|
2021-10-08 23:52:03 -04:00
|
|
|
|
NUMA
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ONLY_EVENTS
|
|
|
|
|
P1D
|
|
|
|
|
P1W
|
|
|
|
|
PT-1S
|
|
|
|
|
PT0.050S
|
|
|
|
|
PT10M
|
|
|
|
|
PT10S
|
|
|
|
|
PT15M
|
|
|
|
|
PT1800S
|
|
|
|
|
PT1M
|
|
|
|
|
PT1S
|
|
|
|
|
PT24H
|
|
|
|
|
PT300S
|
|
|
|
|
PT30S
|
2021-04-20 20:10:43 -04:00
|
|
|
|
PT3600S
|
2019-09-17 15:47:30 -04:00
|
|
|
|
PT5M
|
|
|
|
|
PT5S
|
|
|
|
|
PT60S
|
|
|
|
|
PT90M
|
|
|
|
|
Param
|
|
|
|
|
Runtime.maxMemory
|
|
|
|
|
SSLContext
|
|
|
|
|
SegmentMetadata
|
|
|
|
|
SegmentWriteOutMediumFactory
|
|
|
|
|
ServiceEmitter
|
|
|
|
|
System.getProperty
|
|
|
|
|
TLSv1.2
|
|
|
|
|
TrustManager
|
|
|
|
|
TuningConfig
|
|
|
|
|
_N_
|
|
|
|
|
_default
|
|
|
|
|
_default_tier
|
|
|
|
|
addr
|
|
|
|
|
affinityConfig
|
|
|
|
|
allowAll
|
2020-04-15 19:12:20 -04:00
|
|
|
|
ANDed
|
2019-09-17 15:47:30 -04:00
|
|
|
|
array_mod
|
2020-04-28 06:13:39 -04:00
|
|
|
|
autoscale
|
|
|
|
|
autoscalers
|
2019-09-17 15:47:30 -04:00
|
|
|
|
batch_index_task
|
|
|
|
|
cgroup
|
|
|
|
|
classloader
|
|
|
|
|
com.metamx
|
|
|
|
|
common.runtime.properties
|
|
|
|
|
cpuacct
|
|
|
|
|
dataSourceName
|
|
|
|
|
datetime
|
|
|
|
|
defaultHistory
|
|
|
|
|
doubleMax
|
|
|
|
|
doubleMin
|
|
|
|
|
doubleSum
|
|
|
|
|
druid.enableTlsPort
|
|
|
|
|
druid.indexer.autoscale.workerVersion
|
|
|
|
|
druid.service
|
|
|
|
|
druid.storage.disableAcl
|
|
|
|
|
druid_audit
|
|
|
|
|
druid_config
|
|
|
|
|
druid_dataSource
|
|
|
|
|
druid_pendingSegments
|
|
|
|
|
druid_rules
|
|
|
|
|
druid_segments
|
|
|
|
|
druid_supervisors
|
|
|
|
|
druid_taskLock
|
|
|
|
|
druid_taskLog
|
|
|
|
|
druid_tasks
|
2020-04-15 19:12:20 -04:00
|
|
|
|
DruidQueryRel
|
2022-05-10 22:05:15 -04:00
|
|
|
|
durationToRetain
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ec2
|
|
|
|
|
equalDistribution
|
|
|
|
|
extractionFn
|
|
|
|
|
file.encoding
|
|
|
|
|
fillCapacity
|
|
|
|
|
first_location
|
|
|
|
|
floatMax
|
2020-09-14 22:44:58 -04:00
|
|
|
|
floatAny
|
2019-09-17 15:47:30 -04:00
|
|
|
|
floatMin
|
|
|
|
|
floatSum
|
|
|
|
|
freeSpacePercent
|
2020-04-28 06:13:39 -04:00
|
|
|
|
gce
|
|
|
|
|
gce-extensions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
getCanonicalHostName
|
|
|
|
|
groupBy
|
|
|
|
|
hdfs
|
|
|
|
|
httpRemote
|
|
|
|
|
indexTask
|
|
|
|
|
info_dir
|
2020-04-15 19:12:20 -04:00
|
|
|
|
inlining
|
2019-09-17 15:47:30 -04:00
|
|
|
|
java.class.path
|
|
|
|
|
java.io.tmpdir
|
|
|
|
|
javaOpts
|
|
|
|
|
javaOptsArray
|
Making optimal usage of multiple segment cache locations (#8038)
* #7641 - Changing segment distribution algorithm to distribute segments to multiple segment cache locations
* Fixing indentation
* WIP
* Adding interface for location strategy selection, least bytes used strategy impl, round-robin strategy impl, locationSelectorStrategy config with least bytes used strategy as the default strategy
* fixing code style
* Fixing test
* Adding a method visible only for testing, fixing tests
* 1. Changing the method contract to return an iterator of locations instead of a single best location. 2. Check style fixes
* fixing the conditional statement
* Added testSegmentDistributionUsingLeastBytesUsedStrategy, fixed testSegmentDistributionUsingRoundRobinStrategy
* to trigger CI build
* Add documentation for the selection strategy configuration
* to re trigger CI build
* updated docs as per review comments, made LeastBytesUsedStorageLocationSelectorStrategy.getLocations a synchronzied method, other minor fixes
* In checkLocationConfigForNull method, using getLocations() to check for null instead of directly referring to the locations variable so that tests overriding getLocations() method do not fail
* Implementing review comments. Added tests for StorageLocationSelectorStrategy
* Checkstyle fixes
* Adding java doc comments for StorageLocationSelectorStrategy interface
* checkstyle
* empty commit to retrigger build
* Empty commit
* Adding suppressions for words leastBytesUsed and roundRobin of ../docs/configuration/index.md file
* Impl review comments including updating docs as suggested
* Removing checkLocationConfigForNull(), @NotEmpty annotation serves the purpose
* Round robin iterator to keep track of the no. of iterations, impl review comments, added tests for round robin strategy
* Fixing the round robin iterator
* Removed numLocationsToTry, updated java docs
* changing property attribute value from tier to type
* Fixing assert messages
2019-09-28 02:17:44 -04:00
|
|
|
|
leastBytesUsed
|
2019-09-17 15:47:30 -04:00
|
|
|
|
loadList
|
|
|
|
|
loadqueuepeon
|
|
|
|
|
loadspec
|
|
|
|
|
localStorage
|
|
|
|
|
maxHeaderSize
|
|
|
|
|
maxQueuedBytes
|
|
|
|
|
maxSize
|
|
|
|
|
middlemanager
|
|
|
|
|
minTimeMs
|
|
|
|
|
minmax
|
|
|
|
|
mins
|
2020-04-15 19:12:20 -04:00
|
|
|
|
nullable
|
2019-09-17 15:47:30 -04:00
|
|
|
|
orderby
|
|
|
|
|
orderbys
|
|
|
|
|
org.apache.druid
|
|
|
|
|
org.apache.druid.jetty.RequestLog
|
|
|
|
|
org.apache.hadoop
|
|
|
|
|
overlord.html
|
|
|
|
|
pendingSegments
|
|
|
|
|
pre-flight
|
2020-04-15 19:12:20 -04:00
|
|
|
|
preloaded
|
2019-09-17 15:47:30 -04:00
|
|
|
|
queryType
|
|
|
|
|
remoteTaskRunnerConfig
|
|
|
|
|
rendezvousHash
|
2021-05-05 08:39:36 -04:00
|
|
|
|
replicants
|
2019-09-17 15:47:30 -04:00
|
|
|
|
resultsets
|
Making optimal usage of multiple segment cache locations (#8038)
* #7641 - Changing segment distribution algorithm to distribute segments to multiple segment cache locations
* Fixing indentation
* WIP
* Adding interface for location strategy selection, least bytes used strategy impl, round-robin strategy impl, locationSelectorStrategy config with least bytes used strategy as the default strategy
* fixing code style
* Fixing test
* Adding a method visible only for testing, fixing tests
* 1. Changing the method contract to return an iterator of locations instead of a single best location. 2. Check style fixes
* fixing the conditional statement
* Added testSegmentDistributionUsingLeastBytesUsedStrategy, fixed testSegmentDistributionUsingRoundRobinStrategy
* to trigger CI build
* Add documentation for the selection strategy configuration
* to re trigger CI build
* updated docs as per review comments, made LeastBytesUsedStorageLocationSelectorStrategy.getLocations a synchronzied method, other minor fixes
* In checkLocationConfigForNull method, using getLocations() to check for null instead of directly referring to the locations variable so that tests overriding getLocations() method do not fail
* Implementing review comments. Added tests for StorageLocationSelectorStrategy
* Checkstyle fixes
* Adding java doc comments for StorageLocationSelectorStrategy interface
* checkstyle
* empty commit to retrigger build
* Empty commit
* Adding suppressions for words leastBytesUsed and roundRobin of ../docs/configuration/index.md file
* Impl review comments including updating docs as suggested
* Removing checkLocationConfigForNull(), @NotEmpty annotation serves the purpose
* Round robin iterator to keep track of the no. of iterations, impl review comments, added tests for round robin strategy
* Fixing the round robin iterator
* Removed numLocationsToTry, updated java docs
* changing property attribute value from tier to type
* Fixing assert messages
2019-09-28 02:17:44 -04:00
|
|
|
|
roundRobin
|
2019-09-17 15:47:30 -04:00
|
|
|
|
runtime.properties
|
|
|
|
|
runtime.properties.
|
|
|
|
|
s3
|
|
|
|
|
s3a
|
|
|
|
|
s3n
|
|
|
|
|
slf4j
|
|
|
|
|
sql
|
|
|
|
|
sqlQuery
|
|
|
|
|
successfulSending
|
|
|
|
|
taskBlackListCleanupPeriod
|
|
|
|
|
tasklogs
|
|
|
|
|
timeBoundary
|
DruidInputSource: Fix issues in column projection, timestamp handling. (#10267)
* DruidInputSource: Fix issues in column projection, timestamp handling.
DruidInputSource, DruidSegmentReader changes:
1) Remove "dimensions" and "metrics". They are not necessary, because we
can compute which columns we need to read based on what is going to
be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
the timestamp of the returned InputRows was set to the `__time` column
of the input datasource.
(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.
(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.
(1) and (3) are breaking changes.
Web console changes:
1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
compatibility with the new behavior.
Other changes:
1) Add ColumnsFilter, a new class that allows input readers to determine
which columns they need to read. Currently, it's only used by the
DruidInputSource, but it could be used by other columnar input sources
in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
ColumnsFilter for given timestamp, dimensions, transform, and metrics.
5) Add "getRequiredColumns" method to TransformSpec to support the above.
* Various fixups.
* Uncomment incorrectly commented lines.
* Move TransformSpecTest to the proper module.
* Add druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.
* Fix.
* Fix build.
* Checkstyle.
* Misc fixes.
* Fix test.
* Move config.
* Fix imports.
* Fixup.
* Fix ShuffleResourceTest.
* Add import.
* Smarter exclusions.
* Fixes based on tests.
Also, add TIME_COLUMN constant in the web console.
* Adjustments for tests.
* Reorder test data.
* Update docs.
* Update docs to say Druid 0.22.0 instead of 0.21.0.
* Fix test.
* Fix ITAutoCompactionTest.
* Changes from review & from merging.
2021-03-25 13:32:21 -04:00
|
|
|
|
timestampSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tmp
|
|
|
|
|
tmpfs
|
|
|
|
|
truststore
|
|
|
|
|
tuningConfig
|
2020-04-15 19:12:20 -04:00
|
|
|
|
unioning
|
2019-09-17 15:47:30 -04:00
|
|
|
|
useIndexes
|
|
|
|
|
user.timezone
|
|
|
|
|
v0.12.0
|
|
|
|
|
versionReplacementString
|
|
|
|
|
workerId
|
|
|
|
|
yyyy-MM-dd
|
2019-10-17 15:57:19 -04:00
|
|
|
|
taskType
|
|
|
|
|
index_kafka
|
|
|
|
|
c1
|
|
|
|
|
c2
|
|
|
|
|
ds1
|
|
|
|
|
equalDistributionWithCategorySpec
|
|
|
|
|
fillCapacityWithCategorySpec
|
|
|
|
|
WorkerCategorySpec
|
|
|
|
|
workerCategorySpec
|
|
|
|
|
CategoryConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
- ../docs/design/index.md
|
|
|
|
|
logsearch
|
|
|
|
|
- ../docs/ingestion/index.md
|
|
|
|
|
2000-01-01T01
|
|
|
|
|
DateTimeFormat
|
|
|
|
|
JsonPath
|
|
|
|
|
autodetect
|
|
|
|
|
createBitmapIndex
|
|
|
|
|
dimensionExclusions
|
|
|
|
|
expr
|
|
|
|
|
jackson-jq
|
|
|
|
|
missingValue
|
2021-01-27 03:34:56 -05:00
|
|
|
|
skipBytesInMemoryOverheadCheck
|
2019-09-17 15:47:30 -04:00
|
|
|
|
spatialDimensions
|
|
|
|
|
useFieldDiscovery
|
|
|
|
|
- ../docs/tutorials/index.md
|
|
|
|
|
4CPU
|
|
|
|
|
cityName
|
|
|
|
|
countryIsoCode
|
|
|
|
|
countryName
|
|
|
|
|
isAnonymous
|
|
|
|
|
isMinor
|
|
|
|
|
isNew
|
|
|
|
|
isRobot
|
|
|
|
|
isUnpatrolled
|
|
|
|
|
metroCode
|
|
|
|
|
regionIsoCode
|
2020-04-04 12:02:24 -04:00
|
|
|
|
regionName
|
2021-06-30 16:42:45 -04:00
|
|
|
|
4GiB
|
|
|
|
|
512GiB
|
2020-04-04 12:02:24 -04:00
|
|
|
|
- ../docs/development/extensions-core/druid-ranger-security.md
|
|
|
|
|
json
|
|
|
|
|
metastore
|
2020-04-09 13:43:11 -04:00
|
|
|
|
UserGroupInformation
|
|
|
|
|
CVE-2019-17571
|
|
|
|
|
CVE-2019-12399
|
|
|
|
|
CVE-2018-17196
|
2020-07-08 02:12:39 -04:00
|
|
|
|
bin.tar.gz
|
2020-07-30 21:58:48 -04:00
|
|
|
|
- ../docs/configuration/human-readable-byte.md
|
|
|
|
|
0s
|
|
|
|
|
1T
|
|
|
|
|
3G
|
|
|
|
|
1_000
|
|
|
|
|
1_000_000
|
|
|
|
|
1_000_000_000
|
|
|
|
|
1_000_000_000_000
|
|
|
|
|
1_000_000_000_000_000
|
|
|
|
|
Giga
|
|
|
|
|
Tera
|
|
|
|
|
Peta
|
|
|
|
|
KiB
|
|
|
|
|
MiB
|
|
|
|
|
GiB
|
|
|
|
|
TiB
|
|
|
|
|
PiB
|
2021-03-31 15:46:25 -04:00
|
|
|
|
protobuf
|
|
|
|
|
Golang
|
2021-05-25 15:49:49 -04:00
|
|
|
|
multiValueHandling
|
2021-11-16 13:13:35 -05:00
|
|
|
|
_n_
|
2022-06-21 23:35:03 -04:00
|
|
|
|
100TB
|
2022-07-13 21:59:55 -04:00
|
|
|
|
- ../docs/querying/sql-functions.md
|
|
|
|
|
ANY_VALUE
|
|
|
|
|
APPROX_COUNT_DISTINCT_DS_HLL
|
|
|
|
|
APPROX_COUNT_DISTINCT_DS_THETA
|
|
|
|
|
APPROX_QUANTILE_DS
|
|
|
|
|
APPROX_QUANTILE_FIXED_BUCKETS
|
|
|
|
|
ARRAY_CONCAT_AGG
|
|
|
|
|
BIT_AND
|
|
|
|
|
BIT_OR
|
|
|
|
|
BIT_XOR
|
|
|
|
|
BITWISE_AND
|
|
|
|
|
BITWISE_COMPLEMENT
|
|
|
|
|
BITWISE_CONVERT_DOUBLE_TO_LONG_BITS
|
|
|
|
|
BITWISE_CONVERT_LONG_BITS_TO_DOUBLE
|
|
|
|
|
BITWISE_OR
|
|
|
|
|
BITWISE_SHIFT_LEFT
|
|
|
|
|
BITWISE_SHIFT_RIGHT
|
|
|
|
|
BITWISE_XOR
|
|
|
|
|
BLOOM_FILTER
|
|
|
|
|
BTRIM
|
|
|
|
|
CHAR_LENGTH
|
|
|
|
|
CHARACTER_LENGTH
|
|
|
|
|
CURRENT_DATE
|
|
|
|
|
CURRENT_TIMESTAMP
|
|
|
|
|
DATE_TRUNC
|
|
|
|
|
DS_CDF
|
|
|
|
|
DS_GET_QUANTILE
|
|
|
|
|
DS_GET_QUANTILES
|
|
|
|
|
DS_HISTOGRAM
|
|
|
|
|
DS_HLL
|
|
|
|
|
DS_QUANTILE_SUMMARY
|
|
|
|
|
DS_QUANTILES_SKETCH
|
|
|
|
|
DS_RANK
|
|
|
|
|
DS_THETA
|
|
|
|
|
EARLIEST_BY
|
|
|
|
|
_e_
|
|
|
|
|
HLL_SKETCH_ESTIMATE
|
|
|
|
|
HLL_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS
|
|
|
|
|
HLL_SKETCH_TO_STRING
|
|
|
|
|
HLL_SKETCH_UNION
|
|
|
|
|
LATEST_BY
|
|
|
|
|
base-10
|
|
|
|
|
MV_APPEND
|
|
|
|
|
MV_CONCAT
|
|
|
|
|
MV_CONTAINS
|
|
|
|
|
MV_FILTER_NONE
|
|
|
|
|
MV_FILTER_ONLY
|
|
|
|
|
MV_LENGTH
|
|
|
|
|
MV_OFFSET
|
|
|
|
|
MV_OFFSET_OF
|
|
|
|
|
MV_ORDINAL
|
|
|
|
|
MV_ORDINAL_OF
|
|
|
|
|
MV_OVERLAP
|
|
|
|
|
MV_PREPEND
|
|
|
|
|
MV_SLICE
|
|
|
|
|
MV_TO_STRING
|
|
|
|
|
NULLIF
|
|
|
|
|
_n_th
|
|
|
|
|
STDDEV_POP
|
|
|
|
|
STDDEV_SAMP
|
|
|
|
|
STRING_FORMAT
|
|
|
|
|
STRING_TO_MV
|
|
|
|
|
SUBSTR
|
|
|
|
|
TDIGEST_GENERATE_SKETCH
|
|
|
|
|
TDIGEST_QUANTILE
|
|
|
|
|
TEXTCAT
|
|
|
|
|
THETA_SKETCH_ESTIMATE
|
|
|
|
|
THETA_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS
|
|
|
|
|
THETA_SKETCH_INTERSECT
|
|
|
|
|
THETA_SKETCH_NOT
|
|
|
|
|
THETA_SKETCH_UNION
|
|
|
|
|
TIME_CEIL
|
|
|
|
|
TIME_EXTRACT
|
|
|
|
|
TIME_FLOOR
|
|
|
|
|
TIME_FORMAT
|
|
|
|
|
TIME_IN_INTERVAL
|
|
|
|
|
TIMESTAMP_TO_MILLIS
|
|
|
|
|
TIMESTAMPADD
|
|
|
|
|
TIMESTAMPDIFF
|
|
|
|
|
TRUNC
|
|
|
|
|
VAR_POP
|
|
|
|
|
VAR_SAMP
|
2022-08-09 06:44:22 -04:00
|
|
|
|
KTable
|
|
|
|
|
Aotearoa
|
|
|
|
|
Czechia
|
2022-08-22 21:47:40 -04:00
|
|
|
|
Zeelund
|