2019-09-17 15:47:30 -04:00
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
# markdown-spellcheck spelling configuration file
|
|
|
|
|
# Format - lines beginning # are comments
|
|
|
|
|
# global dictionary is at the start, file overrides afterwards
|
|
|
|
|
# one word per line, to define a file override use ' - filename'
|
|
|
|
|
# where filename is relative to this configuration file
|
2022-09-06 13:36:09 -04:00
|
|
|
|
1M
|
2023-04-18 04:55:20 -04:00
|
|
|
|
100MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
32-bit
|
2024-04-05 02:40:10 -04:00
|
|
|
|
4MiB
|
2020-08-21 12:43:58 -04:00
|
|
|
|
500MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
64-bit
|
|
|
|
|
ACL
|
2021-05-08 23:56:19 -04:00
|
|
|
|
ACLs
|
2019-09-17 15:47:30 -04:00
|
|
|
|
APIs
|
2023-03-27 01:56:37 -04:00
|
|
|
|
apache.org
|
2020-02-11 00:53:11 -05:00
|
|
|
|
AvroStorage
|
2021-06-09 06:32:35 -04:00
|
|
|
|
ARN
|
2023-11-06 14:34:42 -05:00
|
|
|
|
ASC
|
2022-11-10 10:33:04 -05:00
|
|
|
|
autokill
|
2019-09-17 15:47:30 -04:00
|
|
|
|
AWS
|
2019-10-12 12:12:14 -04:00
|
|
|
|
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI
|
|
|
|
|
AWS_CONTAINER_CREDENTIALS_FULL_URI
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Actian
|
|
|
|
|
Authorizer
|
|
|
|
|
Avatica
|
|
|
|
|
Avro
|
|
|
|
|
Azul
|
2024-03-04 16:13:28 -05:00
|
|
|
|
AzureDNSZone
|
2019-09-17 15:47:30 -04:00
|
|
|
|
BCP
|
|
|
|
|
Base64
|
|
|
|
|
Base64-encoded
|
|
|
|
|
ByteBuffer
|
2022-09-06 13:36:09 -04:00
|
|
|
|
bottlenecked
|
2023-11-29 03:16:11 -05:00
|
|
|
|
cartesian
|
2021-12-09 21:53:23 -05:00
|
|
|
|
concat
|
2019-09-17 15:47:30 -04:00
|
|
|
|
CIDR
|
|
|
|
|
CORS
|
2021-03-29 16:57:58 -04:00
|
|
|
|
CNF
|
2019-09-17 15:47:30 -04:00
|
|
|
|
CPUs
|
|
|
|
|
CSVs
|
2024-04-24 12:52:53 -04:00
|
|
|
|
CentralizedDatasourceSchema
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Ceph
|
2024-07-18 20:06:22 -04:00
|
|
|
|
circledR
|
2021-03-29 16:57:58 -04:00
|
|
|
|
CloudWatch
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ColumnDescriptor
|
|
|
|
|
Corretto
|
2023-08-17 20:32:51 -04:00
|
|
|
|
CLI
|
2024-03-07 18:16:52 -05:00
|
|
|
|
CUME_DIST
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DDL
|
2024-03-07 18:16:52 -05:00
|
|
|
|
DENSE_RANK
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DML
|
|
|
|
|
DNS
|
|
|
|
|
DRUIDVERSION
|
|
|
|
|
DataSketches
|
|
|
|
|
DateTime
|
|
|
|
|
DateType
|
2024-01-31 00:53:50 -05:00
|
|
|
|
DeltaLakeInputSource
|
2022-05-03 19:22:25 -04:00
|
|
|
|
dimensionsSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DimensionSpec
|
|
|
|
|
DimensionSpecs
|
|
|
|
|
Dockerfile
|
2023-06-05 13:53:17 -04:00
|
|
|
|
Docusaurus
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DogStatsD
|
2022-09-06 13:36:09 -04:00
|
|
|
|
DOCTYPE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Double.NEGATIVE_INFINITY
|
|
|
|
|
Double.NEGATIVE_INFINITY.
|
|
|
|
|
Double.POSITIVE_INFINITY
|
|
|
|
|
Double.POSITIVE_INFINITY.
|
2022-11-21 23:26:32 -05:00
|
|
|
|
downsampled
|
|
|
|
|
downsamples
|
|
|
|
|
downsampling
|
2019-10-01 17:59:30 -04:00
|
|
|
|
Dropwizard
|
|
|
|
|
dropwizard
|
2024-01-31 00:53:50 -05:00
|
|
|
|
druid-deltalake-extensions
|
2020-01-17 18:52:05 -05:00
|
|
|
|
DruidInputSource
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DruidSQL
|
2020-12-10 11:24:33 -05:00
|
|
|
|
DynamicConfigProvider
|
2019-09-17 15:47:30 -04:00
|
|
|
|
EC2
|
2019-10-12 12:12:14 -04:00
|
|
|
|
EC2ContainerCredentialsProviderWrapper
|
|
|
|
|
ECS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
EMR
|
|
|
|
|
EMRFS
|
|
|
|
|
ETL
|
|
|
|
|
Elasticsearch
|
2021-07-07 01:05:41 -04:00
|
|
|
|
Enums
|
2024-03-07 18:16:52 -05:00
|
|
|
|
FIRST_VALUE
|
2020-01-17 18:52:05 -05:00
|
|
|
|
FlattenSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Float.NEGATIVE_INFINITY
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Float.NEGATIVE_INFINITY.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Float.POSITIVE_INFINITY
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Float.POSITIVE_INFINITY.
|
2019-12-12 20:00:08 -05:00
|
|
|
|
ForwardedRequestCustomizer
|
2019-09-17 15:47:30 -04:00
|
|
|
|
GC
|
|
|
|
|
GPG
|
|
|
|
|
GSSAPI
|
|
|
|
|
GUIs
|
|
|
|
|
GroupBy
|
|
|
|
|
Guice
|
|
|
|
|
HDFS
|
|
|
|
|
HLL
|
|
|
|
|
HashSet
|
|
|
|
|
Homebrew
|
2022-09-06 13:36:09 -04:00
|
|
|
|
html
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HyperLogLog
|
2021-01-07 00:15:29 -05:00
|
|
|
|
IAM
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IANA
|
2023-07-17 23:29:57 -04:00
|
|
|
|
IcebergFilter
|
|
|
|
|
IcebergInputSource
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IETF
|
2024-06-04 12:47:49 -04:00
|
|
|
|
IoT
|
2019-09-25 14:25:03 -04:00
|
|
|
|
IP
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IPv4
|
2023-12-08 02:09:06 -05:00
|
|
|
|
IPv6
|
2023-06-13 15:44:04 -04:00
|
|
|
|
IS_AGGREGATOR
|
2020-06-30 00:08:13 -04:00
|
|
|
|
IS_BROADCAST
|
|
|
|
|
IS_JOINABLE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
IS0
|
|
|
|
|
ISO-8601
|
|
|
|
|
ISO8601
|
|
|
|
|
IndexSpec
|
|
|
|
|
IndexTask
|
|
|
|
|
InfluxDB
|
2019-11-22 13:49:16 -05:00
|
|
|
|
InputFormat
|
2020-01-17 18:52:05 -05:00
|
|
|
|
InputSource
|
2020-06-09 15:55:20 -04:00
|
|
|
|
InputSources
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Integer.MAX_VALUE
|
2024-01-04 17:02:32 -05:00
|
|
|
|
IntelliJ
|
2022-05-03 19:22:25 -04:00
|
|
|
|
ioConfig
|
Support for middle manager less druid, tasks launch as k8s jobs (#13156)
* Support for middle manager less druid, tasks launch as k8s jobs
* Fixing forking task runner test
* Test cleanup, dependency cleanup, intellij inspections cleanup
* Changes per PR review
Add configuration option to disable http/https proxy for the k8s client
Update the docs to provide more detail about sidecar support
* Removing un-needed log lines
* Small changes per PR review
* Upon task completion we callback to the overlord to update the status / locaiton, for slower k8s clusters, this reduces locking time significantly
* Merge conflict fix
* Fixing tests and docs
* update tiny-cluster.yaml
changed `enableTaskLevelLogPush` to `encapsulatedTask`
* Apply suggestions from code review
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
* Minor changes per PR request
* Cleanup, adding test to AbstractTask
* Add comment in peon.sh
* Bumping code coverage
* More tests to make code coverage happy
* Doh a duplicate dependnecy
* Integration test setup is weird for k8s, will do this in a different PR
* Reverting back all integration test changes, will do in anotbher PR
* use StringUtils.base64 instead of Base64
* Jdk is nasty, if i compress in jdk 11 in jdk 17 the decompressed result is different
Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
2022-11-02 22:44:47 -04:00
|
|
|
|
Istio
|
2019-10-09 02:43:58 -04:00
|
|
|
|
JBOD
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JDBC
|
|
|
|
|
JDK
|
|
|
|
|
JDK7
|
|
|
|
|
JDK8
|
|
|
|
|
JKS
|
2022-11-29 12:26:32 -05:00
|
|
|
|
jks
|
2019-10-01 17:59:30 -04:00
|
|
|
|
JMX
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JRE
|
|
|
|
|
JS
|
|
|
|
|
JSON
|
2020-01-17 18:52:05 -05:00
|
|
|
|
JsonPath
|
2022-08-19 20:12:19 -04:00
|
|
|
|
JSONPath
|
2020-11-19 18:24:58 -05:00
|
|
|
|
JSSE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JVM
|
|
|
|
|
JVMs
|
2023-03-25 09:11:40 -04:00
|
|
|
|
JWT
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Joda
|
|
|
|
|
JsonProperty
|
2021-08-24 11:49:29 -04:00
|
|
|
|
Jupyter
|
2022-12-16 16:33:50 -05:00
|
|
|
|
JupyterLab
|
2019-09-17 15:47:30 -04:00
|
|
|
|
KMS
|
|
|
|
|
Kerberized
|
|
|
|
|
Kerberos
|
2021-04-14 11:58:17 -04:00
|
|
|
|
KeyStores
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Kinesis
|
2019-11-06 15:56:21 -05:00
|
|
|
|
Kubernetes
|
2024-01-31 00:53:50 -05:00
|
|
|
|
Lakehouse
|
2022-11-29 12:26:32 -05:00
|
|
|
|
LDAPS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LRU
|
|
|
|
|
LZ4
|
|
|
|
|
LZO
|
|
|
|
|
LimitSpec
|
|
|
|
|
Long.MAX_VALUE
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Long.MAX_VALUE.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Long.MIN_VALUE
|
2021-11-16 13:13:35 -05:00
|
|
|
|
Long.MIN_VALUE.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Lucene
|
|
|
|
|
MapBD
|
|
|
|
|
MapDB
|
2021-08-19 04:52:26 -04:00
|
|
|
|
MariaDB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
MiddleManager
|
|
|
|
|
MiddleManagers
|
|
|
|
|
Montréal
|
2022-09-06 13:36:09 -04:00
|
|
|
|
MSQ
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Murmur3
|
2021-08-09 10:48:29 -04:00
|
|
|
|
MVCC
|
2023-04-04 16:07:54 -04:00
|
|
|
|
MV_TO_ARRAY
|
2019-09-17 15:47:30 -04:00
|
|
|
|
NFS
|
2020-05-16 17:09:12 -04:00
|
|
|
|
OCF
|
2023-03-25 09:11:40 -04:00
|
|
|
|
OIDC
|
2019-09-17 15:47:30 -04:00
|
|
|
|
OLAP
|
|
|
|
|
OOMs
|
|
|
|
|
OpenJDK
|
2020-11-19 18:24:58 -05:00
|
|
|
|
OpenLDAP
|
2019-09-17 15:47:30 -04:00
|
|
|
|
OpenTSDB
|
|
|
|
|
OutputStream
|
|
|
|
|
ParAccel
|
|
|
|
|
ParseSpec
|
|
|
|
|
ParseSpecs
|
|
|
|
|
Protobuf
|
2023-05-19 12:42:27 -04:00
|
|
|
|
protobuf
|
2021-08-09 20:27:35 -04:00
|
|
|
|
pull-deps
|
2019-09-17 15:47:30 -04:00
|
|
|
|
RDBMS
|
|
|
|
|
RDDs
|
2021-01-07 00:15:29 -05:00
|
|
|
|
RDS
|
2023-06-13 15:44:04 -04:00
|
|
|
|
ROUTINE_CATALOG
|
|
|
|
|
ROUTINE_NAME
|
|
|
|
|
ROUTINE_SCHEMA
|
|
|
|
|
ROUTINE_TYPE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Rackspace
|
|
|
|
|
Redis
|
|
|
|
|
S3
|
2024-06-20 12:31:29 -04:00
|
|
|
|
SAS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SDK
|
|
|
|
|
SIGAR
|
|
|
|
|
SPNEGO
|
Support for middle manager less druid, tasks launch as k8s jobs (#13156)
* Support for middle manager less druid, tasks launch as k8s jobs
* Fixing forking task runner test
* Test cleanup, dependency cleanup, intellij inspections cleanup
* Changes per PR review
Add configuration option to disable http/https proxy for the k8s client
Update the docs to provide more detail about sidecar support
* Removing un-needed log lines
* Small changes per PR review
* Upon task completion we callback to the overlord to update the status / locaiton, for slower k8s clusters, this reduces locking time significantly
* Merge conflict fix
* Fixing tests and docs
* update tiny-cluster.yaml
changed `enableTaskLevelLogPush` to `encapsulatedTask`
* Apply suggestions from code review
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
* Minor changes per PR request
* Cleanup, adding test to AbstractTask
* Add comment in peon.sh
* Bumping code coverage
* More tests to make code coverage happy
* Doh a duplicate dependnecy
* Integration test setup is weird for k8s, will do this in a different PR
* Reverting back all integration test changes, will do in anotbher PR
* use StringUtils.base64 instead of Base64
* Jdk is nasty, if i compress in jdk 11 in jdk 17 the decompressed result is different
Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
2022-11-02 22:44:47 -04:00
|
|
|
|
Splunk
|
2020-06-09 15:55:20 -04:00
|
|
|
|
SqlInputSource
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SQLServer
|
|
|
|
|
SSD
|
|
|
|
|
SSDs
|
|
|
|
|
SSL
|
|
|
|
|
Samza
|
|
|
|
|
Splunk
|
2020-02-19 16:09:20 -05:00
|
|
|
|
SqlParameter
|
2020-11-19 18:24:58 -05:00
|
|
|
|
SslContextFactory
|
2019-09-17 15:47:30 -04:00
|
|
|
|
StatsD
|
2021-09-21 16:28:26 -04:00
|
|
|
|
SYSTEM_TABLE
|
2023-05-12 19:51:00 -04:00
|
|
|
|
TaskRunner
|
2023-08-16 22:01:21 -04:00
|
|
|
|
TabItem
|
2019-09-17 15:47:30 -04:00
|
|
|
|
TCP
|
|
|
|
|
TGT
|
|
|
|
|
TLS
|
2022-11-29 12:26:32 -05:00
|
|
|
|
tls
|
2019-09-17 15:47:30 -04:00
|
|
|
|
TopN
|
|
|
|
|
TopNs
|
|
|
|
|
UI
|
|
|
|
|
UIs
|
2024-03-14 19:31:33 -04:00
|
|
|
|
UPSERT
|
2019-09-17 15:47:30 -04:00
|
|
|
|
URI
|
|
|
|
|
URIs
|
2023-01-17 11:41:57 -05:00
|
|
|
|
uris
|
2019-09-17 15:47:30 -04:00
|
|
|
|
UTF-16
|
|
|
|
|
UTF-8
|
|
|
|
|
UTF8
|
|
|
|
|
XMLs
|
|
|
|
|
ZK
|
2020-08-26 15:39:48 -04:00
|
|
|
|
ZSTD
|
2019-09-17 15:47:30 -04:00
|
|
|
|
accessor
|
|
|
|
|
ad-hoc
|
|
|
|
|
aggregator
|
|
|
|
|
aggregators
|
|
|
|
|
ambari
|
|
|
|
|
analytics
|
2022-08-19 20:12:19 -04:00
|
|
|
|
arrayElement
|
2023-12-07 03:14:00 -05:00
|
|
|
|
arrayContainsElement
|
2022-09-26 20:51:04 -04:00
|
|
|
|
assumeNewlineDelimited
|
2021-06-09 06:32:35 -04:00
|
|
|
|
assumeRoleArn
|
|
|
|
|
assumeRoleExternalId
|
parallel broker merges on fork join pool (#8578)
* sketch of broker parallel merges done in small batches on fork join pool
* fix non-terminating sequences, auto compute parallelism
* adjust benches
* adjust benchmarks
* now hella more faster, fixed dumb
* fix
* remove comments
* log.info for debug
* javadoc
* safer block for sequence to yielder conversion
* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool
* smooth yield rate adjustment, more logs to help tune
* cleanup, less logs
* error handling, bug fixes, on by default, more parallel, more tests
* remove unused var
* comments
* timeboundary mergeFn
* simplify, more javadoc
* formatting
* pushdown config
* use nanos consistently, move logs back to debug level, bit more javadoc
* static terminal result batch
* javadoc for nullability of createMergeFn
* cleanup
* oops
* fix race, add docs
* spelling, remove todo, add unhandled exception log
* cleanup, revert unintended change
* another unintended change
* review stuff
* add ParallelMergeCombiningSequenceBenchmark, fixes
* hyper-threading is the enemy
* fix initial start delay, lol
* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2
* fix those important style issues with the benchmarks code
* lazy sequence creation for benchmarks
* more benchmark comments
* stable sequence generation time
* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs
* add jmh thread based benchmarks, cleanup some stuff
* oops
* style
* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose
* retool benchmark to allow modeling more typical heterogenous heavy workloads
* spelling
* fix
* refactor benchmarks
* formatting
* docs
* add maxThreadStartDelay parameter to threaded benchmark
* why does catch need to be on its own line but else doesnt
2019-11-07 14:58:46 -05:00
|
|
|
|
async
|
2019-09-17 15:47:30 -04:00
|
|
|
|
authorizer
|
|
|
|
|
authorizers
|
|
|
|
|
autocomplete
|
|
|
|
|
autodiscovery
|
|
|
|
|
autoscaler
|
|
|
|
|
autoscaling
|
|
|
|
|
averager
|
|
|
|
|
averagers
|
|
|
|
|
backend
|
|
|
|
|
backfills
|
|
|
|
|
backpressure
|
|
|
|
|
base64
|
|
|
|
|
big-endian
|
2022-08-19 20:12:19 -04:00
|
|
|
|
bigint
|
2024-05-29 15:44:37 -04:00
|
|
|
|
blkio
|
2019-09-17 15:47:30 -04:00
|
|
|
|
blobstore
|
2023-05-19 12:42:27 -04:00
|
|
|
|
Boolean
|
2019-09-17 15:47:30 -04:00
|
|
|
|
boolean
|
|
|
|
|
breakpoint
|
|
|
|
|
broadcasted
|
2022-10-25 21:05:38 -04:00
|
|
|
|
bucketSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
checksums
|
|
|
|
|
classpath
|
|
|
|
|
clickstream
|
2022-07-15 14:03:34 -04:00
|
|
|
|
clientConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
codebase
|
|
|
|
|
codec
|
|
|
|
|
colocated
|
|
|
|
|
colocation
|
2023-12-08 14:40:24 -05:00
|
|
|
|
colocating
|
2019-09-17 15:47:30 -04:00
|
|
|
|
compactable
|
2021-03-24 14:41:44 -04:00
|
|
|
|
compactionTask
|
2019-09-17 15:47:30 -04:00
|
|
|
|
config
|
|
|
|
|
configs
|
2020-12-10 11:24:33 -05:00
|
|
|
|
consumerProperties
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cron
|
|
|
|
|
csv
|
|
|
|
|
customizable
|
|
|
|
|
dataset
|
|
|
|
|
datasets
|
|
|
|
|
datasketches
|
|
|
|
|
datasource
|
|
|
|
|
datasources
|
|
|
|
|
dbcp
|
2021-08-13 16:40:25 -04:00
|
|
|
|
deepstore
|
2019-09-17 15:47:30 -04:00
|
|
|
|
denormalization
|
|
|
|
|
denormalize
|
|
|
|
|
denormalized
|
2020-03-13 04:41:54 -04:00
|
|
|
|
deprioritization
|
|
|
|
|
deprioritizes
|
2019-09-17 15:47:30 -04:00
|
|
|
|
dequeued
|
|
|
|
|
deserialization
|
|
|
|
|
deserialize
|
|
|
|
|
deserialized
|
2022-08-19 20:12:19 -04:00
|
|
|
|
deserializes
|
2019-09-17 15:47:30 -04:00
|
|
|
|
downtimes
|
2020-03-23 21:15:45 -04:00
|
|
|
|
druid
|
2020-12-15 00:10:31 -05:00
|
|
|
|
druid–kubernetes-extensions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
e.g.
|
|
|
|
|
encodings
|
|
|
|
|
endian
|
2022-07-15 14:03:34 -04:00
|
|
|
|
endpointConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
enum
|
2022-08-19 20:12:19 -04:00
|
|
|
|
expectedType
|
2020-01-17 18:52:05 -05:00
|
|
|
|
expr
|
2019-09-17 15:47:30 -04:00
|
|
|
|
failover
|
2023-07-25 22:24:36 -04:00
|
|
|
|
failovers
|
2020-01-17 18:52:05 -05:00
|
|
|
|
featureSpec
|
|
|
|
|
findColumnsFromHeader
|
2019-09-17 15:47:30 -04:00
|
|
|
|
filenames
|
|
|
|
|
filesystem
|
2023-07-17 23:29:57 -04:00
|
|
|
|
filterColumn
|
|
|
|
|
filterValue
|
2019-09-17 15:47:30 -04:00
|
|
|
|
firefox
|
|
|
|
|
firehose
|
|
|
|
|
firehoses
|
2020-02-11 00:53:11 -05:00
|
|
|
|
fromPigAvroStorage
|
2019-09-17 15:47:30 -04:00
|
|
|
|
frontends
|
|
|
|
|
granularities
|
2021-03-24 14:41:44 -04:00
|
|
|
|
granularitySpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
gzip
|
|
|
|
|
gzipped
|
|
|
|
|
hadoop
|
|
|
|
|
hasher
|
2023-08-16 08:30:11 -04:00
|
|
|
|
hashcode
|
2019-09-17 15:47:30 -04:00
|
|
|
|
hashtable
|
2022-05-16 04:12:00 -04:00
|
|
|
|
high-QPS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
historicals
|
|
|
|
|
hostname
|
|
|
|
|
hostnames
|
|
|
|
|
http
|
|
|
|
|
https
|
2020-07-08 02:12:39 -04:00
|
|
|
|
idempotency
|
2019-09-17 15:47:30 -04:00
|
|
|
|
i.e.
|
|
|
|
|
influxdb
|
2024-06-04 12:47:49 -04:00
|
|
|
|
influencer
|
|
|
|
|
influencers
|
2022-09-06 13:36:09 -04:00
|
|
|
|
ingestions
|
2020-02-25 23:59:53 -05:00
|
|
|
|
ingestionSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
injective
|
|
|
|
|
inlined
|
2022-03-22 09:03:57 -04:00
|
|
|
|
inSubQueryThreshold
|
2019-09-17 15:47:30 -04:00
|
|
|
|
interruptible
|
2022-08-19 20:12:19 -04:00
|
|
|
|
isAllowList
|
2020-01-17 18:52:05 -05:00
|
|
|
|
jackson-jq
|
2019-09-17 15:47:30 -04:00
|
|
|
|
javadoc
|
2023-08-08 18:49:29 -04:00
|
|
|
|
javascript
|
2020-06-30 00:08:13 -04:00
|
|
|
|
joinable
|
2022-10-25 21:05:38 -04:00
|
|
|
|
jsonCompression
|
2022-08-19 20:12:19 -04:00
|
|
|
|
json_keys
|
|
|
|
|
json_object
|
|
|
|
|
json_paths
|
|
|
|
|
json_query
|
2023-12-08 08:28:46 -05:00
|
|
|
|
json_query_array
|
2022-08-19 20:12:19 -04:00
|
|
|
|
json_value
|
Support for middle manager less druid, tasks launch as k8s jobs (#13156)
* Support for middle manager less druid, tasks launch as k8s jobs
* Fixing forking task runner test
* Test cleanup, dependency cleanup, intellij inspections cleanup
* Changes per PR review
Add configuration option to disable http/https proxy for the k8s client
Update the docs to provide more detail about sidecar support
* Removing un-needed log lines
* Small changes per PR review
* Upon task completion we callback to the overlord to update the status / locaiton, for slower k8s clusters, this reduces locking time significantly
* Merge conflict fix
* Fixing tests and docs
* update tiny-cluster.yaml
changed `enableTaskLevelLogPush` to `encapsulatedTask`
* Apply suggestions from code review
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
* Minor changes per PR request
* Cleanup, adding test to AbstractTask
* Add comment in peon.sh
* Bumping code coverage
* More tests to make code coverage happy
* Doh a duplicate dependnecy
* Integration test setup is weird for k8s, will do this in a different PR
* Reverting back all integration test changes, will do in anotbher PR
* use StringUtils.base64 instead of Base64
* Jdk is nasty, if i compress in jdk 11 in jdk 17 the decompressed result is different
Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
2022-11-02 22:44:47 -04:00
|
|
|
|
karlkfi
|
2019-09-17 15:47:30 -04:00
|
|
|
|
kerberos
|
|
|
|
|
keystore
|
2021-04-14 11:58:17 -04:00
|
|
|
|
keytool
|
2019-09-17 15:47:30 -04:00
|
|
|
|
keytab
|
2020-12-15 00:10:31 -05:00
|
|
|
|
kubernetes
|
Support for middle manager less druid, tasks launch as k8s jobs (#13156)
* Support for middle manager less druid, tasks launch as k8s jobs
* Fixing forking task runner test
* Test cleanup, dependency cleanup, intellij inspections cleanup
* Changes per PR review
Add configuration option to disable http/https proxy for the k8s client
Update the docs to provide more detail about sidecar support
* Removing un-needed log lines
* Small changes per PR review
* Upon task completion we callback to the overlord to update the status / locaiton, for slower k8s clusters, this reduces locking time significantly
* Merge conflict fix
* Fixing tests and docs
* update tiny-cluster.yaml
changed `enableTaskLevelLogPush` to `encapsulatedTask`
* Apply suggestions from code review
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
* Minor changes per PR request
* Cleanup, adding test to AbstractTask
* Add comment in peon.sh
* Bumping code coverage
* More tests to make code coverage happy
* Doh a duplicate dependnecy
* Integration test setup is weird for k8s, will do this in a different PR
* Reverting back all integration test changes, will do in anotbher PR
* use StringUtils.base64 instead of Base64
* Jdk is nasty, if i compress in jdk 11 in jdk 17 the decompressed result is different
Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
2022-11-02 22:44:47 -04:00
|
|
|
|
kubexit
|
|
|
|
|
k8s
|
2020-03-10 05:57:16 -04:00
|
|
|
|
laning
|
2019-09-17 15:47:30 -04:00
|
|
|
|
lifecycle
|
|
|
|
|
localhost
|
|
|
|
|
log4j
|
|
|
|
|
log4j2
|
|
|
|
|
log4j2.xml
|
|
|
|
|
lookback
|
|
|
|
|
lookups
|
|
|
|
|
mapreduce
|
|
|
|
|
masse
|
2022-09-06 13:36:09 -04:00
|
|
|
|
maxBytes
|
2022-02-15 23:45:07 -05:00
|
|
|
|
maxNumericInFilters
|
2020-08-21 12:43:58 -04:00
|
|
|
|
maxNumFiles
|
|
|
|
|
maxNumSegments
|
2019-10-09 02:43:58 -04:00
|
|
|
|
max_map_count
|
2019-09-17 15:47:30 -04:00
|
|
|
|
memcached
|
|
|
|
|
mergeable
|
|
|
|
|
metadata
|
2023-07-17 23:29:57 -04:00
|
|
|
|
metastores
|
2019-09-17 15:47:30 -04:00
|
|
|
|
millis
|
2022-09-17 00:58:11 -04:00
|
|
|
|
microbatch
|
|
|
|
|
microbatches
|
2019-09-17 15:47:30 -04:00
|
|
|
|
misconfiguration
|
2021-03-24 14:41:44 -04:00
|
|
|
|
misconfigured
|
2020-01-23 16:42:03 -05:00
|
|
|
|
mostAvailableSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
multitenancy
|
|
|
|
|
multitenant
|
2023-05-17 04:36:02 -04:00
|
|
|
|
MVDs
|
2019-09-17 15:47:30 -04:00
|
|
|
|
mysql
|
|
|
|
|
namespace
|
|
|
|
|
namespaced
|
|
|
|
|
namespaces
|
|
|
|
|
natively
|
|
|
|
|
netflow
|
2023-04-04 16:07:54 -04:00
|
|
|
|
nondescriptive
|
2022-09-06 13:36:09 -04:00
|
|
|
|
nonfinalized
|
Compaction: Fetch segments one at a time on main task; skip when possible. (#13280)
* Compaction: Fetch segments one at a time on main task; skip when possible.
Compact tasks include the ability to fetch existing segments and determine
reasonable defaults for granularitySpec, dimensionsSpec, and metricsSpec.
This is a useful feature that makes compact tasks work well even when the
user running the compaction does not have a clear idea of what they want
the compacted segments to be like.
However, this comes at a cost: it takes time, and disk space, to do all
of these fetches. This patch improves the situation in two ways:
1) When segments do need to be fetched, download them one at a time and
delete them when we're done. This still takes time, but minimizes the
required disk space.
2) Don't fetch segments on the main compact task when they aren't needed.
If the user provides a full granularitySpec, dimensionsSpec, and
metricsSpec, we can skip it.
* Adjustments.
* Changes from code review.
* Fix logic for determining rollup.
2022-11-07 04:20:14 -05:00
|
|
|
|
non-null
|
2019-09-17 15:47:30 -04:00
|
|
|
|
non-nullable
|
|
|
|
|
noop
|
2024-03-07 18:16:52 -05:00
|
|
|
|
NTILE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
numerics
|
2021-03-24 14:41:44 -04:00
|
|
|
|
numShards
|
2022-11-30 19:25:35 -05:00
|
|
|
|
parameterize
|
2022-11-11 02:46:40 -05:00
|
|
|
|
objectGlob
|
2019-09-17 15:47:30 -04:00
|
|
|
|
parameterized
|
2022-08-19 20:12:19 -04:00
|
|
|
|
parse_json
|
2019-09-17 15:47:30 -04:00
|
|
|
|
parseable
|
|
|
|
|
partitioner
|
2020-09-24 19:32:56 -04:00
|
|
|
|
partitionFunction
|
2020-09-15 14:28:09 -04:00
|
|
|
|
partitionsSpec
|
2022-08-19 20:12:19 -04:00
|
|
|
|
pathParts
|
2024-03-07 18:16:52 -05:00
|
|
|
|
PERCENT_RANK
|
2019-09-17 15:47:30 -04:00
|
|
|
|
performant
|
|
|
|
|
plaintext
|
|
|
|
|
pluggable
|
Support for middle manager less druid, tasks launch as k8s jobs (#13156)
* Support for middle manager less druid, tasks launch as k8s jobs
* Fixing forking task runner test
* Test cleanup, dependency cleanup, intellij inspections cleanup
* Changes per PR review
Add configuration option to disable http/https proxy for the k8s client
Update the docs to provide more detail about sidecar support
* Removing un-needed log lines
* Small changes per PR review
* Upon task completion we callback to the overlord to update the status / locaiton, for slower k8s clusters, this reduces locking time significantly
* Merge conflict fix
* Fixing tests and docs
* update tiny-cluster.yaml
changed `enableTaskLevelLogPush` to `encapsulatedTask`
* Apply suggestions from code review
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
* Minor changes per PR request
* Cleanup, adding test to AbstractTask
* Add comment in peon.sh
* Bumping code coverage
* More tests to make code coverage happy
* Doh a duplicate dependnecy
* Integration test setup is weird for k8s, will do this in a different PR
* Reverting back all integration test changes, will do in anotbher PR
* use StringUtils.base64 instead of Base64
* Jdk is nasty, if i compress in jdk 11 in jdk 17 the decompressed result is different
Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
2022-11-02 22:44:47 -04:00
|
|
|
|
podSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
postgres
|
|
|
|
|
postgresql
|
2024-01-14 12:52:30 -05:00
|
|
|
|
pre-aggregate
|
2019-09-17 15:47:30 -04:00
|
|
|
|
pre-aggregated
|
|
|
|
|
pre-aggregates
|
|
|
|
|
pre-aggregating
|
|
|
|
|
pre-aggregation
|
|
|
|
|
pre-computation
|
|
|
|
|
pre-compute
|
|
|
|
|
pre-computing
|
2023-12-08 14:40:24 -05:00
|
|
|
|
preconfigured
|
2022-09-17 00:58:11 -04:00
|
|
|
|
pre-existing
|
2020-04-17 01:12:20 -04:00
|
|
|
|
pre-filtered
|
|
|
|
|
pre-filtering
|
2019-09-17 15:47:30 -04:00
|
|
|
|
pre-generated
|
|
|
|
|
pre-made
|
|
|
|
|
pre-processing
|
|
|
|
|
preemptible
|
|
|
|
|
prefetch
|
|
|
|
|
prefetched
|
|
|
|
|
prefetching
|
2023-09-06 01:05:57 -04:00
|
|
|
|
precached
|
2019-09-17 15:47:30 -04:00
|
|
|
|
prepend
|
|
|
|
|
prepended
|
|
|
|
|
prepending
|
|
|
|
|
prepends
|
2020-04-30 15:07:28 -04:00
|
|
|
|
prepopulated
|
2019-09-17 15:47:30 -04:00
|
|
|
|
preprocessing
|
|
|
|
|
priori
|
2020-07-08 22:47:09 -04:00
|
|
|
|
procs
|
2022-08-19 20:12:19 -04:00
|
|
|
|
processFromRaw
|
2019-09-17 15:47:30 -04:00
|
|
|
|
programmatically
|
|
|
|
|
proto
|
|
|
|
|
proxied
|
2022-07-15 14:03:34 -04:00
|
|
|
|
proxyConfig
|
Druid automated quickstart (#13365)
* Druid automated quickstart
* remove conf/druid/single-server/quickstart/_common/historical/jvm.config
* Minor changes in python script
* Add lower bound memory for some services
* Additional runtime properties for services
* Update supervise script to accept command arguments, corresponding changes in druid-quickstart.py
* File end newline
* Limit the ability to start multiple instances of a service, documentation changes
* simplify script arguments
* restore changes in medium profile
* run-druid refactor
* compute and pass middle manager runtime properties to run-druid
supervise script changes to process java opts array
use argparse, leave free memory, logging
* Remove extra quotes from mm task javaopts array
* Update logic to compute minimum memory
* simplify run-druid
* remove debug options from run-druid
* resolve the config_path provided
* comment out service specific runtime properties which are computed in the code
* simplify run-druid
* clean up docs, naming changes
* Throw ValueError exception on illegal state
* update docs
* rename args, compute_only -> compute, run_zk -> zk
* update help documentation
* update help documentation
* move task memory computation into separate method
* Add validation checks
* remove print
* Add validations
* remove start-druid bash script, rename start-druid-main
* Include tasks in lower bound memory calculation
* Fix test
* 256m instead of 256g
* caffeine cache uses 5% of heap
* ensure min task count is 2, task count is monotonic
* update configs and documentation for runtime props in conf/druid/single-server/quickstart
* Update docs
* Specify memory argument for each profile in single-server.md
* Update middleManager runtime.properties
* Move quickstart configs to conf/druid/base, add bash launch script, support python2
* Update supervise script
* rename base config directory to auto
* rename python script, changes to pass repeated args to supervise
* remove exmaples/conf/druid/base dir
* add docs
* restore changes in conf dir
* update start-druid-auto
* remove hashref for commands in supervise script
* start-druid-main java_opts array is comma separated
* update entry point script name in python script
* Update help docs
* documentation changes
* docs changes
* update docs
* add support for running indexer
* update supported services list
* update help
* Update python.md
* remove dir
* update .spelling
* Remove dependency on psutil and pathlib
* update docs
* Update get_physical_memory method
* Update help docs
* update docs
* update method to get physical memory on python
* udpate spelling
* update .spelling
* minor change
* Minor change
* memory comptuation for indexer
* update start-druid
* Update python.md
* Update single-server.md
* Update python.md
* run python3 --version to check if python is installed
* Update supervise script
* start-druid: echo message if python not found
* update anchor text
* minor change
* Update condition in supervise script
* JVM not jvm in docs
2022-12-09 14:04:02 -05:00
|
|
|
|
python2
|
|
|
|
|
python3
|
|
|
|
|
Python2
|
|
|
|
|
Python3
|
2022-05-16 04:12:00 -04:00
|
|
|
|
QPS
|
2019-09-17 15:47:30 -04:00
|
|
|
|
quantile
|
|
|
|
|
quantiles
|
|
|
|
|
queryable
|
|
|
|
|
quickstart
|
|
|
|
|
realtime
|
|
|
|
|
rebalance
|
|
|
|
|
redis
|
|
|
|
|
regexes
|
|
|
|
|
reimported
|
|
|
|
|
reindex
|
|
|
|
|
reindexing
|
|
|
|
|
reingest
|
|
|
|
|
reingesting
|
|
|
|
|
reingestion
|
|
|
|
|
repo
|
2021-04-01 20:30:47 -04:00
|
|
|
|
requireSSL
|
2019-09-17 15:47:30 -04:00
|
|
|
|
rollup
|
|
|
|
|
rollups
|
2024-03-07 18:16:52 -05:00
|
|
|
|
ROW_NUMBER
|
2019-09-17 15:47:30 -04:00
|
|
|
|
rsync
|
|
|
|
|
runtime
|
|
|
|
|
schemas
|
2022-04-05 12:15:42 -04:00
|
|
|
|
schemaless
|
2019-09-17 15:47:30 -04:00
|
|
|
|
searchable
|
2020-09-24 19:32:56 -04:00
|
|
|
|
secondaryPartitionPruning
|
2024-07-19 17:37:21 -04:00
|
|
|
|
seekable
|
2020-07-08 02:12:39 -04:00
|
|
|
|
seekable-stream
|
2019-12-12 20:00:08 -05:00
|
|
|
|
servlet
|
2022-05-16 04:12:00 -04:00
|
|
|
|
setProcessingThreadNames
|
Support for middle manager less druid, tasks launch as k8s jobs (#13156)
* Support for middle manager less druid, tasks launch as k8s jobs
* Fixing forking task runner test
* Test cleanup, dependency cleanup, intellij inspections cleanup
* Changes per PR review
Add configuration option to disable http/https proxy for the k8s client
Update the docs to provide more detail about sidecar support
* Removing un-needed log lines
* Small changes per PR review
* Upon task completion we callback to the overlord to update the status / locaiton, for slower k8s clusters, this reduces locking time significantly
* Merge conflict fix
* Fixing tests and docs
* update tiny-cluster.yaml
changed `enableTaskLevelLogPush` to `encapsulatedTask`
* Apply suggestions from code review
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
* Minor changes per PR request
* Cleanup, adding test to AbstractTask
* Add comment in peon.sh
* Bumping code coverage
* More tests to make code coverage happy
* Doh a duplicate dependnecy
* Integration test setup is weird for k8s, will do this in a different PR
* Reverting back all integration test changes, will do in anotbher PR
* use StringUtils.base64 instead of Base64
* Jdk is nasty, if i compress in jdk 11 in jdk 17 the decompressed result is different
Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
2022-11-02 22:44:47 -04:00
|
|
|
|
sigterm
|
2020-04-10 21:01:59 -04:00
|
|
|
|
simple-client-sslcontext
|
2019-09-17 15:47:30 -04:00
|
|
|
|
sharded
|
|
|
|
|
sharding
|
2020-01-17 18:52:05 -05:00
|
|
|
|
skipHeaderRows
|
2022-07-20 18:37:57 -04:00
|
|
|
|
Smoosh
|
|
|
|
|
smoosh
|
2019-09-17 15:47:30 -04:00
|
|
|
|
smooshed
|
2023-07-17 23:29:57 -04:00
|
|
|
|
snapshotting
|
2019-09-17 15:47:30 -04:00
|
|
|
|
splittable
|
2021-04-01 20:30:47 -04:00
|
|
|
|
ssl
|
|
|
|
|
sslmode
|
2023-04-14 05:53:34 -04:00
|
|
|
|
start_time
|
2019-09-17 15:47:30 -04:00
|
|
|
|
stdout
|
|
|
|
|
storages
|
2022-10-25 21:05:38 -04:00
|
|
|
|
stringDictionaryEncoding
|
2019-09-17 15:47:30 -04:00
|
|
|
|
stringified
|
2023-11-29 03:16:11 -05:00
|
|
|
|
sub-conditions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
subarray
|
|
|
|
|
subnet
|
|
|
|
|
subqueries
|
|
|
|
|
subquery
|
|
|
|
|
subsecond
|
|
|
|
|
substring
|
2024-05-31 18:13:10 -04:00
|
|
|
|
substrings
|
2020-08-21 12:43:58 -04:00
|
|
|
|
subtask
|
2020-07-08 02:12:39 -04:00
|
|
|
|
subtasks
|
2020-10-10 22:35:17 -04:00
|
|
|
|
supervisorTaskId
|
2023-06-05 13:53:17 -04:00
|
|
|
|
SVG
|
2019-09-17 15:47:30 -04:00
|
|
|
|
symlink
|
2022-08-19 20:12:19 -04:00
|
|
|
|
syntaxes
|
2023-11-02 13:31:28 -04:00
|
|
|
|
systemFields
|
2024-01-31 00:53:50 -05:00
|
|
|
|
tablePath
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tiering
|
|
|
|
|
timeseries
|
2023-05-19 12:42:27 -04:00
|
|
|
|
Timeseries
|
2019-09-17 15:47:30 -04:00
|
|
|
|
timestamp
|
|
|
|
|
timestamps
|
2022-08-19 20:12:19 -04:00
|
|
|
|
to_json_string
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tradeoffs
|
2021-11-24 13:56:38 -05:00
|
|
|
|
transformSpec
|
2022-08-19 20:12:19 -04:00
|
|
|
|
try_parse_json
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tsv
|
2019-10-09 02:43:58 -04:00
|
|
|
|
ulimit
|
2019-09-17 15:47:30 -04:00
|
|
|
|
unannounce
|
|
|
|
|
unannouncements
|
|
|
|
|
unary
|
|
|
|
|
unassign
|
|
|
|
|
uncomment
|
|
|
|
|
underutilization
|
|
|
|
|
unintuitive
|
|
|
|
|
unioned
|
|
|
|
|
unmergeable
|
|
|
|
|
unmerged
|
2021-08-13 16:40:25 -04:00
|
|
|
|
UNNEST
|
2022-09-06 13:36:09 -04:00
|
|
|
|
unnest
|
2022-09-17 00:58:11 -04:00
|
|
|
|
unnested
|
|
|
|
|
unnesting
|
2022-09-06 13:36:09 -04:00
|
|
|
|
unnests
|
2019-09-17 15:47:30 -04:00
|
|
|
|
unparseable
|
|
|
|
|
unparsed
|
2020-04-30 15:07:28 -04:00
|
|
|
|
unsetting
|
2020-11-19 18:24:58 -05:00
|
|
|
|
untrusted
|
2020-04-17 01:12:20 -04:00
|
|
|
|
useFilterCNF
|
2022-08-19 20:12:19 -04:00
|
|
|
|
useJqSyntax
|
2022-09-26 20:51:04 -04:00
|
|
|
|
useJsonNodeReader
|
2021-04-01 20:30:47 -04:00
|
|
|
|
useSSL
|
2024-03-14 19:31:33 -04:00
|
|
|
|
upsert
|
2019-09-17 15:47:30 -04:00
|
|
|
|
uptime
|
2019-11-19 22:49:43 -05:00
|
|
|
|
uris
|
2021-02-27 17:25:35 -05:00
|
|
|
|
urls
|
2020-01-17 18:52:05 -05:00
|
|
|
|
useFieldDiscovery
|
2019-09-17 15:47:30 -04:00
|
|
|
|
v1
|
|
|
|
|
v2
|
|
|
|
|
vCPUs
|
|
|
|
|
validator
|
2022-08-19 20:12:19 -04:00
|
|
|
|
varchar
|
2019-09-17 15:47:30 -04:00
|
|
|
|
vectorizable
|
|
|
|
|
vectorize
|
2020-09-28 21:48:34 -04:00
|
|
|
|
vectorizeVirtualColumns
|
2024-01-31 00:53:50 -05:00
|
|
|
|
versioned
|
2019-09-17 15:47:30 -04:00
|
|
|
|
versioning
|
2022-08-18 23:49:23 -04:00
|
|
|
|
virtualColumns
|
2019-09-17 15:47:30 -04:00
|
|
|
|
w.r.t.
|
2022-09-06 13:36:09 -04:00
|
|
|
|
walkthrough
|
2019-09-17 15:47:30 -04:00
|
|
|
|
whitelist
|
|
|
|
|
whitelisted
|
|
|
|
|
whitespace
|
|
|
|
|
wildcard
|
2019-10-28 11:07:38 -04:00
|
|
|
|
wildcards
|
2019-09-17 15:47:30 -04:00
|
|
|
|
xml
|
2023-05-19 12:42:27 -04:00
|
|
|
|
XOR
|
2019-09-17 15:47:30 -04:00
|
|
|
|
znode
|
|
|
|
|
znodes
|
2022-02-11 17:43:30 -05:00
|
|
|
|
APPROX_COUNT_DISTINCT
|
|
|
|
|
APPROX_QUANTILE
|
|
|
|
|
ARRAY_AGG
|
2023-06-01 19:45:27 -04:00
|
|
|
|
ARRAY_TO_MV
|
2022-02-11 17:43:30 -05:00
|
|
|
|
BIGINT
|
|
|
|
|
CATALOG_NAME
|
|
|
|
|
CHARACTER_MAXIMUM_LENGTH
|
|
|
|
|
CHARACTER_OCTET_LENGTH
|
|
|
|
|
CHARACTER_SET_NAME
|
|
|
|
|
COLLATION_NAME
|
|
|
|
|
COLUMN_DEFAULT
|
|
|
|
|
COLUMN_NAME
|
|
|
|
|
Concats
|
|
|
|
|
DATA_TYPE
|
|
|
|
|
DATETIME_PRECISION
|
|
|
|
|
DEFAULT_CHARACTER_SET_CATALOG
|
|
|
|
|
DEFAULT_CHARACTER_SET_NAME
|
|
|
|
|
DEFAULT_CHARACTER_SET_SCHEMA
|
|
|
|
|
ISODOW
|
|
|
|
|
ISOYEAR
|
|
|
|
|
IS_NULLABLE
|
|
|
|
|
JDBC_TYPE
|
|
|
|
|
MIDDLE_MANAGER
|
2022-06-21 16:05:37 -04:00
|
|
|
|
MILLIS_TO_TIMESTAMP
|
2022-02-11 17:43:30 -05:00
|
|
|
|
NULLable
|
|
|
|
|
NUMERIC_PRECISION
|
|
|
|
|
NUMERIC_PRECISION_RADIX
|
|
|
|
|
NUMERIC_SCALE
|
|
|
|
|
ORDINAL_POSITION
|
2023-06-05 13:53:17 -04:00
|
|
|
|
PNG
|
2022-06-21 16:05:37 -04:00
|
|
|
|
POSIX
|
2022-09-06 13:36:09 -04:00
|
|
|
|
P1M
|
|
|
|
|
P1Y
|
2022-02-11 17:43:30 -05:00
|
|
|
|
PT1M
|
|
|
|
|
PT5M
|
|
|
|
|
SCHEMA_NAME
|
|
|
|
|
SCHEMA_OWNER
|
|
|
|
|
SERVER_SEGMENTS
|
|
|
|
|
SMALLINT
|
|
|
|
|
SQL_PATH
|
|
|
|
|
STRING_AGG
|
|
|
|
|
SYSTEM_TABLE
|
|
|
|
|
TABLE_CATALOG
|
|
|
|
|
TABLE_NAME
|
|
|
|
|
TABLE_SCHEMA
|
|
|
|
|
TABLE_TYPE
|
|
|
|
|
TIME_PARSE
|
|
|
|
|
TIME_SHIFT
|
|
|
|
|
TINYINT
|
|
|
|
|
VARCHAR
|
|
|
|
|
avg_num_rows
|
|
|
|
|
avg_size
|
|
|
|
|
created_time
|
|
|
|
|
current_size
|
|
|
|
|
detailed_state
|
|
|
|
|
druid.server.maxSize
|
|
|
|
|
druid.server.tier
|
|
|
|
|
druid.sql.planner.maxSemiJoinRowsInMemory
|
|
|
|
|
druid.sql.planner.sqlTimeZone
|
|
|
|
|
druid.sql.planner.useApproximateCountDistinct
|
|
|
|
|
druid.sql.planner.useApproximateTopN
|
2022-08-22 21:47:40 -04:00
|
|
|
|
druid.sql.planner.useGroupingSetForExactDistinct
|
|
|
|
|
druid.sql.planner.useNativeQueryExplain
|
2022-02-11 17:43:30 -05:00
|
|
|
|
error_msg
|
|
|
|
|
exprs
|
|
|
|
|
group_id
|
|
|
|
|
interval_expr
|
2022-05-19 17:23:28 -04:00
|
|
|
|
is_active
|
2022-02-11 17:43:30 -05:00
|
|
|
|
is_available
|
|
|
|
|
is_leader
|
|
|
|
|
is_overshadowed
|
|
|
|
|
is_published
|
|
|
|
|
is_realtime
|
|
|
|
|
java.sql.Types
|
|
|
|
|
last_compaction_state
|
|
|
|
|
max_size
|
|
|
|
|
num_replicas
|
|
|
|
|
num_rows
|
|
|
|
|
num_segments
|
|
|
|
|
partition_num
|
|
|
|
|
plaintext_port
|
|
|
|
|
queue_insertion_time
|
2023-06-18 00:32:21 -04:00
|
|
|
|
replication_factor
|
2022-02-11 17:43:30 -05:00
|
|
|
|
runner_status
|
|
|
|
|
segment_id
|
|
|
|
|
server_type
|
|
|
|
|
shard_spec
|
|
|
|
|
sqlTimeZone
|
2022-09-06 13:36:09 -04:00
|
|
|
|
sql-msq-task
|
2022-02-11 17:43:30 -05:00
|
|
|
|
supervisor_id
|
|
|
|
|
sys
|
|
|
|
|
sys.segments
|
|
|
|
|
task_id
|
|
|
|
|
timestamp_expr
|
|
|
|
|
tls_port
|
|
|
|
|
total_size
|
|
|
|
|
useApproximateCountDistinct
|
|
|
|
|
useGroupingSetForExactDistinct
|
|
|
|
|
useApproximateTopN
|
|
|
|
|
wikipedia
|
2022-09-06 13:36:09 -04:00
|
|
|
|
your-table
|
2022-05-10 05:53:42 -04:00
|
|
|
|
enableTimeBoundaryPlanning
|
|
|
|
|
TimeBoundary
|
|
|
|
|
druid.query.default.context.enableTimeBoundaryPlanning
|
2022-02-11 17:43:30 -05:00
|
|
|
|
IEC
|
2022-09-06 13:36:09 -04:00
|
|
|
|
# MSQ general
|
|
|
|
|
SegmentGenerator
|
|
|
|
|
granularity_string
|
|
|
|
|
QueryKit
|
|
|
|
|
# MSQ report fields
|
|
|
|
|
taskId
|
|
|
|
|
multiStageQuery.taskId
|
|
|
|
|
multiStageQuery.payload.status
|
|
|
|
|
multiStageQuery.payload.status.status
|
2022-11-11 02:46:40 -05:00
|
|
|
|
multiStageQuery.payload.status.startTime
|
2022-09-06 13:36:09 -04:00
|
|
|
|
multiStageQuery.payload.status.durationMs
|
2022-10-28 02:30:15 -04:00
|
|
|
|
multiStageQuery.payload.status.pendingTasks
|
|
|
|
|
multiStageQuery.payload.status.runningTasks
|
2022-09-06 13:36:09 -04:00
|
|
|
|
multiStageQuery.payload.status.errorReport
|
|
|
|
|
multiStageQuery.payload.status.errorReport.taskId
|
|
|
|
|
multiStageQuery.payload.status.errorReport.host
|
|
|
|
|
multiStageQuery.payload.status.errorReport.stageNumber
|
|
|
|
|
multiStageQuery.payload.status.errorReport.error
|
|
|
|
|
multiStageQuery.payload.status.errorReport.error.errorCode
|
|
|
|
|
multiStageQuery.payload.status.errorReport.error.errorMessage
|
|
|
|
|
multiStageQuery.payload.status.errorReport.exceptionStackTrace
|
|
|
|
|
multiStageQuery.payload.stages stages
|
|
|
|
|
multiStageQuery.payload.stages[].stageNumber
|
|
|
|
|
definition.id
|
|
|
|
|
definition.input
|
|
|
|
|
definition.broadcast
|
|
|
|
|
definition.processor
|
|
|
|
|
definition.signature
|
|
|
|
|
stageNumber
|
|
|
|
|
startTime
|
|
|
|
|
multiStageQuery.payload.stages
|
|
|
|
|
READING_INPUT
|
|
|
|
|
POST_READING
|
|
|
|
|
RESULTS_COMPLETE
|
|
|
|
|
workerCount
|
|
|
|
|
partitionCount
|
|
|
|
|
startCount
|
|
|
|
|
# MSQ errors and limits
|
2022-11-11 02:46:40 -05:00
|
|
|
|
BroadcastTablesTooLarge
|
|
|
|
|
CannotParseExternalData
|
2022-09-06 13:36:09 -04:00
|
|
|
|
ColumnNameRestricted
|
|
|
|
|
ColumnTypeNotSupported
|
2022-11-11 02:46:40 -05:00
|
|
|
|
DurableStorageConfiguration
|
|
|
|
|
ColumnTypeNotSupported
|
2022-09-06 13:36:09 -04:00
|
|
|
|
InsertCannotAllocateSegment
|
2022-11-11 02:46:40 -05:00
|
|
|
|
InsertCannotBeEmpty
|
|
|
|
|
InsertCannotReplaceExistingSegment
|
|
|
|
|
InsertLockPreempted
|
2022-09-06 13:36:09 -04:00
|
|
|
|
InsertTimeNull
|
2022-11-11 02:46:40 -05:00
|
|
|
|
CURRENT_TIMESTAMP
|
2022-09-06 13:36:09 -04:00
|
|
|
|
InsertTimeOutOfBounds
|
|
|
|
|
UnknownError
|
|
|
|
|
TaskStartTimeout
|
|
|
|
|
OutOfMemoryError
|
|
|
|
|
SegmentGenerator
|
|
|
|
|
maxFrameSize
|
2022-11-11 02:46:40 -05:00
|
|
|
|
InvalidNullByte
|
|
|
|
|
QueryNotSupported
|
|
|
|
|
QueryNotSupported
|
|
|
|
|
RowTooLarge
|
|
|
|
|
TooManyBuckets
|
|
|
|
|
TooManyInputFiles
|
|
|
|
|
TooManyPartitions
|
|
|
|
|
TooManyColumns
|
|
|
|
|
TooManyWarnings
|
|
|
|
|
TooManyWorkers
|
|
|
|
|
NotEnoughMemory
|
|
|
|
|
WorkerFailed
|
|
|
|
|
WorkerRpcFailed
|
2023-09-06 01:05:57 -04:00
|
|
|
|
TIMED_OUT
|
2022-09-06 13:36:09 -04:00
|
|
|
|
# MSQ context parameters
|
2022-11-11 02:46:40 -05:00
|
|
|
|
maxNumTasks
|
|
|
|
|
taskAssignment
|
2022-10-28 17:27:50 -04:00
|
|
|
|
finalizeAggregations
|
|
|
|
|
indexSpec
|
2022-11-11 02:46:40 -05:00
|
|
|
|
rowsInMemory
|
|
|
|
|
segmentSortOrder
|
|
|
|
|
rowsPerSegment
|
2022-09-06 13:36:09 -04:00
|
|
|
|
durableShuffleStorage
|
2023-01-23 14:32:03 -05:00
|
|
|
|
composedIntermediateSuperSorterStorageEnabled
|
|
|
|
|
intermediateSuperSorterStorageMaxLocalBytes
|
|
|
|
|
ResourceLimitExceededException
|
2022-09-06 13:36:09 -04:00
|
|
|
|
# Aggregations
|
|
|
|
|
groupByEnableMultiValueUnnesting
|
|
|
|
|
APPROX_COUNT_DISTINCT_DS_HLL
|
|
|
|
|
APPROX_COUNT_DISTINCT_DS_THETA
|
|
|
|
|
APPROX_QUANTILE_DS
|
|
|
|
|
DS_QUANTILES_SKETCH
|
|
|
|
|
APPROX_QUANTILE_FIXED_BUCKETS
|
2024-01-25 12:53:39 -05:00
|
|
|
|
# Operators
|
|
|
|
|
pivoted
|
|
|
|
|
UNPIVOT
|
|
|
|
|
unpivoted
|
2022-09-06 13:36:09 -04:00
|
|
|
|
# File specific overrides
|
2019-09-17 15:47:30 -04:00
|
|
|
|
100x
|
|
|
|
|
_common
|
2023-04-24 13:41:56 -04:00
|
|
|
|
appender
|
2022-05-16 05:37:21 -04:00
|
|
|
|
appenders
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid-hdfs-storage
|
|
|
|
|
druid-s3-extensions
|
2022-02-15 23:45:07 -05:00
|
|
|
|
druid.sql.planner.maxNumericInFilters
|
2022-12-12 20:28:24 -05:00
|
|
|
|
Minio
|
|
|
|
|
multi-server
|
2019-09-17 15:47:30 -04:00
|
|
|
|
BasicDataSource
|
2021-11-23 01:28:51 -05:00
|
|
|
|
LeaderLatch
|
2023-12-11 01:05:16 -05:00
|
|
|
|
2.x
|
2024-01-31 00:53:50 -05:00
|
|
|
|
28.x
|
|
|
|
|
3.0.x
|
2021-05-25 15:49:49 -04:00
|
|
|
|
3.5.x
|
|
|
|
|
3.4.x
|
2024-01-31 00:53:50 -05:00
|
|
|
|
3.5.x.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
AllowAll
|
|
|
|
|
AuthenticationResult
|
|
|
|
|
AuthorizationLoadingLookupTest
|
2023-12-11 01:05:16 -05:00
|
|
|
|
booleans
|
|
|
|
|
EOF
|
|
|
|
|
IE11
|
|
|
|
|
InsufficientResourceException
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HttpClient
|
2023-12-11 01:05:16 -05:00
|
|
|
|
JsonConfigurator
|
|
|
|
|
KIP-297
|
2019-09-17 15:47:30 -04:00
|
|
|
|
allowAll
|
|
|
|
|
authenticatorChain
|
|
|
|
|
defaultUser
|
|
|
|
|
inputSegmentSizeBytes
|
|
|
|
|
skipOffsetFromLatest
|
2021-07-27 11:26:05 -04:00
|
|
|
|
brokerService
|
2019-09-17 15:47:30 -04:00
|
|
|
|
c3.2xlarge
|
2021-07-27 11:26:05 -04:00
|
|
|
|
defaultManualBrokerService
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxPriority
|
|
|
|
|
minPriority
|
2023-12-11 01:05:16 -05:00
|
|
|
|
NUMBER_FEATURES
|
|
|
|
|
NUMBER_OF_CONTRIBUTORS
|
|
|
|
|
PreparedStatement
|
|
|
|
|
pre-upgrade
|
|
|
|
|
QueryCapacityExceededException
|
|
|
|
|
QueryTimeoutException
|
|
|
|
|
QueryUnsupportedException
|
|
|
|
|
ResultSet
|
2019-09-17 15:47:30 -04:00
|
|
|
|
runtime.properties
|
2023-12-11 01:05:16 -05:00
|
|
|
|
SqlParseException
|
2019-09-17 15:47:30 -04:00
|
|
|
|
timeBoundary
|
2023-12-11 01:05:16 -05:00
|
|
|
|
ValidationException
|
2019-09-17 15:47:30 -04:00
|
|
|
|
0x0
|
|
|
|
|
0x9
|
|
|
|
|
2GB
|
|
|
|
|
300mb-700mb
|
|
|
|
|
Bieber
|
|
|
|
|
IndexTask-based
|
|
|
|
|
Ke
|
|
|
|
|
datasource_intervalStart_intervalEnd_version_partitionNum
|
|
|
|
|
partitionNum
|
|
|
|
|
v9
|
|
|
|
|
3.x
|
|
|
|
|
8u92
|
|
|
|
|
DskipTests
|
|
|
|
|
Papache-release
|
|
|
|
|
Pdist
|
2022-09-19 18:02:17 -04:00
|
|
|
|
Dweb.console.skip
|
2020-05-21 15:35:54 -04:00
|
|
|
|
yaml
|
2021-10-30 13:16:24 -04:00
|
|
|
|
Phadoop3
|
|
|
|
|
dist-hadoop3
|
|
|
|
|
hadoop3
|
|
|
|
|
2.x.x
|
|
|
|
|
3.x.x
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ambari-metrics
|
|
|
|
|
metricName
|
|
|
|
|
trustStore
|
|
|
|
|
fetchTimeout
|
|
|
|
|
gz
|
|
|
|
|
maxCacheCapacityBytes
|
|
|
|
|
maxFetchCapacityBytes
|
|
|
|
|
maxFetchRetry
|
|
|
|
|
prefetchTriggerBytes
|
|
|
|
|
shardSpecs
|
2022-02-22 07:57:43 -05:00
|
|
|
|
sharedAccessStorageToken
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cloudfiles
|
|
|
|
|
rackspace-cloudfiles-uk
|
|
|
|
|
rackspace-cloudfiles-us
|
2020-02-25 20:49:16 -05:00
|
|
|
|
gz
|
|
|
|
|
shardSpecs
|
|
|
|
|
maxCacheCapacityBytes
|
|
|
|
|
maxFetchCapacityBytes
|
|
|
|
|
fetchTimeout
|
|
|
|
|
maxFetchRetry
|
2019-09-17 15:47:30 -04:00
|
|
|
|
distinctCount
|
|
|
|
|
groupBy
|
|
|
|
|
maxIntermediateRows
|
|
|
|
|
numValuesPerPass
|
|
|
|
|
queryGranularity
|
|
|
|
|
segmentGranularity
|
|
|
|
|
topN
|
|
|
|
|
visitor_id
|
|
|
|
|
cpu
|
|
|
|
|
web_requests
|
|
|
|
|
_
|
|
|
|
|
druid_
|
|
|
|
|
druid_cache_total
|
|
|
|
|
druid_hits
|
|
|
|
|
druid_query
|
|
|
|
|
historical001
|
|
|
|
|
HadoopTuningConfig
|
|
|
|
|
TuningConfig
|
2023-05-19 12:42:27 -04:00
|
|
|
|
base-dataSource
|
2019-09-17 15:47:30 -04:00
|
|
|
|
baseDataSource
|
|
|
|
|
baseDataSource-hashCode
|
|
|
|
|
classpathPrefix
|
|
|
|
|
derivativeDataSource
|
|
|
|
|
druid.extensions.hadoopDependenciesDir
|
|
|
|
|
hadoopDependencyCoordinates
|
|
|
|
|
maxTaskCount
|
|
|
|
|
metricsSpec
|
|
|
|
|
queryType
|
|
|
|
|
tuningConfig
|
|
|
|
|
arcsinh
|
|
|
|
|
fieldName
|
|
|
|
|
momentSketchMerge
|
|
|
|
|
momentsketch
|
|
|
|
|
10-minutes
|
|
|
|
|
MeanNoNulls
|
|
|
|
|
P1D
|
|
|
|
|
cycleSize
|
|
|
|
|
doubleMax
|
2020-09-14 22:44:58 -04:00
|
|
|
|
doubleAny
|
2023-08-08 18:49:29 -04:00
|
|
|
|
doubleFirst
|
|
|
|
|
doubleLast
|
2019-09-17 15:47:30 -04:00
|
|
|
|
doubleMean
|
|
|
|
|
doubleMeanNoNulls
|
|
|
|
|
doubleMin
|
2019-10-25 01:04:08 -04:00
|
|
|
|
doubleSum
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid.generic.useDefaultValueForNull
|
2022-03-29 17:31:36 -04:00
|
|
|
|
druid.generic.ignoreNullsForStringCardinality
|
2019-09-17 15:47:30 -04:00
|
|
|
|
limitSpec
|
|
|
|
|
longMax
|
2020-09-14 22:44:58 -04:00
|
|
|
|
longAny
|
2023-08-08 18:49:29 -04:00
|
|
|
|
longFirst
|
|
|
|
|
longLast
|
2019-09-17 15:47:30 -04:00
|
|
|
|
longMean
|
|
|
|
|
longMeanNoNulls
|
|
|
|
|
longMin
|
2019-10-25 01:04:08 -04:00
|
|
|
|
longSum
|
2019-09-17 15:47:30 -04:00
|
|
|
|
movingAverage
|
|
|
|
|
postAggregations
|
|
|
|
|
postAveragers
|
2021-05-28 01:10:55 -04:00
|
|
|
|
pull-deps
|
2019-09-17 15:47:30 -04:00
|
|
|
|
defaultMetrics.json
|
Add config option for namespacePrefix (#9372)
* Add config option for namespacePrefix
opentsdb emitter sends metric names to opentsdb verbatim as what druid
names them, for example "query.count", this doesn't fit well with a
central opentsdb server which might have namespaced metrics, for example
"druid.query.count". This adds support for adding an optional prefix.
The prefix also gets a trailing dot (.), after it, so the metric name
becomes <namespacePrefix>.<metricname>
configureable as "druid.emitter.opentsdb.namespacePrefix", as
documented.
Co-authored-by: Martin Gerholm <martin.gerholm@deltaprojects.com>
Signed-off-by: Martin Gerholm <martin.gerholm@deltaprojects.com>
Signed-off-by: Björn Zettergren <bjorn.zettergren@deltaprojects.com>
* Spelling for PR #9372
Added "namespacePrefix" to .spelling exceptions, it's a variable name
used in documentation for opentsdb-emitter.
* fixing tests for PR #9372
changed naming of variables to be more descriptive
added test of prefix being an empty string: "".
added a conditional to buildNamespacePrefix to check for empty string
being fed if EventConverter called without OpentsdbEmitterConfig
instance.
* fixing checkstyle errors for PR #9372
used == to compare literal string, should be equals()
* cleaned up and updated PR #9372
Created a buildMetric function as suggested by clintropolis, and
removed redundant tests for empty strings as they're only used when
calling EventConverter directly without going through
OpentsdbEmitterConfig.
* consistent naming of tests PR #9372
Changed names of tests in files to match better with what it was
actually testing
changed check for Strings.isNullOrEmpty to just check for `null`, as
empty string valued `namespacePrefix` is handled in
OpentsdbEmitterConfig.
Co-authored-by: Martin Gerholm <inspector-martin@users.noreply.github.com>
2020-02-20 17:01:41 -05:00
|
|
|
|
namespacePrefix
|
2019-09-17 15:47:30 -04:00
|
|
|
|
src
|
|
|
|
|
loadList
|
2020-08-23 22:29:04 -04:00
|
|
|
|
pull-deps
|
|
|
|
|
PT2S
|
2019-09-17 15:47:30 -04:00
|
|
|
|
com.microsoft.sqlserver.jdbc.SQLServerDriver
|
|
|
|
|
sqljdbc
|
|
|
|
|
convertRange
|
2021-03-09 17:37:31 -05:00
|
|
|
|
HTTPServer
|
|
|
|
|
conversionFactor
|
|
|
|
|
prometheus
|
|
|
|
|
Pushgateway
|
2022-09-09 08:46:14 -04:00
|
|
|
|
flushPeriod
|
2019-09-17 15:47:30 -04:00
|
|
|
|
postAggregator
|
2024-01-14 12:52:30 -05:00
|
|
|
|
postAggregators
|
2019-09-17 15:47:30 -04:00
|
|
|
|
quantileFromTDigestSketch
|
|
|
|
|
quantilesFromTDigestSketch
|
|
|
|
|
tDigestSketch
|
|
|
|
|
HadoopDruidIndexer
|
|
|
|
|
LzoThriftBlock
|
|
|
|
|
SequenceFile
|
|
|
|
|
classname
|
|
|
|
|
hadoop-lzo
|
|
|
|
|
inputFormat
|
|
|
|
|
inputSpec
|
|
|
|
|
ioConfig
|
|
|
|
|
parseSpec
|
|
|
|
|
thriftClass
|
|
|
|
|
thriftJar
|
|
|
|
|
timeMax
|
|
|
|
|
timeMin
|
2021-08-09 20:27:35 -04:00
|
|
|
|
Alibaba
|
2020-07-02 01:20:53 -04:00
|
|
|
|
Aliyun
|
2021-08-09 20:27:35 -04:00
|
|
|
|
aliyun-oss-extensions
|
2020-07-02 01:20:53 -04:00
|
|
|
|
AccessKey
|
2021-08-09 20:27:35 -04:00
|
|
|
|
accessKey
|
2020-07-02 01:20:53 -04:00
|
|
|
|
aliyun-oss
|
2021-08-09 20:27:35 -04:00
|
|
|
|
json
|
2023-11-28 05:11:19 -05:00
|
|
|
|
Oshi
|
2021-08-09 20:27:35 -04:00
|
|
|
|
OSS
|
2020-07-02 01:20:53 -04:00
|
|
|
|
oss
|
2021-08-09 20:27:35 -04:00
|
|
|
|
secretKey
|
2020-07-02 01:20:53 -04:00
|
|
|
|
url
|
2019-09-17 15:47:30 -04:00
|
|
|
|
approxHistogram
|
|
|
|
|
approxHistogramFold
|
2020-09-09 16:56:33 -04:00
|
|
|
|
fixedBucketsHistogram
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bucketNum
|
|
|
|
|
lowerLimit
|
|
|
|
|
numBuckets
|
|
|
|
|
upperLimit
|
|
|
|
|
AVRO-1124
|
|
|
|
|
Avro-1124
|
|
|
|
|
SchemaRepo
|
|
|
|
|
avro
|
|
|
|
|
avroBytesDecoder
|
2021-04-13 01:03:13 -04:00
|
|
|
|
protoBytesDecoder
|
2020-10-08 00:08:22 -04:00
|
|
|
|
flattenSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
jq
|
|
|
|
|
org.apache.druid.extensions
|
|
|
|
|
schemaRepository
|
|
|
|
|
schema_inline
|
|
|
|
|
subjectAndIdConverter
|
|
|
|
|
url
|
|
|
|
|
BloomKFilter
|
|
|
|
|
bitset
|
|
|
|
|
outputStream
|
|
|
|
|
HLLSketchBuild
|
|
|
|
|
HLLSketchMerge
|
|
|
|
|
lgK
|
|
|
|
|
log2
|
|
|
|
|
tgtHllType
|
|
|
|
|
CDF
|
|
|
|
|
DoublesSketch
|
2021-08-31 17:56:37 -04:00
|
|
|
|
maxStreamLength
|
2019-09-17 15:47:30 -04:00
|
|
|
|
PMF
|
|
|
|
|
quantilesDoublesSketch
|
|
|
|
|
toString
|
|
|
|
|
isInputThetaSketch
|
|
|
|
|
thetaSketch
|
|
|
|
|
user_id
|
|
|
|
|
ArrayOfDoublesSketch
|
|
|
|
|
arrayOfDoublesSketch
|
|
|
|
|
metricColumns
|
|
|
|
|
nominalEntries
|
|
|
|
|
numberOfValues
|
|
|
|
|
INFORMATION_SCHEMA
|
|
|
|
|
MyBasicAuthenticator
|
|
|
|
|
MyBasicAuthorizer
|
|
|
|
|
authenticatorName
|
|
|
|
|
authorizerName
|
|
|
|
|
druid_system
|
|
|
|
|
pollingPeriod
|
|
|
|
|
roleName
|
2019-10-08 20:08:27 -04:00
|
|
|
|
LDAP
|
|
|
|
|
ldap
|
|
|
|
|
MyBasicMetadataAuthenticator
|
|
|
|
|
MyBasicLDAPAuthenticator
|
|
|
|
|
MyBasicMetadataAuthorizer
|
|
|
|
|
MyBasicLDAPAuthorizer
|
|
|
|
|
credentialsValidator
|
|
|
|
|
sAMAccountName
|
|
|
|
|
objectClass
|
|
|
|
|
initialAdminRole
|
|
|
|
|
adminGroupMapping
|
|
|
|
|
groupMappingName
|
2021-06-30 16:42:45 -04:00
|
|
|
|
8KiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HttpComponents
|
|
|
|
|
MyKerberosAuthenticator
|
|
|
|
|
RFC-4559
|
|
|
|
|
SPNego
|
|
|
|
|
_HOST
|
|
|
|
|
cacheFactory
|
|
|
|
|
concurrencyLevel
|
|
|
|
|
dataFetcher
|
|
|
|
|
expireAfterAccess
|
|
|
|
|
expireAfterWrite
|
|
|
|
|
initialCapacity
|
|
|
|
|
loadingCacheSpec
|
|
|
|
|
maxEntriesSize
|
|
|
|
|
maxStoreSize
|
|
|
|
|
maximumSize
|
|
|
|
|
onHeapPolling
|
|
|
|
|
pollPeriod
|
|
|
|
|
reverseLoadingCacheSpec
|
2020-03-23 21:15:45 -04:00
|
|
|
|
OAuth
|
|
|
|
|
Okta
|
|
|
|
|
OpenID
|
|
|
|
|
pac4j
|
2020-12-15 00:10:31 -05:00
|
|
|
|
Env
|
|
|
|
|
POD_NAME
|
|
|
|
|
POD_NAMESPACE
|
|
|
|
|
ConfigMap
|
|
|
|
|
PT17S
|
2019-09-17 15:47:30 -04:00
|
|
|
|
GCS
|
|
|
|
|
gcs-connector
|
|
|
|
|
hdfs
|
2022-08-09 06:44:22 -04:00
|
|
|
|
Aotearoa
|
|
|
|
|
Czechia
|
|
|
|
|
KTable
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LookupExtractorFactory
|
2022-08-09 06:44:22 -04:00
|
|
|
|
Zeelund
|
2023-05-19 12:42:27 -04:00
|
|
|
|
zookeeper.connect
|
2019-09-17 15:47:30 -04:00
|
|
|
|
0.11.x.
|
|
|
|
|
00Z
|
|
|
|
|
2016-01-01T11
|
|
|
|
|
2016-01-01T12
|
|
|
|
|
2016-01-01T14
|
|
|
|
|
CONNECTING_TO_STREAM
|
|
|
|
|
CREATING_TASKS
|
|
|
|
|
DISCOVERING_INITIAL_TASKS
|
|
|
|
|
KafkaSupervisorIOConfig
|
|
|
|
|
KafkaSupervisorTuningConfig
|
|
|
|
|
LOST_CONTACT_WITH_STREAM
|
|
|
|
|
OffsetOutOfRangeException
|
|
|
|
|
P2147483647D
|
|
|
|
|
PT10M
|
|
|
|
|
PT10S
|
|
|
|
|
PT1H
|
|
|
|
|
PT30M
|
|
|
|
|
PT30S
|
|
|
|
|
PT5S
|
|
|
|
|
PT80S
|
2021-10-14 20:51:32 -04:00
|
|
|
|
SASL
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SegmentWriteOutMediumFactory
|
|
|
|
|
UNABLE_TO_CONNECT_TO_STREAM
|
|
|
|
|
UNHEALTHY_SUPERVISOR
|
|
|
|
|
UNHEALTHY_TASKS
|
|
|
|
|
dimensionCompression
|
|
|
|
|
earlyMessageRejectionPeriod
|
|
|
|
|
indexSpec
|
|
|
|
|
intermediateHandoffPeriod
|
|
|
|
|
longEncoding
|
|
|
|
|
maxBytesInMemory
|
|
|
|
|
maxPendingPersists
|
|
|
|
|
maxRowsInMemory
|
|
|
|
|
maxRowsPerSegment
|
|
|
|
|
maxSavedParseExceptions
|
|
|
|
|
maxTotalRows
|
|
|
|
|
metricCompression
|
|
|
|
|
numKafkaPartitions
|
|
|
|
|
taskCount
|
|
|
|
|
taskDuration
|
|
|
|
|
9.2dist
|
|
|
|
|
KinesisSupervisorIOConfig
|
|
|
|
|
KinesisSupervisorTuningConfig
|
2024-02-22 00:20:37 -05:00
|
|
|
|
RabbitMQ
|
2019-11-28 15:59:01 -05:00
|
|
|
|
Resharding
|
|
|
|
|
resharding
|
2023-03-09 18:48:02 -05:00
|
|
|
|
LZ4LZFuncompressedLZ4LZ4LZFuncompressednoneLZ4autolongsautolongslongstypeconcisetyperoaringtypestreamendpointreplicastaskCounttaskCount
|
2019-09-17 15:47:30 -04:00
|
|
|
|
deaggregate
|
|
|
|
|
druid-kinesis-indexing-service
|
|
|
|
|
maxRecordsPerPoll
|
Kinesis adaptive memory management (#15360)
### Description
Our Kinesis consumer works by using the [GetRecords API](https://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetRecords.html) in some number of `fetchThreads`, each fetching some number of records (`recordsPerFetch`) and each inserting into a shared buffer that can hold a `recordBufferSize` number of records. The logic is described in our documentation at: https://druid.apache.org/docs/27.0.0/development/extensions-core/kinesis-ingestion/#determine-fetch-settings
There is a problem with the logic that this pr fixes: the memory limits rely on a hard-coded “estimated record size” that is `10 KB` if `deaggregate: false` and `1 MB` if `deaggregate: true`. There have been cases where a supervisor had `deaggregate: true` set even though it wasn’t needed, leading to under-utilization of memory and poor ingestion performance.
Users don’t always know if their records are aggregated or not. Also, even if they could figure it out, it’s better to not have to. So we’d like to eliminate the `deaggregate` parameter, which means we need to do memory management more adaptively based on the actual record sizes.
We take advantage of the fact that GetRecords doesn’t return more than 10MB (https://docs.aws.amazon.com/streams/latest/dev/service-sizes-and-limits.html ):
This pr:
eliminates `recordsPerFetch`, always use the max limit of 10000 records (the default limit if not set)
eliminate `deaggregate`, always have it true
cap `fetchThreads` to ensure that if each fetch returns the max (`10MB`) then we don't exceed our budget (`100MB` or `5% of heap`). In practice this means `fetchThreads` will never be more than `10`. Tasks usually don't have that many processors available to them anyway, so in practice I don't think this will change the number of threads for too many deployments
add `recordBufferSizeBytes` as a bytes-based limit rather than records-based limit for the shared queue. We do know the byte size of kinesis records by at this point. Default should be `100MB` or `10% of heap`, whichever is smaller.
add `maxBytesPerPoll` as a bytes-based limit for how much data we poll from shared buffer at a time. Default is `1000000` bytes.
deprecate `recordBufferSize`, use `recordBufferSizeBytes` instead. Warning is logged if `recordBufferSize` is specified
deprecate `maxRecordsPerPoll`, use `maxBytesPerPoll` instead. Warning is logged if maxRecordsPerPoll` is specified
Fixed issue that when the record buffer is full, the fetchRecords logic throws away the rest of the GetRecords result after `recordBufferOfferTimeout` and starts a new shard iterator. This seems excessively churny. Instead, wait an unbounded amount of time for queue to stop being full. If the queue remains full, we’ll end up right back waiting for it after the restarted fetch.
There was also a call to `newQ::offer` without check in `filterBufferAndResetBackgroundFetch`, which seemed like it could cause data loss. Now checking return value here, and failing if false.
### Release Note
Kinesis ingestion memory tuning config has been greatly simplified, and a more adaptive approach is now taken for the configuration. Here is a summary of the changes made:
eliminates `recordsPerFetch`, always use the max limit of 10000 records (the default limit if not set)
eliminate `deaggregate`, always have it true
cap `fetchThreads` to ensure that if each fetch returns the max (`10MB`) then we don't exceed our budget (`100MB` or `5% of heap`). In practice this means `fetchThreads` will never be more than `10`. Tasks usually don't have that many processors available to them anyway, so in practice I don't think this will change the number of threads for too many deployments
add `recordBufferSizeBytes` as a bytes-based limit rather than records-based limit for the shared queue. We do know the byte size of kinesis records by this point. Default should be `100MB` or `10% of heap`, whichever is smaller.
add `maxBytesPerPoll` as a bytes-based limit for how much data we poll from shared buffer at a time. Default is `1000000` bytes.
deprecate `recordBufferSize`, use `recordBufferSizeBytes` instead. Warning is logged if `recordBufferSize` is specified
deprecate `maxRecordsPerPoll`, use `maxBytesPerPoll` instead. Warning is logged if `maxRecordsPerPoll` is specified
2024-01-19 14:30:21 -05:00
|
|
|
|
maxBytesPerPoll
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxRecordsPerPollrecordsPerFetchfetchDelayMillisreplicasfetchDelayMillisrecordsPerFetchfetchDelayMillismaxRecordsPerPollamazon-kinesis-client1
|
|
|
|
|
numKinesisShards
|
|
|
|
|
numProcessors
|
|
|
|
|
q.size
|
2019-10-16 02:19:17 -04:00
|
|
|
|
repartitionTransitionDuration
|
2019-09-17 15:47:30 -04:00
|
|
|
|
replicastaskCounttaskCount
|
2023-08-17 17:13:10 -04:00
|
|
|
|
resetOffsets
|
2019-09-17 15:47:30 -04:00
|
|
|
|
resetuseEarliestSequenceNumberPOST
|
|
|
|
|
resumePOST
|
|
|
|
|
statusrecentErrorsdruid.supervisor.maxStoredExceptionEventsstatedetailedStatestatedetailedStatestatestatePENDINGRUNNINGSUSPENDEDSTOPPINGUNHEALTHY_SUPERVISORUNHEALTHY_TASKSdetailedStatestatedruid.supervisor.unhealthinessThresholddruid.supervisor.taskUnhealthinessThresholdtaskDurationtaskCountreplicasdetailedStatedetailedStateRUNNINGPOST
|
|
|
|
|
supervisorPOST
|
|
|
|
|
supervisorfetchThreadsfetchDelayMillisrecordsPerFetchmaxRecordsPerPollpoll
|
|
|
|
|
suspendPOST
|
|
|
|
|
taskCounttaskDurationreplicas
|
|
|
|
|
taskCounttaskDurationtaskDurationPOST
|
|
|
|
|
taskDurationstartDelayperioduseEarliestSequenceNumbercompletionTimeouttaskDurationlateMessageRejectionPeriodPT1HearlyMessageRejectionPeriodPT1HPT1HrecordsPerFetchfetchDelayMillisawsAssumedRoleArnawsExternalIddeaggregateGET
|
|
|
|
|
terminatePOST
|
|
|
|
|
terminatedruid.worker.capacitytaskDurationcompletionTimeoutreplicastaskCountreplicas
|
2019-10-16 02:19:17 -04:00
|
|
|
|
PT2M
|
|
|
|
|
kinesis.us
|
|
|
|
|
amazonaws.com
|
|
|
|
|
PT6H
|
|
|
|
|
GetRecords
|
|
|
|
|
KCL
|
|
|
|
|
signalled
|
|
|
|
|
ProvisionedThroughputExceededException
|
|
|
|
|
Deaggregation
|
2019-09-17 15:47:30 -04:00
|
|
|
|
baz
|
|
|
|
|
customJson
|
|
|
|
|
lookupParseSpec
|
|
|
|
|
namespaceParseSpec
|
|
|
|
|
simpleJson
|
|
|
|
|
dimensionSpec
|
|
|
|
|
flattenSpec
|
|
|
|
|
binaryAsString
|
2023-10-02 20:09:23 -04:00
|
|
|
|
replaceMissingValueWith
|
2023-05-19 12:42:27 -04:00
|
|
|
|
sslFactory
|
2019-09-17 15:47:30 -04:00
|
|
|
|
sslMode
|
|
|
|
|
Proto
|
|
|
|
|
metrics.desc
|
|
|
|
|
metrics.desc.
|
|
|
|
|
metrics.proto.
|
|
|
|
|
metrics_pb
|
|
|
|
|
protoMessageType
|
|
|
|
|
timeAndDims
|
|
|
|
|
tmp
|
|
|
|
|
SigV4
|
|
|
|
|
jvm.config
|
|
|
|
|
kms
|
|
|
|
|
s3
|
|
|
|
|
s3a
|
|
|
|
|
s3n
|
|
|
|
|
uris
|
|
|
|
|
KeyManager
|
|
|
|
|
SSLContext
|
|
|
|
|
TrustManager
|
|
|
|
|
GenericUDAFVariance
|
|
|
|
|
Golub
|
|
|
|
|
J.L.
|
|
|
|
|
LeVeque
|
|
|
|
|
Numer
|
|
|
|
|
chunk1
|
|
|
|
|
chunk2
|
|
|
|
|
stddev
|
|
|
|
|
t1
|
|
|
|
|
t2
|
|
|
|
|
variance1
|
|
|
|
|
variance2
|
|
|
|
|
varianceFold
|
|
|
|
|
variance_pop
|
|
|
|
|
variance_sample
|
|
|
|
|
Berry_statbook
|
|
|
|
|
Berry_statbook_chpt6.pdf
|
|
|
|
|
S.E.
|
|
|
|
|
engineering.com
|
|
|
|
|
jcb0773
|
|
|
|
|
n1
|
|
|
|
|
n2
|
|
|
|
|
p1
|
|
|
|
|
p2
|
|
|
|
|
pvalue2tailedZtest
|
|
|
|
|
sqrt
|
|
|
|
|
successCount1
|
|
|
|
|
successCount2
|
|
|
|
|
www.isixsigma.com
|
|
|
|
|
www.paypal
|
|
|
|
|
www.ucs.louisiana.edu
|
|
|
|
|
zscore
|
|
|
|
|
zscore2sample
|
|
|
|
|
ztests
|
|
|
|
|
DistinctCount
|
|
|
|
|
artifactId
|
|
|
|
|
com.example
|
|
|
|
|
common.runtime.properties
|
2021-01-07 00:15:29 -05:00
|
|
|
|
druid-aws-rds-extensions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid-cassandra-storage
|
2022-09-06 03:06:57 -04:00
|
|
|
|
druid-compressed-bigdecimal
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid-distinctcount
|
|
|
|
|
druid-ec2-extensions
|
|
|
|
|
druid-kafka-extraction-namespace
|
|
|
|
|
druid-kafka-indexing-service
|
|
|
|
|
druid-opentsdb-emitter
|
|
|
|
|
druid-protobuf-extensions
|
|
|
|
|
druid-tdigestsketch
|
|
|
|
|
druid.apache.org
|
|
|
|
|
groupId
|
|
|
|
|
jvm-global
|
|
|
|
|
kafka-emitter
|
|
|
|
|
org.apache.druid.extensions.contrib.
|
|
|
|
|
pull-deps
|
|
|
|
|
sqlserver-metadata-storage
|
|
|
|
|
statsd-emitter
|
|
|
|
|
coords
|
|
|
|
|
dimName
|
|
|
|
|
maxCoords
|
2019-10-01 17:59:30 -04:00
|
|
|
|
Mb
|
2019-09-17 15:47:30 -04:00
|
|
|
|
minCoords
|
|
|
|
|
Metaspace
|
|
|
|
|
dev
|
|
|
|
|
AggregatorFactory
|
|
|
|
|
ArchiveTask
|
|
|
|
|
ComplexMetrics
|
|
|
|
|
DataSegmentArchiver
|
|
|
|
|
DataSegmentKiller
|
|
|
|
|
DataSegmentMover
|
2024-06-11 17:17:40 -04:00
|
|
|
|
URIDataPuller
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DataSegmentPusher
|
|
|
|
|
DruidModule
|
|
|
|
|
ExtractionFns
|
|
|
|
|
HdfsStorageDruidModule
|
|
|
|
|
JacksonInject
|
|
|
|
|
MapBinder
|
|
|
|
|
MoveTask
|
|
|
|
|
ObjectMapper
|
|
|
|
|
PasswordProvider
|
|
|
|
|
PostAggregators
|
|
|
|
|
QueryRunnerFactory
|
2023-05-19 12:42:27 -04:00
|
|
|
|
segmentmetadataquery
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SegmentMetadataQuery
|
|
|
|
|
SegmentMetadataQueryQueryToolChest
|
|
|
|
|
loadSpec
|
|
|
|
|
multibind
|
|
|
|
|
pom.xml
|
|
|
|
|
0.6.x
|
|
|
|
|
0.7.x
|
|
|
|
|
0.7.x.
|
|
|
|
|
TimeAndDims
|
2023-07-17 23:29:57 -04:00
|
|
|
|
catalogProperties
|
|
|
|
|
catalogUri
|
2019-09-17 15:47:30 -04:00
|
|
|
|
column2
|
|
|
|
|
column_1
|
|
|
|
|
column_n
|
|
|
|
|
com.opencsv
|
|
|
|
|
ctrl
|
2022-10-11 14:37:28 -04:00
|
|
|
|
descriptorString
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collusions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
headerFormat
|
|
|
|
|
headerLabelPrefix
|
2023-07-17 23:29:57 -04:00
|
|
|
|
icebergFilter
|
|
|
|
|
icebergCatalog
|
2019-09-17 15:47:30 -04:00
|
|
|
|
jsonLowercase
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collusions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
kafka
|
|
|
|
|
KafkaStringHeaderFormat
|
|
|
|
|
kafka.header.
|
|
|
|
|
kafka.key
|
|
|
|
|
kafka.timestamp
|
2023-08-24 09:49:59 -04:00
|
|
|
|
kafka.topic
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collusions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
keyColumnName
|
|
|
|
|
keyFormat
|
2019-09-17 15:47:30 -04:00
|
|
|
|
listDelimiter
|
2024-02-02 02:32:30 -05:00
|
|
|
|
lowerOpen
|
2023-01-17 11:41:57 -05:00
|
|
|
|
timestamp
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collusions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
timestampColumnName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
timestampSpec
|
2024-02-02 02:32:30 -05:00
|
|
|
|
upperOpen
|
2021-02-27 17:25:35 -05:00
|
|
|
|
urls
|
Kafka Input Format for headers, key and payload parsing (#11630)
### Description
Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.
PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.
We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.
This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.
Let's look at a sample input format from the above discussion
"inputFormat":
{
"type": "kafka", // New input format type
"headerLabelPrefix": "kafka.header.", // Label prefix for header columns, this will avoid collusions while merging columns
"recordTimestampLabelPrefix": "kafka.", // Kafka record's timestamp is made available in case payload does not carry timestamp
"headerFormat": // Header parser specifying that values are of type string
{
"type": "string"
},
"valueFormat": // Value parser from json parsing
{
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [...]
}
},
"keyFormat": // Key parser also from json parsing
{
"type": "json"
}
}
Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json.
KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion.
"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.
Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.
Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".
## KafkaInputFormat Class:
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.
During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
2021-10-07 11:56:27 -04:00
|
|
|
|
valueFormat
|
2019-09-17 15:47:30 -04:00
|
|
|
|
1GB
|
2019-10-09 14:12:00 -04:00
|
|
|
|
IOConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
compactionTask
|
|
|
|
|
compactionTasks
|
|
|
|
|
numShards
|
|
|
|
|
IngestSegment
|
|
|
|
|
maxSizes
|
2023-11-17 02:02:28 -05:00
|
|
|
|
snapshotTime
|
2019-09-17 15:47:30 -04:00
|
|
|
|
windowPeriod
|
|
|
|
|
2012-01-01T00
|
|
|
|
|
2012-01-03T00
|
|
|
|
|
2012-01-05T00
|
|
|
|
|
2012-01-07T00
|
|
|
|
|
500MB
|
|
|
|
|
CombineTextInputFormat
|
|
|
|
|
HadoopIndexTask
|
|
|
|
|
InputFormat
|
|
|
|
|
InputSplit
|
|
|
|
|
JobHistory
|
|
|
|
|
a.example.com
|
|
|
|
|
assumeGrouped
|
2021-04-09 00:03:00 -04:00
|
|
|
|
awaitSegmentAvailabilityTimeoutMillis
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cleanupOnFailure
|
|
|
|
|
combineText
|
|
|
|
|
connectURI
|
|
|
|
|
dataGranularity
|
|
|
|
|
datetime
|
|
|
|
|
f.example.com
|
|
|
|
|
filePattern
|
|
|
|
|
forceExtendableShardSpecs
|
|
|
|
|
ignoreInvalidRows
|
|
|
|
|
ignoreWhenNoSegments
|
|
|
|
|
indexSpecForIntermediatePersists
|
|
|
|
|
index_hadoop
|
|
|
|
|
inputPath
|
|
|
|
|
inputSpecs
|
|
|
|
|
interval1
|
|
|
|
|
interval2
|
|
|
|
|
jobProperties
|
|
|
|
|
leaveIntermediate
|
|
|
|
|
logParseExceptions
|
|
|
|
|
mapred.map.tasks
|
|
|
|
|
mapreduce.job.maps
|
|
|
|
|
maxParseExceptions
|
|
|
|
|
maxPartitionSize
|
|
|
|
|
maxSplitSize
|
|
|
|
|
metadataUpdateSpec
|
|
|
|
|
numBackgroundPersistThreads
|
|
|
|
|
overwriteFiles
|
|
|
|
|
partitionDimension
|
|
|
|
|
partitionDimensions
|
|
|
|
|
partitionSpec
|
|
|
|
|
pathFormat
|
|
|
|
|
segmentOutputPath
|
|
|
|
|
segmentTable
|
|
|
|
|
shardSpec
|
|
|
|
|
single_dim
|
2023-07-17 23:29:57 -04:00
|
|
|
|
tableName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
targetPartitionSize
|
2019-09-20 16:59:18 -04:00
|
|
|
|
targetRowsPerSegment
|
2019-09-17 15:47:30 -04:00
|
|
|
|
useCombiner
|
|
|
|
|
useExplicitVersion
|
|
|
|
|
useNewAggs
|
|
|
|
|
useYarnRMJobStatusFallback
|
|
|
|
|
workingPath
|
|
|
|
|
z.example.com
|
|
|
|
|
150MB
|
|
|
|
|
DataSchema
|
|
|
|
|
DefaultPassword
|
|
|
|
|
EnvironmentVariablePasswordProvider
|
|
|
|
|
IOConfig
|
|
|
|
|
PartitionsSpec
|
|
|
|
|
PasswordProviders
|
2019-10-18 16:24:14 -04:00
|
|
|
|
SegmentsSplitHintSpec
|
|
|
|
|
SplitHintSpec
|
2020-02-25 23:59:53 -05:00
|
|
|
|
accessKeyId
|
2019-09-17 15:47:30 -04:00
|
|
|
|
appendToExisting
|
|
|
|
|
baseDir
|
|
|
|
|
chatHandlerNumRetries
|
|
|
|
|
chatHandlerTimeout
|
2021-12-03 06:07:14 -05:00
|
|
|
|
cityName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
connectorConfig
|
DruidInputSource: Fix issues in column projection, timestamp handling. (#10267)
* DruidInputSource: Fix issues in column projection, timestamp handling.
DruidInputSource, DruidSegmentReader changes:
1) Remove "dimensions" and "metrics". They are not necessary, because we
can compute which columns we need to read based on what is going to
be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
the timestamp of the returned InputRows was set to the `__time` column
of the input datasource.
(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.
(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.
(1) and (3) are breaking changes.
Web console changes:
1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
compatibility with the new behavior.
Other changes:
1) Add ColumnsFilter, a new class that allows input readers to determine
which columns they need to read. Currently, it's only used by the
DruidInputSource, but it could be used by other columnar input sources
in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
ColumnsFilter for given timestamp, dimensions, transform, and metrics.
5) Add "getRequiredColumns" method to TransformSpec to support the above.
* Various fixups.
* Uncomment incorrectly commented lines.
* Move TransformSpecTest to the proper module.
* Add druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.
* Fix.
* Fix build.
* Checkstyle.
* Misc fixes.
* Fix test.
* Move config.
* Fix imports.
* Fixup.
* Fix ShuffleResourceTest.
* Add import.
* Smarter exclusions.
* Fixes based on tests.
Also, add TIME_COLUMN constant in the web console.
* Adjustments for tests.
* Reorder test data.
* Update docs.
* Update docs to say Druid 0.22.0 instead of 0.21.0.
* Fix test.
* Fix ITAutoCompactionTest.
* Changes from review & from merging.
2021-03-25 13:32:21 -04:00
|
|
|
|
countryName
|
2023-05-19 12:42:27 -04:00
|
|
|
|
dataSchema
|
2021-04-01 15:29:36 -04:00
|
|
|
|
dropExisting
|
2019-09-17 15:47:30 -04:00
|
|
|
|
foldCase
|
|
|
|
|
forceGuaranteedRollup
|
|
|
|
|
httpAuthenticationPassword
|
|
|
|
|
httpAuthenticationUsername
|
|
|
|
|
ingestSegment
|
2019-12-05 19:50:00 -05:00
|
|
|
|
InputSource
|
|
|
|
|
DruidInputSource
|
2021-01-06 01:19:09 -05:00
|
|
|
|
maxColumnsToMerge
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxInputSegmentBytesPerTask
|
|
|
|
|
maxNumConcurrentSubTasks
|
|
|
|
|
maxNumSegmentsToMerge
|
|
|
|
|
maxRetry
|
|
|
|
|
pushTimeout
|
|
|
|
|
reportParseExceptions
|
2020-02-25 23:59:53 -05:00
|
|
|
|
secretAccessKey
|
2019-09-17 15:47:30 -04:00
|
|
|
|
segmentWriteOutMediumFactory
|
|
|
|
|
sql
|
|
|
|
|
sqls
|
2019-10-18 16:24:14 -04:00
|
|
|
|
splitHintSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
taskStatusCheckPeriodMs
|
|
|
|
|
timeChunk
|
|
|
|
|
totalNumMergeTasks
|
2020-02-25 20:49:16 -05:00
|
|
|
|
prefetchTriggerBytes
|
2021-04-09 00:03:00 -04:00
|
|
|
|
awaitSegmentAvailabilityTimeoutMillis
|
2021-12-03 06:07:14 -05:00
|
|
|
|
baseDir
|
|
|
|
|
httpAuthenticationUsername
|
|
|
|
|
DefaultPassword
|
|
|
|
|
PasswordProviders
|
|
|
|
|
EnvironmentVariablePasswordProvider
|
2022-11-11 02:46:40 -05:00
|
|
|
|
ingestSegment
|
2021-12-03 06:07:14 -05:00
|
|
|
|
maxInputSegmentBytesPerTask
|
|
|
|
|
150MB
|
|
|
|
|
foldCase
|
|
|
|
|
sqls
|
|
|
|
|
connectorConfig
|
2022-11-11 02:46:40 -05:00
|
|
|
|
httpAuthenticationPassword
|
2021-12-03 06:07:14 -05:00
|
|
|
|
accessKeyId
|
|
|
|
|
secretAccessKey
|
|
|
|
|
accessKeyId
|
|
|
|
|
httpAuthenticationPassword
|
|
|
|
|
countryName
|
|
|
|
|
appendToExisting
|
|
|
|
|
dropExisting
|
2022-11-11 02:46:40 -05:00
|
|
|
|
timeChunk
|
2021-12-03 06:07:14 -05:00
|
|
|
|
PartitionsSpec
|
|
|
|
|
forceGuaranteedRollup
|
|
|
|
|
reportParseExceptions
|
|
|
|
|
pushTimeout
|
|
|
|
|
segmentWriteOutMediumFactory
|
2019-09-17 15:47:30 -04:00
|
|
|
|
product_category
|
|
|
|
|
product_id
|
|
|
|
|
product_name
|
|
|
|
|
BUILD_SEGMENTS
|
|
|
|
|
DETERMINE_PARTITIONS
|
|
|
|
|
forceTimeChunkLock
|
|
|
|
|
taskLockTimeout
|
2023-07-17 23:29:57 -04:00
|
|
|
|
warehouseSource
|
|
|
|
|
warehousePath
|
2023-02-08 01:01:34 -05:00
|
|
|
|
index.md
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DOUBLE_ARRAY
|
|
|
|
|
DOY
|
|
|
|
|
DateTimeFormat
|
|
|
|
|
LONG_ARRAY
|
|
|
|
|
Los_Angeles
|
|
|
|
|
P3M
|
|
|
|
|
PT12H
|
|
|
|
|
STRING_ARRAY
|
|
|
|
|
String.format
|
|
|
|
|
acos
|
|
|
|
|
args
|
|
|
|
|
arr1
|
|
|
|
|
arr2
|
|
|
|
|
array_append
|
|
|
|
|
array_concat
|
2023-05-19 12:42:27 -04:00
|
|
|
|
ARRAY_CONCAT
|
2021-04-22 21:30:16 -04:00
|
|
|
|
array_set_add
|
|
|
|
|
array_set_add_all
|
2019-09-17 15:47:30 -04:00
|
|
|
|
array_contains
|
|
|
|
|
array_length
|
|
|
|
|
array_offset
|
|
|
|
|
array_offset_of
|
|
|
|
|
array_ordinal
|
|
|
|
|
array_ordinal_of
|
|
|
|
|
array_overlap
|
|
|
|
|
array_prepend
|
|
|
|
|
array_slice
|
|
|
|
|
array_to_string
|
2024-04-19 00:15:15 -04:00
|
|
|
|
scalar_in_array
|
2019-09-17 15:47:30 -04:00
|
|
|
|
asin
|
|
|
|
|
atan
|
|
|
|
|
atan2
|
2021-01-28 14:16:53 -05:00
|
|
|
|
bitwise
|
|
|
|
|
bitwiseAnd
|
|
|
|
|
bitwiseComplement
|
|
|
|
|
bitwiseConvertDoubleToLongBits
|
|
|
|
|
bitwiseConvertLongBitsToDouble
|
|
|
|
|
bitwiseOr
|
|
|
|
|
bitwiseShiftLeft
|
|
|
|
|
bitwiseShiftRight
|
|
|
|
|
bitwiseXor
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bloom_filter_test
|
|
|
|
|
cartesian_fold
|
|
|
|
|
cartesian_map
|
|
|
|
|
case_searched
|
|
|
|
|
case_simple
|
|
|
|
|
cbrt
|
|
|
|
|
concat
|
|
|
|
|
copysign
|
|
|
|
|
expm1
|
|
|
|
|
expr
|
|
|
|
|
expr1
|
|
|
|
|
expr2
|
2022-08-19 20:12:19 -04:00
|
|
|
|
expr3
|
|
|
|
|
expr4
|
2019-09-17 15:47:30 -04:00
|
|
|
|
fromIndex
|
|
|
|
|
getExponent
|
|
|
|
|
hypot
|
|
|
|
|
ipv4_match
|
|
|
|
|
ipv4_parse
|
|
|
|
|
ipv4_stringify
|
2023-12-08 02:09:06 -05:00
|
|
|
|
ipv6_match
|
|
|
|
|
|
|
|
|
|
# IPv6 Address Example Sections
|
|
|
|
|
75e9
|
|
|
|
|
efa4
|
|
|
|
|
29c6
|
|
|
|
|
85f6
|
|
|
|
|
232c
|
|
|
|
|
|
|
|
|
|
isnull
|
2019-09-17 15:47:30 -04:00
|
|
|
|
java.lang.Math
|
|
|
|
|
java.lang.String
|
2023-11-28 05:11:19 -05:00
|
|
|
|
JNA
|
2019-09-17 15:47:30 -04:00
|
|
|
|
log10
|
|
|
|
|
log1p
|
|
|
|
|
lpad
|
|
|
|
|
ltrim
|
|
|
|
|
nextUp
|
|
|
|
|
nextafter
|
2021-12-02 19:40:23 -05:00
|
|
|
|
notnull
|
2019-09-17 15:47:30 -04:00
|
|
|
|
nvl
|
|
|
|
|
parse_long
|
|
|
|
|
regexp_extract
|
2020-06-03 17:31:37 -04:00
|
|
|
|
regexp_like
|
2023-06-29 16:47:57 -04:00
|
|
|
|
regexp_replace
|
2020-09-14 12:57:54 -04:00
|
|
|
|
contains_string
|
|
|
|
|
icontains_string
|
2019-09-17 15:47:30 -04:00
|
|
|
|
result1
|
|
|
|
|
result2
|
|
|
|
|
rint
|
|
|
|
|
rpad
|
|
|
|
|
rtrim
|
2021-11-17 11:22:41 -05:00
|
|
|
|
safe_divide
|
2019-09-17 15:47:30 -04:00
|
|
|
|
scalb
|
|
|
|
|
signum
|
|
|
|
|
str1
|
|
|
|
|
str2
|
|
|
|
|
string_to_array
|
2020-09-14 22:44:58 -04:00
|
|
|
|
stringAny
|
2023-08-08 18:49:29 -04:00
|
|
|
|
stringFirst
|
|
|
|
|
stringLast
|
2023-05-19 12:42:27 -04:00
|
|
|
|
Strlen
|
2019-09-17 15:47:30 -04:00
|
|
|
|
strlen
|
|
|
|
|
strpos
|
|
|
|
|
timestamp_ceil
|
|
|
|
|
timestamp_extract
|
|
|
|
|
timestamp_floor
|
|
|
|
|
timestamp_format
|
|
|
|
|
timestamp_parse
|
|
|
|
|
timestamp_shift
|
|
|
|
|
todegrees
|
|
|
|
|
toradians
|
|
|
|
|
ulp
|
|
|
|
|
unix_timestamp
|
|
|
|
|
value1
|
|
|
|
|
value2
|
|
|
|
|
valueOf
|
2021-08-13 13:27:49 -04:00
|
|
|
|
IEC
|
|
|
|
|
human_readable_binary_byte_format
|
|
|
|
|
human_readable_decimal_byte_format
|
|
|
|
|
human_readable_decimal_format
|
2019-09-17 15:47:30 -04:00
|
|
|
|
RADStack
|
|
|
|
|
00.000Z
|
|
|
|
|
2015-09-12T03
|
|
|
|
|
2015-09-12T05
|
|
|
|
|
2016-06-27_2016-06-28
|
|
|
|
|
Param
|
|
|
|
|
SupervisorSpec
|
|
|
|
|
dropRule
|
|
|
|
|
druid.query.segmentMetadata.defaultHistory
|
|
|
|
|
isointerval
|
|
|
|
|
json
|
|
|
|
|
loadRule
|
|
|
|
|
maxTime
|
|
|
|
|
minTime
|
|
|
|
|
numCandidates
|
|
|
|
|
param
|
|
|
|
|
segmentId1
|
|
|
|
|
segmentId2
|
|
|
|
|
taskId
|
|
|
|
|
taskid
|
|
|
|
|
un
|
2023-05-19 12:42:27 -04:00
|
|
|
|
|
2021-06-30 16:42:45 -04:00
|
|
|
|
100MiB
|
|
|
|
|
128MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
15ms
|
2021-06-30 16:42:45 -04:00
|
|
|
|
2.5MiB
|
|
|
|
|
24GiB
|
|
|
|
|
256MiB
|
|
|
|
|
30GiB-60GiB
|
|
|
|
|
4GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
5MB
|
2021-06-30 16:42:45 -04:00
|
|
|
|
64KiB
|
|
|
|
|
8GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
G1GC
|
|
|
|
|
GroupBys
|
|
|
|
|
QoS-type
|
|
|
|
|
DumpSegment
|
|
|
|
|
SegmentMetadata
|
|
|
|
|
__time
|
|
|
|
|
bitmapSerdeFactory
|
|
|
|
|
columnName
|
|
|
|
|
index.zip
|
|
|
|
|
time-iso8601
|
|
|
|
|
hadoopStorageDirectory
|
|
|
|
|
0.14.x
|
2022-08-04 02:16:05 -04:00
|
|
|
|
G1
|
|
|
|
|
Temurin
|
2019-09-17 15:47:30 -04:00
|
|
|
|
0.14.x
|
|
|
|
|
1s
|
|
|
|
|
Bufferpool
|
2022-08-04 02:16:05 -04:00
|
|
|
|
Filesystem
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JVMMonitor
|
2024-04-11 18:44:56 -04:00
|
|
|
|
jvmVersion
|
2019-09-17 15:47:30 -04:00
|
|
|
|
QueryCountStatsMonitor
|
|
|
|
|
Sys
|
|
|
|
|
SysMonitor
|
|
|
|
|
TaskCountStatsMonitor
|
2020-09-29 02:50:38 -04:00
|
|
|
|
TaskSlotCountStatsMonitor
|
2022-04-26 12:44:44 -04:00
|
|
|
|
WorkerTaskCountStatsMonitor
|
2022-07-14 01:09:03 -04:00
|
|
|
|
workerVersion
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bufferCapacity
|
2020-06-25 15:20:25 -04:00
|
|
|
|
bufferpoolName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
cms
|
|
|
|
|
cpuName
|
|
|
|
|
cpuTime
|
2021-11-07 06:21:44 -05:00
|
|
|
|
druid.server.http.numThreads
|
|
|
|
|
druid.server.http.queueSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
fsDevName
|
|
|
|
|
fsDirName
|
|
|
|
|
fsOptions
|
|
|
|
|
fsSysTypeName
|
|
|
|
|
fsTypeName
|
|
|
|
|
g1
|
|
|
|
|
gcGen
|
|
|
|
|
gcName
|
|
|
|
|
handoffed
|
|
|
|
|
hasFilters
|
|
|
|
|
memKind
|
|
|
|
|
nativeQueryIds
|
|
|
|
|
netAddress
|
|
|
|
|
netHwaddr
|
|
|
|
|
netName
|
2021-07-30 10:59:26 -04:00
|
|
|
|
noticeType
|
2019-09-17 15:47:30 -04:00
|
|
|
|
numComplexMetrics
|
|
|
|
|
numDimensions
|
|
|
|
|
numMetrics
|
|
|
|
|
poolKind
|
|
|
|
|
poolName
|
|
|
|
|
remoteAddress
|
2021-12-10 12:40:52 -05:00
|
|
|
|
segmentAvailabilityConfirmed
|
2019-09-17 15:47:30 -04:00
|
|
|
|
serviceName
|
2022-11-11 01:30:08 -05:00
|
|
|
|
taskActionType
|
2022-05-23 15:32:47 -04:00
|
|
|
|
taskIngestionMode
|
2019-09-17 15:47:30 -04:00
|
|
|
|
taskStatus
|
|
|
|
|
taskType
|
2021-11-07 06:21:44 -05:00
|
|
|
|
threadPoolNumBusyThreads.
|
|
|
|
|
threadPoolNumIdleThreads
|
|
|
|
|
threadPoolNumTotalThreads.
|
2019-09-17 15:47:30 -04:00
|
|
|
|
CDH
|
|
|
|
|
Classloader
|
|
|
|
|
assembly.sbt
|
|
|
|
|
build.sbt
|
|
|
|
|
classloader
|
|
|
|
|
druid_build
|
|
|
|
|
mapred-default
|
|
|
|
|
mapred-site
|
|
|
|
|
sbt
|
|
|
|
|
scala-2
|
|
|
|
|
org.apache.hadoop
|
|
|
|
|
proxy.com.
|
|
|
|
|
remoteRepository
|
|
|
|
|
JBOD
|
|
|
|
|
druid.processing.buffer.sizeBytes.
|
|
|
|
|
druid.processing.numMergeBuffers
|
|
|
|
|
druid.processing.numThreads
|
|
|
|
|
tmpfs
|
|
|
|
|
broadcastByInterval
|
|
|
|
|
broadcastByPeriod
|
|
|
|
|
broadcastForever
|
|
|
|
|
colocatedDataSources
|
|
|
|
|
dropBeforeByPeriod
|
|
|
|
|
dropByInterval
|
|
|
|
|
dropByPeriod
|
|
|
|
|
dropForever
|
|
|
|
|
loadByInterval
|
|
|
|
|
loadByPeriod
|
|
|
|
|
loadForever
|
|
|
|
|
700MB
|
2021-06-30 16:42:45 -04:00
|
|
|
|
128GiB
|
|
|
|
|
16GiB
|
|
|
|
|
256GiB
|
|
|
|
|
4GiB
|
|
|
|
|
512GiB
|
|
|
|
|
64GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
Nano-Quickstart
|
|
|
|
|
i3
|
|
|
|
|
i3.16xlarge
|
|
|
|
|
i3.2xlarge
|
|
|
|
|
i3.4xlarge
|
|
|
|
|
i3.8xlarge
|
|
|
|
|
CN
|
|
|
|
|
subjectAltNames
|
|
|
|
|
HyperUnique
|
|
|
|
|
hyperUnique
|
|
|
|
|
longSum
|
|
|
|
|
groupBys
|
|
|
|
|
dataSourceMetadata
|
|
|
|
|
ExtractionDimensionSpec
|
|
|
|
|
SimpleDateFormat
|
|
|
|
|
bar_1
|
|
|
|
|
dimensionSpecs
|
|
|
|
|
isWhitelist
|
|
|
|
|
joda
|
|
|
|
|
nullHandling
|
|
|
|
|
product_1
|
|
|
|
|
product_3
|
|
|
|
|
registeredLookup
|
|
|
|
|
timeFormat
|
|
|
|
|
tz
|
|
|
|
|
v3
|
|
|
|
|
weekyears
|
|
|
|
|
___bar
|
|
|
|
|
caseSensitive
|
|
|
|
|
extractionFn
|
|
|
|
|
insensitive_contains
|
|
|
|
|
last_name
|
|
|
|
|
lowerStrict
|
|
|
|
|
upperStrict
|
|
|
|
|
1970-01-01T00
|
|
|
|
|
P2W
|
|
|
|
|
PT0.750S
|
|
|
|
|
PT1H30M
|
|
|
|
|
TimeseriesQuery
|
|
|
|
|
D1
|
|
|
|
|
D2
|
|
|
|
|
D3
|
|
|
|
|
druid.query.groupBy.defaultStrategy
|
2022-03-08 16:13:11 -05:00
|
|
|
|
druid.query.groupBy.maxSelectorDictionarySize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid.query.groupBy.maxMergingDictionarySize
|
|
|
|
|
druid.query.groupBy.maxOnDiskStorage
|
|
|
|
|
druid.query.groupBy.maxResults.
|
|
|
|
|
groupByStrategy
|
|
|
|
|
maxOnDiskStorage
|
|
|
|
|
maxResults
|
|
|
|
|
orderby
|
|
|
|
|
orderbys
|
|
|
|
|
outputName
|
|
|
|
|
pushdown
|
|
|
|
|
row1
|
|
|
|
|
subtotalsSpec
|
2022-05-21 13:28:54 -04:00
|
|
|
|
tradeoff
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HavingSpec
|
|
|
|
|
HavingSpecs
|
|
|
|
|
dimSelector
|
|
|
|
|
equalTo
|
|
|
|
|
greaterThan
|
|
|
|
|
lessThan
|
|
|
|
|
DefaultDimensionSpec
|
|
|
|
|
druid-hll
|
|
|
|
|
isInputHyperUnique
|
|
|
|
|
pre-join
|
|
|
|
|
DefaultLimitSpec
|
|
|
|
|
OrderByColumnSpec
|
|
|
|
|
OrderByColumnSpecs
|
|
|
|
|
dimensionOrder
|
|
|
|
|
60_000
|
|
|
|
|
kafka-extraction-namespace
|
|
|
|
|
mins
|
2023-05-19 12:42:27 -04:00
|
|
|
|
tierName
|
2019-09-17 15:47:30 -04:00
|
|
|
|
row2
|
|
|
|
|
row3
|
|
|
|
|
row4
|
|
|
|
|
t3
|
|
|
|
|
t4
|
|
|
|
|
t5
|
2022-02-16 10:23:26 -05:00
|
|
|
|
groupByEnableMultiValueUnnesting
|
|
|
|
|
unnesting
|
2019-09-17 15:47:30 -04:00
|
|
|
|
500ms
|
|
|
|
|
tenant_id
|
|
|
|
|
fieldAccess
|
|
|
|
|
finalizingFieldAccess
|
|
|
|
|
hyperUniqueCardinality
|
2021-07-27 11:26:05 -04:00
|
|
|
|
brokerService
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bySegment
|
|
|
|
|
doubleSum
|
|
|
|
|
druid.broker.cache.populateCache
|
|
|
|
|
druid.broker.cache.populateResultLevelCache
|
|
|
|
|
druid.broker.cache.useCache
|
|
|
|
|
druid.broker.cache.useResultLevelCache
|
|
|
|
|
druid.historical.cache.populateCache
|
|
|
|
|
druid.historical.cache.useCache
|
parallel broker merges on fork join pool (#8578)
* sketch of broker parallel merges done in small batches on fork join pool
* fix non-terminating sequences, auto compute parallelism
* adjust benches
* adjust benchmarks
* now hella more faster, fixed dumb
* fix
* remove comments
* log.info for debug
* javadoc
* safer block for sequence to yielder conversion
* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool
* smooth yield rate adjustment, more logs to help tune
* cleanup, less logs
* error handling, bug fixes, on by default, more parallel, more tests
* remove unused var
* comments
* timeboundary mergeFn
* simplify, more javadoc
* formatting
* pushdown config
* use nanos consistently, move logs back to debug level, bit more javadoc
* static terminal result batch
* javadoc for nullability of createMergeFn
* cleanup
* oops
* fix race, add docs
* spelling, remove todo, add unhandled exception log
* cleanup, revert unintended change
* another unintended change
* review stuff
* add ParallelMergeCombiningSequenceBenchmark, fixes
* hyper-threading is the enemy
* fix initial start delay, lol
* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2
* fix those important style issues with the benchmarks code
* lazy sequence creation for benchmarks
* more benchmark comments
* stable sequence generation time
* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs
* add jmh thread based benchmarks, cleanup some stuff
* oops
* style
* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose
* retool benchmark to allow modeling more typical heterogenous heavy workloads
* spelling
* fix
* refactor benchmarks
* formatting
* docs
* add maxThreadStartDelay parameter to threaded benchmark
* why does catch need to be on its own line but else doesnt
2019-11-07 14:58:46 -05:00
|
|
|
|
enableParallelMerge
|
2021-12-15 02:21:53 -05:00
|
|
|
|
enableJoinLeftTableScanDirect
|
|
|
|
|
enableJoinFilterPushDown
|
|
|
|
|
enableJoinFilterRewrite
|
2022-08-13 11:07:43 -04:00
|
|
|
|
enableRewriteJoinToFilter
|
2021-12-15 02:21:53 -05:00
|
|
|
|
enableJoinFilterRewriteValueColumnFilters
|
2023-08-08 18:49:29 -04:00
|
|
|
|
floatFirst
|
|
|
|
|
floatLast
|
2019-09-17 15:47:30 -04:00
|
|
|
|
floatSum
|
2021-12-15 02:21:53 -05:00
|
|
|
|
joinFilterRewriteMaxSize
|
2019-09-17 15:47:30 -04:00
|
|
|
|
maxQueuedBytes
|
|
|
|
|
maxScatterGatherBytes
|
|
|
|
|
minTopNThreshold
|
parallel broker merges on fork join pool (#8578)
* sketch of broker parallel merges done in small batches on fork join pool
* fix non-terminating sequences, auto compute parallelism
* adjust benches
* adjust benchmarks
* now hella more faster, fixed dumb
* fix
* remove comments
* log.info for debug
* javadoc
* safer block for sequence to yielder conversion
* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool
* smooth yield rate adjustment, more logs to help tune
* cleanup, less logs
* error handling, bug fixes, on by default, more parallel, more tests
* remove unused var
* comments
* timeboundary mergeFn
* simplify, more javadoc
* formatting
* pushdown config
* use nanos consistently, move logs back to debug level, bit more javadoc
* static terminal result batch
* javadoc for nullability of createMergeFn
* cleanup
* oops
* fix race, add docs
* spelling, remove todo, add unhandled exception log
* cleanup, revert unintended change
* another unintended change
* review stuff
* add ParallelMergeCombiningSequenceBenchmark, fixes
* hyper-threading is the enemy
* fix initial start delay, lol
* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2
* fix those important style issues with the benchmarks code
* lazy sequence creation for benchmarks
* more benchmark comments
* stable sequence generation time
* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs
* add jmh thread based benchmarks, cleanup some stuff
* oops
* style
* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose
* retool benchmark to allow modeling more typical heterogenous heavy workloads
* spelling
* fix
* refactor benchmarks
* formatting
* docs
* add maxThreadStartDelay parameter to threaded benchmark
* why does catch need to be on its own line but else doesnt
2019-11-07 14:58:46 -05:00
|
|
|
|
parallelMergeInitialYieldRows
|
|
|
|
|
parallelMergeParallelism
|
|
|
|
|
parallelMergeSmallBatchRows
|
2019-09-17 15:47:30 -04:00
|
|
|
|
populateCache
|
|
|
|
|
populateResultLevelCache
|
|
|
|
|
queryId
|
|
|
|
|
row-matchers
|
|
|
|
|
serializeDateTimeAsLong
|
|
|
|
|
serializeDateTimeAsLongInner
|
|
|
|
|
skipEmptyBuckets
|
|
|
|
|
useCache
|
|
|
|
|
useResultLevelCache
|
|
|
|
|
vectorSize
|
2021-12-21 16:07:53 -05:00
|
|
|
|
enableJoinLeftTableScanDirect
|
|
|
|
|
enableJoinFilterPushDown
|
|
|
|
|
enableJoinFilterRewrite
|
|
|
|
|
enableJoinFilterRewriteValueColumnFilters
|
|
|
|
|
joinFilterRewriteMaxSize
|
2021-06-30 16:42:45 -04:00
|
|
|
|
7KiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
DatasourceMetadata
|
|
|
|
|
TimeBoundary
|
|
|
|
|
errorClass
|
|
|
|
|
errorMessage
|
|
|
|
|
x-jackson-smile
|
|
|
|
|
batchSize
|
|
|
|
|
compactedList
|
|
|
|
|
druid.query.scan.legacy
|
|
|
|
|
druid.query.scan.maxRowsQueuedForOrdering
|
|
|
|
|
druid.query.scan.maxSegmentPartitionsOrderedInMemory
|
|
|
|
|
maxRowsQueuedForOrdering
|
|
|
|
|
maxSegmentPartitionsOrderedInMemory
|
|
|
|
|
resultFormat
|
|
|
|
|
valueVector
|
|
|
|
|
SearchQuerySpec
|
|
|
|
|
cursorOnly
|
|
|
|
|
druid.query.search.searchStrategy
|
|
|
|
|
queryableIndexSegment
|
|
|
|
|
searchDimensions
|
|
|
|
|
searchStrategy
|
|
|
|
|
useIndexes
|
|
|
|
|
ContainsSearchQuerySpec
|
|
|
|
|
FragmentSearchQuerySpec
|
|
|
|
|
InsensitiveContainsSearchQuerySpec
|
|
|
|
|
RegexSearchQuerySpec
|
|
|
|
|
analysisType
|
|
|
|
|
analysisTypes
|
2023-07-13 12:37:36 -04:00
|
|
|
|
aggregatorMergeStrategy
|
2019-09-17 15:47:30 -04:00
|
|
|
|
lenientAggregatorMerge
|
|
|
|
|
minmax
|
|
|
|
|
segmentMetadata
|
|
|
|
|
toInclude
|
|
|
|
|
PagingSpec
|
|
|
|
|
fromNext
|
|
|
|
|
pagingSpec
|
|
|
|
|
BoundFilter
|
2023-05-19 12:42:27 -04:00
|
|
|
|
GroupByQuery
|
2019-09-17 15:47:30 -04:00
|
|
|
|
SearchQuery
|
|
|
|
|
TopNMetricSpec
|
|
|
|
|
compareTo
|
|
|
|
|
file12
|
|
|
|
|
file2
|
2022-02-11 17:43:30 -05:00
|
|
|
|
_x_
|
2019-09-17 15:47:30 -04:00
|
|
|
|
fieldName1
|
|
|
|
|
fieldName2
|
|
|
|
|
DimensionTopNMetricSpec
|
|
|
|
|
metricSpec
|
|
|
|
|
previousStop
|
|
|
|
|
GroupByQuery
|
|
|
|
|
top500
|
|
|
|
|
outputType
|
|
|
|
|
1.9TB
|
|
|
|
|
16CPU
|
|
|
|
|
WebUpd8
|
|
|
|
|
m5.2xlarge
|
|
|
|
|
metadata.storage.
|
2021-06-30 16:42:45 -04:00
|
|
|
|
256GiB
|
|
|
|
|
128GiB
|
2023-05-19 12:42:27 -04:00
|
|
|
|
|
2019-09-17 15:47:30 -04:00
|
|
|
|
PATH_TO_DRUID
|
|
|
|
|
namenode
|
2023-05-19 12:42:27 -04:00
|
|
|
|
|
2019-09-17 15:47:30 -04:00
|
|
|
|
segmentID
|
|
|
|
|
segmentIds
|
2023-05-19 12:42:27 -04:00
|
|
|
|
|
2019-09-17 15:47:30 -04:00
|
|
|
|
dstIP
|
|
|
|
|
dstPort
|
|
|
|
|
srcIP
|
|
|
|
|
srcPort
|
2023-05-19 12:42:27 -04:00
|
|
|
|
|
2019-09-17 15:47:30 -04:00
|
|
|
|
common_runtime_properties
|
|
|
|
|
druid.extensions.directory
|
|
|
|
|
druid.extensions.loadList
|
|
|
|
|
druid.hadoop.security.kerberos.keytab
|
|
|
|
|
druid.hadoop.security.kerberos.principal
|
|
|
|
|
druid.indexer.logs.directory
|
|
|
|
|
druid.indexer.logs.type
|
|
|
|
|
druid.storage.storageDirectory
|
|
|
|
|
druid.storage.type
|
|
|
|
|
hdfs.headless.keytab
|
|
|
|
|
indexing_log
|
|
|
|
|
keytabs
|
|
|
|
|
dsql
|
|
|
|
|
2015-09-12T12
|
2022-08-24 12:23:22 -04:00
|
|
|
|
clickstreams
|
|
|
|
|
uid
|
|
|
|
|
_k_
|
|
|
|
|
Bridgerton
|
|
|
|
|
Hellmar
|
2019-09-17 15:47:30 -04:00
|
|
|
|
bear-111
|
2020-11-23 18:03:13 -05:00
|
|
|
|
10KiB
|
|
|
|
|
2GiB
|
|
|
|
|
512KiB
|
2020-07-30 21:58:48 -04:00
|
|
|
|
1GiB
|
|
|
|
|
KiB
|
|
|
|
|
GiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
00.000Z
|
|
|
|
|
100ms
|
|
|
|
|
10ms
|
|
|
|
|
1GB
|
|
|
|
|
1_000_000
|
|
|
|
|
2012-01-01T00
|
|
|
|
|
2GB
|
|
|
|
|
524288000L
|
2021-06-30 16:42:45 -04:00
|
|
|
|
5MiB
|
2019-09-17 15:47:30 -04:00
|
|
|
|
8u60
|
|
|
|
|
Autoscaler
|
2021-10-25 15:16:21 -04:00
|
|
|
|
APPROX_COUNT_DISTINCT_BUILTIN
|
2019-09-17 15:47:30 -04:00
|
|
|
|
AvaticaConnectionBalancer
|
|
|
|
|
File.getFreeSpace
|
|
|
|
|
File.getTotalSpace
|
|
|
|
|
ForkJoinPool
|
2020-04-28 06:13:39 -04:00
|
|
|
|
GCE
|
2019-09-17 15:47:30 -04:00
|
|
|
|
HadoopIndexTasks
|
|
|
|
|
HttpEmitter
|
|
|
|
|
HttpPostEmitter
|
|
|
|
|
InetAddress.getLocalHost
|
2021-04-09 03:12:28 -04:00
|
|
|
|
IOConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
JRE8u60
|
|
|
|
|
KeyManager
|
|
|
|
|
L1
|
|
|
|
|
L2
|
2020-04-28 06:13:39 -04:00
|
|
|
|
ListManagedInstances
|
2019-09-17 15:47:30 -04:00
|
|
|
|
LoadSpec
|
|
|
|
|
LoggingEmitter
|
|
|
|
|
Los_Angeles
|
|
|
|
|
MDC
|
|
|
|
|
NoopServiceEmitter
|
2021-10-08 23:52:03 -04:00
|
|
|
|
NUMA
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ONLY_EVENTS
|
|
|
|
|
P1D
|
|
|
|
|
P1W
|
|
|
|
|
PT-1S
|
|
|
|
|
PT0.050S
|
|
|
|
|
PT10M
|
|
|
|
|
PT10S
|
|
|
|
|
PT15M
|
|
|
|
|
PT1800S
|
|
|
|
|
PT1M
|
|
|
|
|
PT1S
|
|
|
|
|
PT24H
|
|
|
|
|
PT300S
|
|
|
|
|
PT30S
|
2021-04-20 20:10:43 -04:00
|
|
|
|
PT3600S
|
2019-09-17 15:47:30 -04:00
|
|
|
|
PT5M
|
|
|
|
|
PT5S
|
|
|
|
|
PT60S
|
|
|
|
|
PT90M
|
|
|
|
|
Param
|
|
|
|
|
Runtime.maxMemory
|
|
|
|
|
SSLContext
|
|
|
|
|
SegmentMetadata
|
|
|
|
|
SegmentWriteOutMediumFactory
|
|
|
|
|
ServiceEmitter
|
|
|
|
|
System.getProperty
|
|
|
|
|
TLSv1.2
|
|
|
|
|
TrustManager
|
|
|
|
|
TuningConfig
|
|
|
|
|
_N_
|
|
|
|
|
_default
|
|
|
|
|
_default_tier
|
|
|
|
|
addr
|
|
|
|
|
affinityConfig
|
|
|
|
|
allowAll
|
2020-04-15 19:12:20 -04:00
|
|
|
|
ANDed
|
2019-09-17 15:47:30 -04:00
|
|
|
|
array_mod
|
2020-04-28 06:13:39 -04:00
|
|
|
|
autoscale
|
|
|
|
|
autoscalers
|
2019-09-17 15:47:30 -04:00
|
|
|
|
batch_index_task
|
|
|
|
|
cgroup
|
|
|
|
|
classloader
|
|
|
|
|
com.metamx
|
|
|
|
|
common.runtime.properties
|
|
|
|
|
cpuacct
|
|
|
|
|
dataSourceName
|
|
|
|
|
datetime
|
|
|
|
|
defaultHistory
|
|
|
|
|
doubleMax
|
|
|
|
|
doubleMin
|
|
|
|
|
doubleSum
|
|
|
|
|
druid.enableTlsPort
|
|
|
|
|
druid.indexer.autoscale.workerVersion
|
|
|
|
|
druid.service
|
|
|
|
|
druid.storage.disableAcl
|
|
|
|
|
druid_audit
|
|
|
|
|
druid_config
|
|
|
|
|
druid_dataSource
|
|
|
|
|
druid_pendingSegments
|
|
|
|
|
druid_rules
|
|
|
|
|
druid_segments
|
|
|
|
|
druid_supervisors
|
2023-02-23 16:57:59 -05:00
|
|
|
|
druid_tasklocks
|
|
|
|
|
druid_tasklogs
|
2019-09-17 15:47:30 -04:00
|
|
|
|
druid_tasks
|
2020-04-15 19:12:20 -04:00
|
|
|
|
DruidQueryRel
|
2022-05-10 22:05:15 -04:00
|
|
|
|
durationToRetain
|
2019-09-17 15:47:30 -04:00
|
|
|
|
ec2
|
|
|
|
|
equalDistribution
|
|
|
|
|
extractionFn
|
2022-11-11 02:46:40 -05:00
|
|
|
|
filename
|
2019-09-17 15:47:30 -04:00
|
|
|
|
file.encoding
|
|
|
|
|
fillCapacity
|
|
|
|
|
first_location
|
|
|
|
|
floatMax
|
2020-09-14 22:44:58 -04:00
|
|
|
|
floatAny
|
2019-09-17 15:47:30 -04:00
|
|
|
|
floatMin
|
|
|
|
|
floatSum
|
|
|
|
|
freeSpacePercent
|
2020-04-28 06:13:39 -04:00
|
|
|
|
gce
|
|
|
|
|
gce-extensions
|
2019-09-17 15:47:30 -04:00
|
|
|
|
getCanonicalHostName
|
|
|
|
|
groupBy
|
|
|
|
|
hdfs
|
|
|
|
|
httpRemote
|
|
|
|
|
indexTask
|
|
|
|
|
info_dir
|
2020-04-15 19:12:20 -04:00
|
|
|
|
inlining
|
2019-09-17 15:47:30 -04:00
|
|
|
|
java.class.path
|
|
|
|
|
java.io.tmpdir
|
|
|
|
|
javaOpts
|
|
|
|
|
javaOptsArray
|
Making optimal usage of multiple segment cache locations (#8038)
* #7641 - Changing segment distribution algorithm to distribute segments to multiple segment cache locations
* Fixing indentation
* WIP
* Adding interface for location strategy selection, least bytes used strategy impl, round-robin strategy impl, locationSelectorStrategy config with least bytes used strategy as the default strategy
* fixing code style
* Fixing test
* Adding a method visible only for testing, fixing tests
* 1. Changing the method contract to return an iterator of locations instead of a single best location. 2. Check style fixes
* fixing the conditional statement
* Added testSegmentDistributionUsingLeastBytesUsedStrategy, fixed testSegmentDistributionUsingRoundRobinStrategy
* to trigger CI build
* Add documentation for the selection strategy configuration
* to re trigger CI build
* updated docs as per review comments, made LeastBytesUsedStorageLocationSelectorStrategy.getLocations a synchronzied method, other minor fixes
* In checkLocationConfigForNull method, using getLocations() to check for null instead of directly referring to the locations variable so that tests overriding getLocations() method do not fail
* Implementing review comments. Added tests for StorageLocationSelectorStrategy
* Checkstyle fixes
* Adding java doc comments for StorageLocationSelectorStrategy interface
* checkstyle
* empty commit to retrigger build
* Empty commit
* Adding suppressions for words leastBytesUsed and roundRobin of ../docs/configuration/index.md file
* Impl review comments including updating docs as suggested
* Removing checkLocationConfigForNull(), @NotEmpty annotation serves the purpose
* Round robin iterator to keep track of the no. of iterations, impl review comments, added tests for round robin strategy
* Fixing the round robin iterator
* Removed numLocationsToTry, updated java docs
* changing property attribute value from tier to type
* Fixing assert messages
2019-09-28 02:17:44 -04:00
|
|
|
|
leastBytesUsed
|
2019-09-17 15:47:30 -04:00
|
|
|
|
loadList
|
|
|
|
|
loadqueuepeon
|
|
|
|
|
loadspec
|
|
|
|
|
localStorage
|
|
|
|
|
maxHeaderSize
|
|
|
|
|
maxQueuedBytes
|
|
|
|
|
maxSize
|
|
|
|
|
middlemanager
|
|
|
|
|
minTimeMs
|
|
|
|
|
minmax
|
|
|
|
|
mins
|
2020-04-15 19:12:20 -04:00
|
|
|
|
nullable
|
2019-09-17 15:47:30 -04:00
|
|
|
|
orderby
|
|
|
|
|
orderbys
|
|
|
|
|
org.apache.druid
|
|
|
|
|
org.apache.druid.jetty.RequestLog
|
|
|
|
|
org.apache.hadoop
|
2023-09-07 07:24:33 -04:00
|
|
|
|
OSHI
|
|
|
|
|
OshiSysMonitor
|
2019-09-17 15:47:30 -04:00
|
|
|
|
overlord.html
|
|
|
|
|
pendingSegments
|
|
|
|
|
pre-flight
|
2020-04-15 19:12:20 -04:00
|
|
|
|
preloaded
|
2019-09-17 15:47:30 -04:00
|
|
|
|
queryType
|
|
|
|
|
remoteTaskRunnerConfig
|
|
|
|
|
rendezvousHash
|
2021-05-05 08:39:36 -04:00
|
|
|
|
replicants
|
2019-09-17 15:47:30 -04:00
|
|
|
|
resultsets
|
Making optimal usage of multiple segment cache locations (#8038)
* #7641 - Changing segment distribution algorithm to distribute segments to multiple segment cache locations
* Fixing indentation
* WIP
* Adding interface for location strategy selection, least bytes used strategy impl, round-robin strategy impl, locationSelectorStrategy config with least bytes used strategy as the default strategy
* fixing code style
* Fixing test
* Adding a method visible only for testing, fixing tests
* 1. Changing the method contract to return an iterator of locations instead of a single best location. 2. Check style fixes
* fixing the conditional statement
* Added testSegmentDistributionUsingLeastBytesUsedStrategy, fixed testSegmentDistributionUsingRoundRobinStrategy
* to trigger CI build
* Add documentation for the selection strategy configuration
* to re trigger CI build
* updated docs as per review comments, made LeastBytesUsedStorageLocationSelectorStrategy.getLocations a synchronzied method, other minor fixes
* In checkLocationConfigForNull method, using getLocations() to check for null instead of directly referring to the locations variable so that tests overriding getLocations() method do not fail
* Implementing review comments. Added tests for StorageLocationSelectorStrategy
* Checkstyle fixes
* Adding java doc comments for StorageLocationSelectorStrategy interface
* checkstyle
* empty commit to retrigger build
* Empty commit
* Adding suppressions for words leastBytesUsed and roundRobin of ../docs/configuration/index.md file
* Impl review comments including updating docs as suggested
* Removing checkLocationConfigForNull(), @NotEmpty annotation serves the purpose
* Round robin iterator to keep track of the no. of iterations, impl review comments, added tests for round robin strategy
* Fixing the round robin iterator
* Removed numLocationsToTry, updated java docs
* changing property attribute value from tier to type
* Fixing assert messages
2019-09-28 02:17:44 -04:00
|
|
|
|
roundRobin
|
2019-09-17 15:47:30 -04:00
|
|
|
|
runtime.properties
|
|
|
|
|
runtime.properties.
|
|
|
|
|
s3
|
|
|
|
|
s3a
|
|
|
|
|
s3n
|
|
|
|
|
slf4j
|
|
|
|
|
sql
|
|
|
|
|
sqlQuery
|
|
|
|
|
successfulSending
|
2023-09-07 07:24:33 -04:00
|
|
|
|
[S]igar
|
2019-09-17 15:47:30 -04:00
|
|
|
|
taskBlackListCleanupPeriod
|
|
|
|
|
tasklogs
|
|
|
|
|
timeBoundary
|
DruidInputSource: Fix issues in column projection, timestamp handling. (#10267)
* DruidInputSource: Fix issues in column projection, timestamp handling.
DruidInputSource, DruidSegmentReader changes:
1) Remove "dimensions" and "metrics". They are not necessary, because we
can compute which columns we need to read based on what is going to
be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
the timestamp of the returned InputRows was set to the `__time` column
of the input datasource.
(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.
(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.
(1) and (3) are breaking changes.
Web console changes:
1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
compatibility with the new behavior.
Other changes:
1) Add ColumnsFilter, a new class that allows input readers to determine
which columns they need to read. Currently, it's only used by the
DruidInputSource, but it could be used by other columnar input sources
in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
ColumnsFilter for given timestamp, dimensions, transform, and metrics.
5) Add "getRequiredColumns" method to TransformSpec to support the above.
* Various fixups.
* Uncomment incorrectly commented lines.
* Move TransformSpecTest to the proper module.
* Add druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.
* Fix.
* Fix build.
* Checkstyle.
* Misc fixes.
* Fix test.
* Move config.
* Fix imports.
* Fixup.
* Fix ShuffleResourceTest.
* Add import.
* Smarter exclusions.
* Fixes based on tests.
Also, add TIME_COLUMN constant in the web console.
* Adjustments for tests.
* Reorder test data.
* Update docs.
* Update docs to say Druid 0.22.0 instead of 0.21.0.
* Fix test.
* Fix ITAutoCompactionTest.
* Changes from review & from merging.
2021-03-25 13:32:21 -04:00
|
|
|
|
timestampSpec
|
2019-09-17 15:47:30 -04:00
|
|
|
|
tmp
|
|
|
|
|
tmpfs
|
|
|
|
|
truststore
|
|
|
|
|
tuningConfig
|
2020-04-15 19:12:20 -04:00
|
|
|
|
unioning
|
2019-09-17 15:47:30 -04:00
|
|
|
|
useIndexes
|
|
|
|
|
user.timezone
|
|
|
|
|
v0.12.0
|
|
|
|
|
versionReplacementString
|
|
|
|
|
workerId
|
|
|
|
|
yyyy-MM-dd
|
2019-10-17 15:57:19 -04:00
|
|
|
|
taskType
|
|
|
|
|
index_kafka
|
|
|
|
|
c1
|
|
|
|
|
c2
|
|
|
|
|
ds1
|
|
|
|
|
equalDistributionWithCategorySpec
|
|
|
|
|
fillCapacityWithCategorySpec
|
|
|
|
|
WorkerCategorySpec
|
|
|
|
|
workerCategorySpec
|
|
|
|
|
CategoryConfig
|
2019-09-17 15:47:30 -04:00
|
|
|
|
logsearch
|
|
|
|
|
2000-01-01T01
|
|
|
|
|
DateTimeFormat
|
|
|
|
|
JsonPath
|
|
|
|
|
autodetect
|
|
|
|
|
createBitmapIndex
|
|
|
|
|
dimensionExclusions
|
|
|
|
|
expr
|
|
|
|
|
jackson-jq
|
|
|
|
|
missingValue
|
2021-01-27 03:34:56 -05:00
|
|
|
|
skipBytesInMemoryOverheadCheck
|
2019-09-17 15:47:30 -04:00
|
|
|
|
spatialDimensions
|
2024-04-01 05:28:03 -04:00
|
|
|
|
radiusUnit
|
|
|
|
|
euclidean
|
|
|
|
|
kilometers
|
2019-09-17 15:47:30 -04:00
|
|
|
|
useFieldDiscovery
|
|
|
|
|
4CPU
|
|
|
|
|
cityName
|
|
|
|
|
countryIsoCode
|
|
|
|
|
countryName
|
|
|
|
|
isAnonymous
|
|
|
|
|
isMinor
|
|
|
|
|
isNew
|
|
|
|
|
isRobot
|
|
|
|
|
isUnpatrolled
|
|
|
|
|
metroCode
|
|
|
|
|
regionIsoCode
|
2020-04-04 12:02:24 -04:00
|
|
|
|
regionName
|
2021-06-30 16:42:45 -04:00
|
|
|
|
4GiB
|
|
|
|
|
512GiB
|
2020-04-04 12:02:24 -04:00
|
|
|
|
json
|
|
|
|
|
metastore
|
2020-04-09 13:43:11 -04:00
|
|
|
|
UserGroupInformation
|
|
|
|
|
CVE-2019-17571
|
|
|
|
|
CVE-2019-12399
|
|
|
|
|
CVE-2018-17196
|
2020-07-08 02:12:39 -04:00
|
|
|
|
bin.tar.gz
|
2020-07-30 21:58:48 -04:00
|
|
|
|
0s
|
|
|
|
|
1T
|
|
|
|
|
3G
|
|
|
|
|
1_000
|
|
|
|
|
1_000_000
|
|
|
|
|
1_000_000_000
|
|
|
|
|
1_000_000_000_000
|
|
|
|
|
1_000_000_000_000_000
|
|
|
|
|
Giga
|
|
|
|
|
Tera
|
|
|
|
|
Peta
|
|
|
|
|
KiB
|
|
|
|
|
MiB
|
|
|
|
|
GiB
|
|
|
|
|
TiB
|
|
|
|
|
PiB
|
2021-03-31 15:46:25 -04:00
|
|
|
|
protobuf
|
|
|
|
|
Golang
|
2021-05-25 15:49:49 -04:00
|
|
|
|
multiValueHandling
|
2021-11-16 13:13:35 -05:00
|
|
|
|
_n_
|
2022-08-27 00:19:24 -04:00
|
|
|
|
KLL
|
|
|
|
|
KllFloatsSketch
|
|
|
|
|
KllDoublesSketch
|
|
|
|
|
PMF
|
|
|
|
|
CDF
|
|
|
|
|
maxStreamLength
|
|
|
|
|
toString
|
2022-06-21 23:35:03 -04:00
|
|
|
|
100TB
|
2022-09-06 03:06:57 -04:00
|
|
|
|
compressedBigDecimal
|
|
|
|
|
limitSpec
|
|
|
|
|
metricsSpec
|
|
|
|
|
postAggregations
|
|
|
|
|
SaleAmount
|
|
|
|
|
IngestionSpec
|
|
|
|
|
druid-compressed-bigdecimal
|
|
|
|
|
doubleSum
|
2022-11-11 02:46:40 -05:00
|
|
|
|
ANY_VALUE
|
|
|
|
|
APPROX_COUNT_DISTINCT_DS_HLL
|
|
|
|
|
APPROX_COUNT_DISTINCT_DS_THETA
|
|
|
|
|
APPROX_QUANTILE_DS
|
|
|
|
|
APPROX_QUANTILE_FIXED_BUCKETS
|
|
|
|
|
ARRAY_CONCAT_AGG
|
|
|
|
|
BIT_AND
|
|
|
|
|
BIT_OR
|
|
|
|
|
BIT_XOR
|
|
|
|
|
BITWISE_AND
|
|
|
|
|
BITWISE_COMPLEMENT
|
|
|
|
|
BITWISE_CONVERT_DOUBLE_TO_LONG_BITS
|
|
|
|
|
BITWISE_CONVERT_LONG_BITS_TO_DOUBLE
|
|
|
|
|
BITWISE_OR
|
|
|
|
|
BITWISE_SHIFT_LEFT
|
|
|
|
|
BITWISE_SHIFT_RIGHT
|
|
|
|
|
BITWISE_XOR
|
|
|
|
|
BLOOM_FILTER
|
|
|
|
|
BTRIM
|
|
|
|
|
CHAR_LENGTH
|
|
|
|
|
CHARACTER_LENGTH
|
|
|
|
|
CURRENT_DATE
|
|
|
|
|
CURRENT_TIMESTAMP
|
|
|
|
|
DATE_TRUNC
|
2023-12-11 12:12:06 -05:00
|
|
|
|
DECODE_BASE64_COMPLEX
|
|
|
|
|
DECODE_BASE64_UTF8
|
2022-11-11 02:46:40 -05:00
|
|
|
|
DS_CDF
|
|
|
|
|
DS_GET_QUANTILE
|
|
|
|
|
DS_GET_QUANTILES
|
|
|
|
|
DS_HISTOGRAM
|
|
|
|
|
DS_HLL
|
|
|
|
|
DS_QUANTILE_SUMMARY
|
|
|
|
|
DS_QUANTILES_SKETCH
|
|
|
|
|
DS_RANK
|
|
|
|
|
DS_THETA
|
2023-03-28 09:17:12 -04:00
|
|
|
|
DS_TUPLE_DOUBLES
|
|
|
|
|
DS_TUPLE_DOUBLES_INTERSECT
|
|
|
|
|
DS_TUPLE_DOUBLES_METRICS_SUM_ESTIMATE
|
|
|
|
|
DS_TUPLE_DOUBLES_NOT
|
|
|
|
|
DS_TUPLE_DOUBLES_UNION
|
2022-11-11 02:46:40 -05:00
|
|
|
|
EARLIEST_BY
|
2022-07-13 21:59:55 -04:00
|
|
|
|
_e_
|
2022-11-11 02:46:40 -05:00
|
|
|
|
HLL_SKETCH_ESTIMATE
|
|
|
|
|
HLL_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS
|
|
|
|
|
HLL_SKETCH_TO_STRING
|
|
|
|
|
HLL_SKETCH_UNION
|
|
|
|
|
LATEST_BY
|
2022-07-13 21:59:55 -04:00
|
|
|
|
base-10
|
2022-11-11 02:46:40 -05:00
|
|
|
|
MV_APPEND
|
|
|
|
|
MV_CONCAT
|
|
|
|
|
MV_CONTAINS
|
|
|
|
|
MV_FILTER_NONE
|
|
|
|
|
MV_FILTER_ONLY
|
|
|
|
|
MV_LENGTH
|
|
|
|
|
MV_OFFSET
|
|
|
|
|
MV_OFFSET_OF
|
|
|
|
|
MV_ORDINAL
|
|
|
|
|
MV_ORDINAL_OF
|
|
|
|
|
MV_OVERLAP
|
|
|
|
|
MV_PREPEND
|
|
|
|
|
MV_SLICE
|
|
|
|
|
MV_TO_STRING
|
|
|
|
|
NULLIF
|
2022-07-13 21:59:55 -04:00
|
|
|
|
_n_th
|
2022-11-11 02:46:40 -05:00
|
|
|
|
STDDEV_POP
|
|
|
|
|
STDDEV_SAMP
|
|
|
|
|
STRING_FORMAT
|
|
|
|
|
STRING_TO_MV
|
|
|
|
|
SUBSTR
|
|
|
|
|
TDIGEST_GENERATE_SKETCH
|
|
|
|
|
TDIGEST_QUANTILE
|
|
|
|
|
TEXTCAT
|
|
|
|
|
THETA_SKETCH_ESTIMATE
|
|
|
|
|
THETA_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS
|
|
|
|
|
THETA_SKETCH_INTERSECT
|
|
|
|
|
THETA_SKETCH_NOT
|
|
|
|
|
THETA_SKETCH_UNION
|
|
|
|
|
TIME_CEIL
|
|
|
|
|
TIME_EXTRACT
|
|
|
|
|
TIME_FLOOR
|
|
|
|
|
TIME_FORMAT
|
|
|
|
|
TIME_IN_INTERVAL
|
|
|
|
|
TIMESTAMP_TO_MILLIS
|
|
|
|
|
TIMESTAMPADD
|
|
|
|
|
TIMESTAMPDIFF
|
|
|
|
|
TRUNC
|
|
|
|
|
VAR_POP
|
2022-07-13 21:59:55 -04:00
|
|
|
|
VAR_SAMP
|
2022-08-09 06:44:22 -04:00
|
|
|
|
KTable
|
|
|
|
|
Aotearoa
|
|
|
|
|
Czechia
|
2022-08-22 21:47:40 -04:00
|
|
|
|
Zeelund
|
2023-05-19 12:42:27 -04:00
|
|
|
|
nano
|
Druid automated quickstart (#13365)
* Druid automated quickstart
* remove conf/druid/single-server/quickstart/_common/historical/jvm.config
* Minor changes in python script
* Add lower bound memory for some services
* Additional runtime properties for services
* Update supervise script to accept command arguments, corresponding changes in druid-quickstart.py
* File end newline
* Limit the ability to start multiple instances of a service, documentation changes
* simplify script arguments
* restore changes in medium profile
* run-druid refactor
* compute and pass middle manager runtime properties to run-druid
supervise script changes to process java opts array
use argparse, leave free memory, logging
* Remove extra quotes from mm task javaopts array
* Update logic to compute minimum memory
* simplify run-druid
* remove debug options from run-druid
* resolve the config_path provided
* comment out service specific runtime properties which are computed in the code
* simplify run-druid
* clean up docs, naming changes
* Throw ValueError exception on illegal state
* update docs
* rename args, compute_only -> compute, run_zk -> zk
* update help documentation
* update help documentation
* move task memory computation into separate method
* Add validation checks
* remove print
* Add validations
* remove start-druid bash script, rename start-druid-main
* Include tasks in lower bound memory calculation
* Fix test
* 256m instead of 256g
* caffeine cache uses 5% of heap
* ensure min task count is 2, task count is monotonic
* update configs and documentation for runtime props in conf/druid/single-server/quickstart
* Update docs
* Specify memory argument for each profile in single-server.md
* Update middleManager runtime.properties
* Move quickstart configs to conf/druid/base, add bash launch script, support python2
* Update supervise script
* rename base config directory to auto
* rename python script, changes to pass repeated args to supervise
* remove exmaples/conf/druid/base dir
* add docs
* restore changes in conf dir
* update start-druid-auto
* remove hashref for commands in supervise script
* start-druid-main java_opts array is comma separated
* update entry point script name in python script
* Update help docs
* documentation changes
* docs changes
* update docs
* add support for running indexer
* update supported services list
* update help
* Update python.md
* remove dir
* update .spelling
* Remove dependency on psutil and pathlib
* update docs
* Update get_physical_memory method
* Update help docs
* update docs
* update method to get physical memory on python
* udpate spelling
* update .spelling
* minor change
* Minor change
* memory comptuation for indexer
* update start-druid
* Update python.md
* Update single-server.md
* Update python.md
* run python3 --version to check if python is installed
* Update supervise script
* start-druid: echo message if python not found
* update anchor text
* minor change
* Update condition in supervise script
* JVM not jvm in docs
2022-12-09 14:04:02 -05:00
|
|
|
|
MacOS
|
|
|
|
|
RHEL
|
|
|
|
|
psutil
|
|
|
|
|
pathlib
|
2023-01-27 17:29:43 -05:00
|
|
|
|
kttm_simple
|
|
|
|
|
dist_country
|
2023-05-19 12:42:27 -04:00
|
|
|
|
# Extensions
|
|
|
|
|
druid-avro-extensions
|
|
|
|
|
druid-azure-extensions
|
|
|
|
|
druid-basic-security
|
|
|
|
|
druid-bloom-filter
|
|
|
|
|
druid-datasketches
|
|
|
|
|
druid-google-extensions
|
|
|
|
|
druid-hdfs-storage
|
|
|
|
|
druid-histogram
|
|
|
|
|
druid-kafka-extraction-name
|
|
|
|
|
druid-kafka-indexing-service
|
|
|
|
|
druid-kinesis-indexing-service
|
|
|
|
|
druid-kerberos
|
|
|
|
|
druid-lookups-cached-global
|
|
|
|
|
druid-lookups-cached-single
|
|
|
|
|
druid-multi-stage-query
|
|
|
|
|
druid-orc-extensions
|
|
|
|
|
druid-parquet-extensions
|
|
|
|
|
druid-protobuf-extensions
|
|
|
|
|
druid-ranger-security
|
|
|
|
|
druid-s3-extensions
|
|
|
|
|
druid-ec2-extensions
|
|
|
|
|
druid-aws-rds-extensions
|
|
|
|
|
druid-stats
|
|
|
|
|
mysql-metadata-storage
|
|
|
|
|
postgresql-metadata-storage
|
|
|
|
|
simple-client-sslcontext
|
|
|
|
|
druid-pac4j
|
|
|
|
|
druid-kubernetes-extensions
|
|
|
|
|
aliyun-oss-extensions
|
|
|
|
|
ambari-metrics-emitter
|
|
|
|
|
druid-cassandra-storage
|
|
|
|
|
druid-cloudfiles-extensions
|
|
|
|
|
druid-compressed-bigdecimal
|
|
|
|
|
druid-distinctcount
|
|
|
|
|
druid-redis-cache
|
|
|
|
|
druid-time-min-max
|
|
|
|
|
sqlserver-metadata-storage
|
|
|
|
|
graphite-emitter|Graphite metrics emitter
|
|
|
|
|
statsd-emitter|StatsD metrics emitter
|
|
|
|
|
kafka-emitter|Kafka metrics emitter
|
|
|
|
|
druid-thrift-extensions
|
|
|
|
|
druid-opentsdb-emitter
|
|
|
|
|
materialized-view-selection
|
|
|
|
|
materialized-view-maintenance
|
|
|
|
|
druid-moving-average-query
|
|
|
|
|
druid-influxdb-emitter
|
|
|
|
|
druid-momentsketch
|
|
|
|
|
druid-tdigestsketch
|
|
|
|
|
gce-extensions
|
|
|
|
|
prometheus-emitter
|
2023-05-22 18:41:07 -04:00
|
|
|
|
kubernetes-overlord-extensions
|
|
|
|
|
UCS
|
|
|
|
|
ISO646-US
|
2023-07-27 17:24:55 -04:00
|
|
|
|
completeTasks
|
|
|
|
|
runningTasks
|
|
|
|
|
waitingTasks
|
|
|
|
|
pendingTasks
|
|
|
|
|
shutdownAllTasks
|
2023-07-27 15:58:37 -04:00
|
|
|
|
supervisorId
|
|
|
|
|
suspendAll
|
|
|
|
|
resumeAll
|
|
|
|
|
terminateAll
|
2023-07-25 21:42:47 -04:00
|
|
|
|
selfDiscovered
|
2023-07-27 15:58:37 -04:00
|
|
|
|
loadstatus
|
2023-07-27 17:24:55 -04:00
|
|
|
|
isLeader
|
2023-08-21 14:34:41 -04:00
|
|
|
|
taskslots
|
2023-08-15 16:45:25 -04:00
|
|
|
|
loadstatus
|
|
|
|
|
sqlQueryId
|
2024-01-03 18:36:05 -05:00
|
|
|
|
useAzureCredentialsChain
|
|
|
|
|
DefaultAzureCredential
|
2023-11-06 14:34:42 -05:00
|
|
|
|
LAST_VALUE
|
2023-11-20 15:34:42 -05:00
|
|
|
|
markUnused
|
|
|
|
|
markUsed
|
|
|
|
|
segmentId
|
2023-11-29 17:32:49 -05:00
|
|
|
|
aggregateMultipleValues
|
2024-01-25 13:29:16 -05:00
|
|
|
|
appRegistrationClientId
|
|
|
|
|
appRegistrationClientSecret
|
|
|
|
|
tenantId
|
2024-01-24 04:47:33 -05:00
|
|
|
|
relativeError
|
|
|
|
|
ddSketch
|
|
|
|
|
DDSketch
|
|
|
|
|
druid-ddsketch
|
|
|
|
|
numBins
|
2024-01-14 12:52:30 -05:00
|
|
|
|
|
|
|
|
|
- ../docs/development/extensions-contrib/spectator-histogram.md
|
|
|
|
|
SpectatorHistogram
|
|
|
|
|
PercentileBuckets
|
|
|
|
|
spectatorHistogram
|
|
|
|
|
spectatorHistogramTimer
|
|
|
|
|
spectatorHistogramDistribution
|
|
|
|
|
percentileSpectatorHistogram
|
|
|
|
|
percentilesSpectatorHistogram
|
2024-01-24 04:47:33 -05:00
|
|
|
|
|
|
|
|
|
- ../docs/development/extensions-contrib/ddsketch-quantiles.md
|
2024-01-23 09:47:07 -05:00
|
|
|
|
quantilesFromDDSketch
|
|
|
|
|
quantileFromDDSketch
|
2024-01-24 04:47:33 -05:00
|
|
|
|
collapsingLowestDense
|