druid/website/.spelling

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# markdown-spellcheck spelling configuration file
# Format: lines beginning with '#' are comments.
# The global dictionary comes first, followed by per-file overrides.
# List one word per line. To start a file override, use ' - filename',
# where filename is relative to this configuration file.
32-bit
500MiB
64-bit
ACL
ACLs
APIs
AvroStorage
ARN
AWS
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI
AWS_CONTAINER_CREDENTIALS_FULL_URI
Actian
Authorizer
Avatica
Avro
Azul
BCP
Base64
Base64-encoded
ByteBuffer
concat
CIDR
CORS
CNF
CPUs
CSVs
Ceph
CloudWatch
ColumnDescriptor
Corretto
DDL
DML
DNS
DRUIDVERSION
DataSketches
DateTime
DateType
dimensionsSpec
DimensionSpec
DimensionSpecs
Dockerfile
DogStatsD
Double.NEGATIVE_INFINITY
Double.NEGATIVE_INFINITY.
Double.POSITIVE_INFINITY
Double.POSITIVE_INFINITY.
Dropwizard
dropwizard
DruidInputSource
DruidSQL
DynamicConfigProvider
EC2
EC2ContainerCredentialsProviderWrapper
ECS
EMR
EMRFS
ETL
Elasticsearch
Enums
FirehoseFactory
FlattenSpec
Float.NEGATIVE_INFINITY
Float.NEGATIVE_INFINITY.
Float.POSITIVE_INFINITY
Float.POSITIVE_INFINITY.
ForwardedRequestCustomizer
GC
GPG
GSSAPI
GUIs
GroupBy
Guice
HDFS
HDFSFirehose
HLL
HashSet
Homebrew
HyperLogLog
IAM
IANA
IETF
IP
IPv4
IS_BROADCAST
IS_JOINABLE
IS0
ISO-8601
ISO8601
IndexSpec
IndexTask
InfluxDB
InputFormat
InputSource
InputSources
Integer.MAX_VALUE
ioConfig
JBOD
JDBC
JDK
JDK7
JDK8
JKS
JMX
JRE
JS
JSON
JsonPath
JSONPath
JSSE
JVM
JVMs
Joda
JsonProperty
Jupyter
KMS
Kerberized
Kerberos
KeyStores
Kinesis
Kubernetes
LRU
LZ4
LZO
LimitSpec
Long.MAX_VALUE
Long.MAX_VALUE.
Long.MIN_VALUE
Long.MIN_VALUE.
Lucene
MapBD
MapDB
MariaDB
MiddleManager
MiddleManagers
Montréal
Murmur3
MVCC
NFS
OCF
OLAP
OOMs
OpenJDK
OpenLDAP
OpenTSDB
OutputStream
ParAccel
ParseSpec
ParseSpecs
Protobuf
pull-deps
RDBMS
RDDs
RDS
Rackspace
Redis
S3
SDK
SIGAR
SPNEGO
SqlInputSource
SQLServer
SSD
SSDs
SSL
Samza
Splunk
SqlFirehose
SqlParameter
SslContextFactory
StatsD
SYSTEM_TABLE
TCP
TGT
TLS
TopN
TopNs
UI
UIs
URI
URIs
UTF-16
UTF-8
UTF8
XMLs
ZK
ZSTD
accessor
ad-hoc
aggregator
aggregators
ambari
analytics
arrayElement
assumeRoleArn
assumeRoleExternalId
async
authorizer
authorizers
autocomplete
autodiscovery
autoscaler
autoscaling
averager
averagers
backend
backfills
backpressure
base64
big-endian
bigint
blobstore
boolean
breakpoint
broadcasted
checksums
classpath
clickstream
clientConfig
codebase
codec
colocated
colocation
compactable
compactionTask
config
configs
consumerProperties
cron
csv
customizable
dataset
datasets
datasketches
datasource
datasources
dbcp
deepstore
denormalization
denormalize
denormalized
deprioritization
deprioritizes
dequeued
deserialization
deserialize
deserialized
deserializes
downtimes
druid
druid–kubernetes-extensions
e.g.
encodings
endian
endpointConfig
enum
expectedType
expr
failover
featureSpec
findColumnsFromHeader
filenames
filesystem
firefox
firehose
firehoses
fromPigAvroStorage
frontends
granularities
granularitySpec
gzip
gzipped
hadoop
hasher
hashtable
high-QPS
historicals
hostname
hostnames
http
https
idempotency
i.e.
influxdb
ingestionSpec
injective
inlined
inSubQueryThreshold
interruptible
isAllowList
jackson-jq
javadoc
joinable
json_keys
json_object
json_paths
json_query
json_value
kerberos
keystore
keytool
keytab
kubernetes
laning
lifecycle
localhost
log4j
log4j2
log4j2.xml
lookback
lookups
mapreduce
masse
maxNumericInFilters
maxNumFiles
maxNumSegments
max_map_count
memcached
mergeable
metadata
millis
misconfiguration
misconfigured
mostAvailableSize
multitenancy
multitenant
mysql
namespace
namespaced
namespaces
natively
netflow
non-nullable
noop
numerics
numShards
parameterized
parse_json
parseable
partitioner
partitionFunction
partitionsSpec
pathParts
performant
plaintext
pluggable
postgres
postgresql
pre-aggregated
pre-aggregates
pre-aggregating
pre-aggregation
pre-computation
pre-compute
pre-computing
pre-configured
pre-filtered
pre-filtering
pre-generated
pre-made
pre-processing
preemptible
prefetch
prefetched
prefetching
prepend
prepended
prepending
prepends
prepopulated
preprocessing
priori
procs
processFromRaw
programmatically
proto
proxied
proxyConfig
QPS
quantile
quantiles
queryable
quickstart
realtime
rebalance
redis
regexes
reimported
reindex
reindexing
reingest
reingesting
reingestion
repo
requireSSL
rollup
rollups
rsync
runtime
schemas
schemaless
searchable
secondaryPartitionPruning
seekable-stream
servlet
setProcessingThreadNames
simple-client-sslcontext
sharded
sharding
skipHeaderRows
Smoosh
smoosh
smooshed
splittable
ssl
sslmode
stdout
storages
stringified
subarray
subnet
subqueries
subquery
subsecond
substring
subtask
subtasks
supervisorTaskId
symlink
syntaxes
tiering
timeseries
timestamp
timestamps
to_json_string
tradeoffs
transformSpec
try_parse_json
tsv
ulimit
unannounce
unannouncements
unary
unassign
uncomment
underutilization
unintuitive
unioned
unmergeable
unmerged
UNNEST
unparseable
unparsed
unsetting
untrusted
useFilterCNF
useJqSyntax
useSSL
uptime
uris
urls
useFieldDiscovery
v1
v2
vCPUs
validator
varchar
vectorizable
vectorize
vectorizeVirtualColumns
versioning
virtualColumns
w.r.t.
whitelist
whitelisted
whitespace
wildcard
wildcards
xml
znode
znodes
APPROX_COUNT_DISTINCT
APPROX_QUANTILE
ARRAY_AGG
BIGINT
CATALOG_NAME
CHARACTER_MAXIMUM_LENGTH
CHARACTER_OCTET_LENGTH
CHARACTER_SET_NAME
COLLATION_NAME
COLUMN_DEFAULT
COLUMN_NAME
Concats
DATA_TYPE
DATETIME_PRECISION
DEFAULT_CHARACTER_SET_CATALOG
DEFAULT_CHARACTER_SET_NAME
DEFAULT_CHARACTER_SET_SCHEMA
ISODOW
ISOYEAR
IS_NULLABLE
JDBC_TYPE
MIDDLE_MANAGER
MILLIS_TO_TIMESTAMP
NULLable
NUMERIC_PRECISION
NUMERIC_PRECISION_RADIX
NUMERIC_SCALE
ORDINAL_POSITION
POSIX
PT1M
PT5M
SCHEMA_NAME
SCHEMA_OWNER
SERVER_SEGMENTS
SMALLINT
SQL_PATH
STRING_AGG
SYSTEM_TABLE
TABLE_CATALOG
TABLE_NAME
TABLE_SCHEMA
TABLE_TYPE
TIME_PARSE
TIME_SHIFT
TINYINT
VARCHAR
avg_num_rows
avg_size
created_time
current_size
detailed_state
druid.server.maxSize
druid.server.tier
druid.sql.planner.maxSemiJoinRowsInMemory
druid.sql.planner.sqlTimeZone
druid.sql.planner.useApproximateCountDistinct
druid.sql.planner.useApproximateTopN
druid.sql.planner.useGroupingSetForExactDistinct
druid.sql.planner.useNativeQueryExplain
error_msg
exprs
group_id
interval_expr
is_active
is_available
is_leader
is_overshadowed
is_published
is_realtime
java.sql.Types
last_compaction_state
max_size
num_replicas
num_rows
num_segments
partition_num
plaintext_port
queue_insertion_time
runner_status
segment_id
server_type
shard_spec
sqlTimeZone
supervisor_id
sys
sys.segments
task_id
timestamp_expr
tls_port
total_size
useApproximateCountDistinct
useGroupingSetForExactDistinct
useApproximateTopN
wikipedia
enableTimeBoundaryPlanning
TimeBoundary
druid.query.default.context.enableTimeBoundaryPlanning
IEC
 - ../docs/comparisons/druid-vs-elasticsearch.md
100x
 - ../docs/configuration/logging.md
_common
appenders
 - ../docs/dependencies/deep-storage.md
druid-hdfs-storage
druid-s3-extensions
druid.sql.planner.maxNumericInFilters
 - ../docs/dependencies/metadata-storage.md
BasicDataSource
 - ../docs/dependencies/zookeeper.md
LeaderLatch
3.5.x
3.4.x
 - ../docs/design/auth.md
AllowAll
AuthenticationResult
AuthorizationLoadingLookupTest
HttpClient
allowAll
authenticatorChain
defaultUser
 - ../docs/design/coordinator.md
inputSegmentSizeBytes
skipOffsetFromLatest
 - ../docs/design/router.md
brokerService
c3.2xlarge
defaultManualBrokerService
maxPriority
minPriority
runtime.properties
timeBoundary
 - ../docs/design/segments.md
0x0
0x9
2GB
300mb-700mb
Bieber
IndexTask-based
Ke
datasource_intervalStart_intervalEnd_version_partitionNum
partitionNum
v9
 - ../docs/development/build.md
3.x
8u92
DskipTests
Papache-release
Pdist
Ddruid.console.skip
yaml
Phadoop3
dist-hadoop3
hadoop3
hadoop2
2.x.x
3.x.x
 - ../docs/development/extensions-contrib/ambari-metrics-emitter.md
ambari-metrics
metricName
trustStore
 - ../docs/development/extensions-core/azure.md
StaticAzureBlobStoreFirehose
StaticS3Firehose
fetchTimeout
gz
maxCacheCapacityBytes
maxFetchCapacityBytes
maxFetchRetry
prefetchTriggerBytes
shardSpecs
sharedAccessStorageToken
 - ../docs/development/extensions-contrib/cloudfiles.md
StaticCloudFilesFirehose
cloudfiles
rackspace-cloudfiles-uk
rackspace-cloudfiles-us
StaticAzureBlobStoreFirehose
gz
shardSpecs
maxCacheCapacityBytes
maxFetchCapacityBytes
fetchTimeout
maxFetchRetry
 - ../docs/development/extensions-contrib/distinctcount.md
distinctCount
groupBy
maxIntermediateRows
numValuesPerPass
queryGranularity
segmentGranularity
topN
visitor_id
 - ../docs/development/extensions-contrib/influx.md
cpu
web_requests
 - ../docs/development/extensions-contrib/influxdb-emitter.md
_
druid_
druid_cache_total
druid_hits
druid_query
historical001
 - ../docs/development/extensions-contrib/materialized-view.md
HadoopTuningConfig
TuningConfig
base-dataSource's
baseDataSource
baseDataSource-hashCode
classpathPrefix
derivativeDataSource
druid.extensions.hadoopDependenciesDir
hadoopDependencyCoordinates
maxTaskCount
metricsSpec
queryType
tuningConfig
 - ../docs/development/extensions-contrib/momentsketch-quantiles.md
arcsinh
fieldName
momentSketchMerge
momentsketch
 - ../docs/development/extensions-contrib/moving-average-query.md
10-minutes
MeanNoNulls
P1D
cycleSize
doubleMax
doubleAny
doubleMean
doubleMeanNoNulls
doubleMin
doubleSum
druid.generic.useDefaultValueForNull
druid.generic.ignoreNullsForStringCardinality
limitSpec
longMax
longAny
longMean
longMeanNoNulls
longMin
longSum
movingAverage
postAggregations
postAveragers
pull-deps
 - ../docs/development/extensions-contrib/opentsdb-emitter.md
defaultMetrics.json
namespacePrefix
src
 - ../docs/development/extensions-contrib/redis-cache.md
loadList
pull-deps
PT2S
 - ../docs/development/extensions-contrib/sqlserver.md
com.microsoft.sqlserver.jdbc.SQLServerDriver
sqljdbc
 - ../docs/development/extensions-contrib/statsd.md
convertRange
 - ../docs/development/extensions-contrib/prometheus.md
HTTPServer
conversionFactor
prometheus
Pushgateway
 - ../docs/development/extensions-contrib/tdigestsketch-quantiles.md
postAggregator
quantileFromTDigestSketch
quantilesFromTDigestSketch
tDigestSketch
 - ../docs/development/extensions-contrib/thrift.md
HadoopDruidIndexer
LzoThriftBlock
SequenceFile
classname
hadoop-lzo
inputFormat
inputSpec
ioConfig
parseSpec
thriftClass
thriftJar
 - ../docs/development/extensions-contrib/time-min-max.md
timeMax
timeMin
 - ../docs/development/extensions-contrib/aliyun-oss-extensions.md
Alibaba
Aliyun
aliyun-oss-extensions
AccessKey
accessKey
aliyun-oss
json
OSS
oss
secretKey
url
 - ../docs/development/extensions-core/approximate-histograms.md
approxHistogram
approxHistogramFold
fixedBucketsHistogram
bucketNum
lowerLimit
numBuckets
upperLimit
 - ../docs/development/extensions-core/avro.md
AVRO-1124
Avro-1124
SchemaRepo
avro
avroBytesDecoder
protoBytesDecoder
flattenSpec
jq
org.apache.druid.extensions
schemaRepository
schema_inline
subjectAndIdConverter
url
 - ../docs/development/extensions-core/bloom-filter.md
BloomKFilter
bitset
outputStream
 - ../docs/development/extensions-core/datasketches-hll.md
HLLSketchBuild
HLLSketchMerge
lgK
log2
tgtHllType
 - ../docs/development/extensions-core/datasketches-quantiles.md
CDF
DoublesSketch
maxStreamLength
PMF
quantilesDoublesSketch
toString
 - ../docs/development/extensions-core/datasketches-theta.md
isInputThetaSketch
thetaSketch
user_id
 - ../docs/development/extensions-core/datasketches-tuple.md
ArrayOfDoublesSketch
arrayOfDoublesSketch
metricColumns
nominalEntries
numberOfValues
 - ../docs/development/extensions-core/druid-basic-security.md
INFORMATION_SCHEMA
MyBasicAuthenticator
MyBasicAuthorizer
authenticatorName
authorizerName
druid_system
pollingPeriod
roleName
LDAP
ldap
MyBasicMetadataAuthenticator
MyBasicLDAPAuthenticator
MyBasicMetadataAuthorizer
MyBasicLDAPAuthorizer
credentialsValidator
sAMAccountName
objectClass
initialAdminRole
adminGroupMapping
groupMappingName
 - ../docs/development/extensions-core/druid-kerberos.md
8KiB
HttpComponents
MyKerberosAuthenticator
RFC-4559
SPNego
_HOST
 - ../docs/development/extensions-core/druid-lookups.md
cacheFactory
concurrencyLevel
dataFetcher
expireAfterAccess
expireAfterWrite
initialCapacity
loadingCacheSpec
maxEntriesSize
maxStoreSize
maximumSize
onHeapPolling
pollPeriod
reverseLoadingCacheSpec
 - ../docs/development/extensions-core/druid-pac4j.md
OAuth
Okta
OpenID
pac4j
 - ../docs/development/extensions-core/kubernetes.md
Env
POD_NAME
POD_NAMESPACE
ConfigMap
PT17S
 - ../docs/development/extensions-core/google.md
GCS
StaticGoogleBlobStoreFirehose
 - ../docs/development/extensions-core/hdfs.md
gcs-connector
hadoop2
hdfs
 - ../docs/development/extensions-core/kafka-extraction-namespace.md
Aotearoa
Czechia
KTable
LookupExtractorFactory
Zeelund
zookeeper.connect
 - ../docs/development/extensions-core/kafka-ingestion.md
0.11.x.
00Z
2016-01-01T11
2016-01-01T12
2016-01-01T14
CONNECTING_TO_STREAM
CREATING_TASKS
DISCOVERING_INITIAL_TASKS
KafkaSupervisorIOConfig
KafkaSupervisorTuningConfig
LOST_CONTACT_WITH_STREAM
OffsetOutOfRangeException
P2147483647D
PT10M
PT10S
PT1H
PT30M
PT30S
PT5S
PT80S
SASL
SegmentWriteOutMediumFactory
UNABLE_TO_CONNECT_TO_STREAM
UNHEALTHY_SUPERVISOR
UNHEALTHY_TASKS
dimensionCompression
earlyMessageRejectionPeriod
indexSpec
intermediateHandoffPeriod
longEncoding
maxBytesInMemory
maxPendingPersists
maxRowsInMemory
maxRowsPerSegment
maxSavedParseExceptions
maxTotalRows
metricCompression
numKafkaPartitions
taskCount
taskDuration
 - ../docs/development/extensions-core/kinesis-ingestion.md
9.2dist
KinesisSupervisorIOConfig
KinesisSupervisorTuningConfig
Resharding
resharding
LZ4LZFuncompressedLZ4LZ4LZFuncompressednoneLZ4autolongsautolongslongstypeconcisetyperoaringcompressRunOnSerializationtruetypestreamendpointreplicastaskCounttaskCount
deaggregate
druid-kinesis-indexing-service
maxRecordsPerPoll
maxRecordsPerPollrecordsPerFetchfetchDelayMillisreplicasfetchDelayMillisrecordsPerFetchfetchDelayMillismaxRecordsPerPollamazon-kinesis-client1
numKinesisShards
numProcessors
q.size
repartitionTransitionDuration
replicastaskCounttaskCount
resetuseEarliestSequenceNumberPOST
resumePOST
statusrecentErrorsdruid.supervisor.maxStoredExceptionEventsstatedetailedStatestatedetailedStatestatestatePENDINGRUNNINGSUSPENDEDSTOPPINGUNHEALTHY_SUPERVISORUNHEALTHY_TASKSdetailedStatestatedruid.supervisor.unhealthinessThresholddruid.supervisor.taskUnhealthinessThresholdtaskDurationtaskCountreplicasdetailedStatedetailedStateRUNNINGPOST
supervisorPOST
supervisorfetchThreadsfetchDelayMillisrecordsPerFetchmaxRecordsPerPollpoll
suspendPOST
taskCounttaskDurationreplicas
taskCounttaskDurationtaskDurationPOST
taskDurationstartDelayperioduseEarliestSequenceNumbercompletionTimeouttaskDurationlateMessageRejectionPeriodPT1HearlyMessageRejectionPeriodPT1HPT1HrecordsPerFetchfetchDelayMillisawsAssumedRoleArnawsExternalIddeaggregateGET
terminatePOST
terminatedruid.worker.capacitytaskDurationcompletionTimeoutreplicastaskCountreplicas
PT2M
kinesis.us
amazonaws.com
PT6H
GetRecords
KCL
signalled
ProvisionedThroughputExceededException
Deaggregation
 - ../docs/development/extensions-core/lookups-cached-global.md
baz
customJson
lookupParseSpec
namespaceParseSpec
simpleJson
 - ../docs/development/extensions-core/orc.md
dimensionSpec
flattenSpec
 - ../docs/development/extensions-core/parquet.md
binaryAsString
 - ../docs/development/extensions-core/postgresql.md
sslFactory's
sslMode
 - ../docs/development/extensions-core/protobuf.md
Proto
metrics.desc
metrics.desc.
metrics.proto.
metrics_pb
protoMessageType
timeAndDims
tmp
 - ../docs/development/extensions-core/s3.md
SigV4
jvm.config
kms
s3
s3a
s3n
uris
 - ../docs/development/extensions-core/simple-client-sslcontext.md
KeyManager
SSLContext
TrustManager
 - ../docs/development/extensions-core/stats.md
GenericUDAFVariance
Golub
J.L.
LeVeque
Numer
chunk1
chunk2
stddev
t1
t2
variance1
variance2
varianceFold
variance_pop
variance_sample
 - ../docs/development/extensions-core/test-stats.md
Berry_statbook
Berry_statbook_chpt6.pdf
S.E.
engineering.com
jcb0773
n1
n2
p1
p2
pvalue2tailedZtest
sqrt
successCount1
successCount2
www.isixsigma.com
www.paypal
www.ucs.louisiana.edu
zscore
zscore2sample
ztests
 - ../docs/development/extensions.md
DistinctCount
artifactId
com.example
common.runtime.properties
druid-aws-rds-extensions
druid-cassandra-storage
druid-distinctcount
druid-ec2-extensions
druid-kafka-extraction-namespace
druid-kafka-indexing-service
druid-opentsdb-emitter
druid-protobuf-extensions
druid-tdigestsketch
druid.apache.org
groupId
jvm-global
kafka-emitter
org.apache.druid.extensions.contrib.
pull-deps
sqlserver-metadata-storage
statsd-emitter
 - ../docs/development/geo.md
coords
dimName
maxCoords
Mb
minCoords
 - ../docs/development/javascript.md
Metaspace
dev
 - ../docs/development/modules.md
AggregatorFactory
ArchiveTask
ComplexMetrics
DataSegmentArchiver
DataSegmentKiller
DataSegmentMover
DataSegmentPuller
DataSegmentPusher
DruidModule
ExtractionFns
HdfsStorageDruidModule
JacksonInject
MapBinder
MoveTask
ObjectMapper
PasswordProvider
PostAggregators
QueryRunnerFactory
SegmentMetadataQuery
SegmentMetadataQueryQueryToolChest
StaticS3FirehoseFactory
loadSpec
multibind
pom.xml
 - ../docs/ingestion/data-formats.md
0.6.x
0.7.x
0.7.x.
TimeAndDims
column2
column_1
column_n
com.opencsv
ctrl
headerFormat
headerLabelPrefix
jsonLowercase
kafka
KafkaStringHeaderFormat
kafka.header.
kafka.key
kafka.timestamp
keyColumnName
keyFormat
listDelimiter
timestampColumnName
timestampSpec
urls
valueFormat
 - ../docs/ingestion/data-management.md
1GB
IOConfig
compactionTask
compactionTasks
ingestSegmentFirehose
numShards
 - ../docs/ingestion/faq.md
IngestSegment
IngestSegmentFirehose
maxSizes
windowPeriod
 - ../docs/ingestion/hadoop.md
2012-01-01T00
2012-01-03T00
2012-01-05T00
2012-01-07T00
500MB
CombineTextInputFormat
HadoopIndexTask
InputFormat
InputSplit
JobHistory
a.example.com
assumeGrouped
awaitSegmentAvailabilityTimeoutMillis
cleanupOnFailure
combineText
connectURI
dataGranularity
datetime
f.example.com
filePattern
forceExtendableShardSpecs
ignoreInvalidRows
ignoreWhenNoSegments
indexSpecForIntermediatePersists
index_hadoop
inputPath
inputSpecs
interval1
interval2
jobProperties
leaveIntermediate
logParseExceptions
mapred.map.tasks
mapreduce.job.maps
maxParseExceptions
maxPartitionSize
maxSplitSize
metadataUpdateSpec
numBackgroundPersistThreads
overwriteFiles
partitionDimension
partitionDimensions
partitionSpec
pathFormat
segmentOutputPath
segmentTable
shardSpec
single_dim
targetPartitionSize
targetRowsPerSegment
useCombiner
useExplicitVersion
useNewAggs
useYarnRMJobStatusFallback
workingPath
z.example.com
 - ../docs/ingestion/native-batch.md
150MB
CombiningFirehose
DataSchema
DefaultPassword
EnvironmentVariablePasswordProvider
HttpFirehose
IOConfig
InlineFirehose
LocalFirehose
PartitionsSpec
PasswordProviders
SegmentsSplitHintSpec
SplitHintSpec
accessKeyId
appendToExisting
baseDir
chatHandlerNumRetries
chatHandlerTimeout
cityName
connectorConfig
countryName
dataSchema's
dropExisting
foldCase
forceGuaranteedRollup
httpAuthenticationPassword
httpAuthenticationUsername
ingestSegment
InputSource
DruidInputSource
maxColumnsToMerge
maxInputSegmentBytesPerTask
maxNumConcurrentSubTasks
maxNumSegmentsToMerge
maxRetry
pushTimeout
reportParseExceptions
secretAccessKey
segmentWriteOutMediumFactory
sql
sqls
splitHintSpec
taskStatusCheckPeriodMs
timeChunk
totalNumMergeTasks
StaticS3Firehose
prefetchTriggerBytes
awaitSegmentAvailabilityTimeoutMillis
 - ../docs/ingestion/native-batch-firehose.md
LocalFirehose 
baseDir
HttpFirehose 
httpAuthenticationUsername
DefaultPassword
PasswordProviders
EnvironmentVariablePasswordProvider
ingestSegment 
maxInputSegmentBytesPerTask
150MB
foldCase
sqls
connectorConfig
InlineFirehose 
CombiningFirehose
httpAuthenticationPassword 
 - ../docs/ingestion/native-batch-input-source.md
accessKeyId
secretAccessKey
accessKeyId
httpAuthenticationPassword
countryName
 - ../docs/ingestion/native-batch-simple-task.md
dataSchema's
appendToExisting
dropExisting
timeChunk 
PartitionsSpec
forceGuaranteedRollup
reportParseExceptions
pushTimeout
segmentWriteOutMediumFactory
 - ../docs/ingestion/schema-design.md
product_category
product_id
product_name
 - ../docs/ingestion/tasks.md
BUILD_SEGMENTS
DETERMINE_PARTITIONS
forceTimeChunkLock
taskLockTimeout
 - ../docs/misc/math-expr.md
DOUBLE_ARRAY
DOY
DateTimeFormat
LONG_ARRAY
Los_Angeles
P3M
PT12H
STRING_ARRAY
String.format
acos
args
arr1
arr2
array_append
array_concat
array_set_add
array_set_add_all
array_contains
array_length
array_offset
array_offset_of
array_ordinal
array_ordinal_of
array_overlap
array_prepend
array_slice
array_to_string
asin
atan
atan2
bitwise
bitwiseAnd
bitwiseComplement
bitwiseConvertDoubleToLongBits
bitwiseConvertLongBitsToDouble
bitwiseOr
bitwiseShiftLeft
bitwiseShiftRight
bitwiseXor
bloom_filter_test
cartesian_fold
cartesian_map
case_searched
case_simple
cbrt
concat
copysign
expm1
expr
expr1
expr2
expr3
expr4
fromIndex
getExponent
hypot
ipv4_match
ipv4_parse
isnull
ipv4_stringify
java.lang.Math
java.lang.String
log10
log1p
lpad
ltrim
nextUp
nextafter
notnull
nvl
parse_long
regexp_extract
regexp_like
contains_string
icontains_string
result1
result2
rint
rpad
rtrim
safe_divide
scalb
signum
str1
str2
string_to_array
stringAny
strlen
strpos
timestamp_ceil
timestamp_extract
timestamp_floor
timestamp_format
timestamp_parse
timestamp_shift
todegrees
toradians
ulp
unix_timestamp
value1
value2
valueOf
IEC
human_readable_binary_byte_format
human_readable_decimal_byte_format
human_readable_decimal_format
 - ../docs/misc/papers-and-talks.md
RADStack
 - ../docs/operations/api-reference.md
00.000Z
2015-09-12T03
2015-09-12T05
2016-06-27_2016-06-28
Param
SupervisorSpec
dropRule
druid.query.segmentMetadata.defaultHistory
isointerval
json
loadRule
maxTime
minTime
numCandidates
param
segmentId1
segmentId2
taskId
taskid
un
 - ../docs/operations/basic-cluster-tuning.md
100MiB
128MiB
15ms
2.5MiB
24GiB
256MiB
30GiB-60GiB
4GiB
5MB
64KiB
8GiB
G1GC
GroupBys
QoS-type
 - ../docs/operations/dump-segment.md
DumpSegment
SegmentMetadata
__time
bitmapSerdeFactory
columnName
index.zip
time-iso8601
 - ../docs/operations/export-metadata.md
hadoopStorageDirectory
 - ../docs/operations/insert-segment-to-db.md
0.14.x
 - ../docs/operations/java.md
G1
Temurin
 - ../docs/operations/metrics.md
0.14.x
1s
Bufferpool
EventReceiverFirehose
EventReceiverFirehoseMonitor
Filesystem
JVMMonitor
QueryCountStatsMonitor
RealtimeMetricsMonitor
Sys
SysMonitor
TaskCountStatsMonitor
TaskSlotCountStatsMonitor
WorkerTaskCountStatsMonitor
workerVersion
bufferCapacity
bufferpoolName
cms
cpuName
cpuTime
druid.server.http.numThreads
druid.server.http.queueSize
fsDevName
fsDirName
fsOptions
fsSysTypeName
fsTypeName
g1
gcGen
gcName
handoffed
hasFilters
memKind
nativeQueryIds
netAddress
netHwaddr
netName
noticeType
numComplexMetrics
numDimensions
numMetrics
poolKind
poolName
remoteAddress
segmentAvailabilityConfirmed
serviceName
taskIngestionMode
taskStatus
taskType
threadPoolNumBusyThreads.
threadPoolNumIdleThreads
threadPoolNumTotalThreads.
 - ../docs/operations/other-hadoop.md
CDH
Classloader
assembly.sbt
build.sbt
classloader
druid_build
mapred-default
mapred-site
sbt
scala-2
 - ../docs/operations/pull-deps.md
org.apache.hadoop
proxy.com.
remoteRepository
 - ../docs/operations/recommendations.md
JBOD
druid.processing.buffer.sizeBytes.
druid.processing.numMergeBuffers
druid.processing.numThreads
tmpfs
 - ../docs/operations/rule-configuration.md
broadcastByInterval
broadcastByPeriod
broadcastForever
colocatedDataSources
dropBeforeByPeriod
dropByInterval
dropByPeriod
dropForever
loadByInterval
loadByPeriod
loadForever
 - ../docs/operations/segment-optimization.md
700MB
 - ../docs/operations/single-server.md
128GiB
16GiB
256GiB
4GiB
512GiB
64GiB
Nano-Quickstart
i3
i3.16xlarge
i3.2xlarge
i3.4xlarge
i3.8xlarge
 - ../docs/operations/tls-support.md
CN
subjectAltNames
 - ../docs/querying/aggregations.md
HyperUnique
hyperUnique
longSum
 - ../docs/querying/datasource.md
groupBys
 - ../docs/querying/datasourcemetadataquery.md
dataSourceMetadata
 - ../docs/querying/dimensionspecs.md
ExtractionDimensionSpec
SimpleDateFormat
bar_1
dimensionSpecs
isWhitelist
joda
nullHandling
product_1
product_3
registeredLookup
timeFormat
tz
v3
weekyears
 - ../docs/querying/filters.md
___bar
caseSensitive
extractionFn
insensitive_contains
last_name
lowerStrict
upperStrict
 - ../docs/querying/granularities.md
1970-01-01T00
P2W
PT0.750S
PT1H30M
TimeseriesQuery
 - ../docs/querying/groupbyquery.md
D1
D2
D3
druid.query.groupBy.defaultStrategy
druid.query.groupBy.maxSelectorDictionarySize
druid.query.groupBy.maxMergingDictionarySize
druid.query.groupBy.maxOnDiskStorage
druid.query.groupBy.maxResults.
groupByStrategy
maxOnDiskStorage
maxResults
orderby
orderbys
outputName
pre-existing
pushdown
row1
subtotalsSpec
tradeoff
unnested
unnesting
 - ../docs/querying/having.md
HavingSpec
HavingSpecs
dimSelector
equalTo
greaterThan
lessThan
 - ../docs/querying/hll-old.md
DefaultDimensionSpec
druid-hll
isInputHyperUnique
 - ../docs/querying/joins.md
pre-join
 - ../docs/querying/limitspec.md
DefaultLimitSpec
OrderByColumnSpec
OrderByColumnSpecs
dimensionOrder
 - ../docs/querying/lookups.md
60_000
kafka-extraction-namespace
mins
tierName
 - ../docs/querying/multi-value-dimensions.md
row2
row3
row4
t3
t4
t5
groupByEnableMultiValueUnnesting
unnesting
 - ../docs/querying/multitenancy.md
500ms
tenant_id
 - ../docs/querying/post-aggregations.md
fieldAccess
finalizingFieldAccess
hyperUniqueCardinality
 - ../docs/querying/query-context.md
brokerService
bySegment
doubleSum
druid.broker.cache.populateCache
druid.broker.cache.populateResultLevelCache
druid.broker.cache.useCache
druid.broker.cache.useResultLevelCache
druid.historical.cache.populateCache
druid.historical.cache.useCache
enableParallelMerge
enableJoinLeftTableScanDirect
enableJoinFilterPushDown
enableJoinFilterRewrite
enableRewriteJoinToFilter
enableJoinFilterRewriteValueColumnFilters
floatSum
joinFilterRewriteMaxSize
maxQueuedBytes
maxScatterGatherBytes
minTopNThreshold
parallelMergeInitialYieldRows
parallelMergeParallelism
parallelMergeSmallBatchRows
populateCache
populateResultLevelCache
queryId
row-matchers
serializeDateTimeAsLong
serializeDateTimeAsLongInner
skipEmptyBuckets
useCache
useResultLevelCache
vectorSize
enableJoinLeftTableScanDirect
enableJoinFilterPushDown
enableJoinFilterRewrite
enableJoinFilterRewriteValueColumnFilters
joinFilterRewriteMaxSize
 - ../docs/querying/querying.md
7KiB
DatasourceMetadata
TimeBoundary
errorClass
errorMessage
x-jackson-smile
 - ../docs/querying/scan-query.md
batchSize
compactedList
druid.query.scan.legacy
druid.query.scan.maxRowsQueuedForOrdering
druid.query.scan.maxSegmentPartitionsOrderedInMemory
maxRowsQueuedForOrdering
maxSegmentPartitionsOrderedInMemory
resultFormat
valueVector
 - ../docs/querying/searchquery.md
SearchQuerySpec
cursorOnly
druid.query.search.searchStrategy
queryableIndexSegment
searchDimensions
searchStrategy
useIndexes
 - ../docs/querying/searchqueryspec.md
ContainsSearchQuerySpec
FragmentSearchQuerySpec
InsensitiveContainsSearchQuerySpec
RegexSearchQuerySpec
 - ../docs/querying/segmentmetadataquery.md
analysisType
analysisTypes
lenientAggregatorMerge
minmax
segmentMetadata
toInclude
 - ../docs/querying/select-query.md
PagingSpec
fromNext
pagingSpec
 - ../docs/querying/sorting-orders.md
BoundFilter
GroupByQuery's
SearchQuery
TopNMetricSpec
compareTo
file12
file2
 - ../docs/querying/sql-operators.md
_x_
 - ../docs/querying/timeseriesquery.md
fieldName1
fieldName2
 - ../docs/querying/topnmetricspec.md
DimensionTopNMetricSpec
metricSpec
previousStop
 - ../docs/querying/topnquery.md
GroupByQuery
top500
 - ../docs/querying/virtual-columns.md
outputType
 - ../docs/tutorials/cluster.md
1.9TB
16CPU
WebUpd8
m5.2xlarge
metadata.storage.
256GiB
128GiB
 - ../docs/tutorials/tutorial-batch-hadoop.md
PATH_TO_DRUID
namenode
 - ../docs/tutorials/tutorial-delete-data.md
segmentID
segmentIds
 - ../docs/tutorials/tutorial-ingestion-spec.md
dstIP
dstPort
srcIP
srcPort
 - ../docs/tutorials/tutorial-kerberos-hadoop.md
common_runtime_properties
druid.extensions.directory
druid.extensions.loadList
druid.hadoop.security.kerberos.keytab
druid.hadoop.security.kerberos.principal
druid.indexer.logs.directory
druid.indexer.logs.type
druid.storage.storageDirectory
druid.storage.type
hdfs.headless.keytab
indexing_log
keytabs
 - ../docs/tutorials/tutorial-query.md
dsql
 - ../docs/tutorials/tutorial-retention.md
2015-09-12T12
 - ../docs/tutorials/tutorial-sketches-theta.md
clickstreams
uid
_k_
Bridgerton
Hellmar
 - ../docs/tutorials/tutorial-update-data.md
bear-111
 - ../docs/configuration/index.md
10KiB
2GiB
512KiB
1GiB
KiB
GiB
00.000Z
100ms
10ms
1GB
1_000_000
2012-01-01T00
2GB
30_000
524288000L
5MiB
8u60
Autoscaler
APPROX_COUNT_DISTINCT_BUILTIN
AvaticaConnectionBalancer
EventReceiverFirehose
File.getFreeSpace
File.getTotalSpace
ForkJoinPool
GCE
HadoopIndexTasks
HttpEmitter
HttpPostEmitter
InetAddress.getLocalHost
IOConfig
JRE8u60
KeyManager
L1
L2
ListManagedInstances
LoadSpec
LoggingEmitter
Los_Angeles
MDC
NoopServiceEmitter
NUMA
ONLY_EVENTS
P1D
P1W
PT-1S
PT0.050S
PT10M
PT10S
PT15M
PT1800S
PT1M
PT1S
PT24H
PT300S
PT30S
PT3600S
PT5M
PT5S
PT60S
PT90M
Param
Runtime.maxMemory
SSLContext
SegmentMetadata
SegmentWriteOutMediumFactory
ServiceEmitter
System.getProperty
TLSv1.2
TrustManager
TuningConfig
_N_
_default
_default_tier
addr
affinityConfig
allowAll
ANDed
array_mod
autoscale
autoscalers
batch_index_task
cgroup
classloader
com.metamx
common.runtime.properties
cpuacct
dataSourceName
datetime
defaultHistory
doubleMax
doubleMin
doubleSum
druid.enableTlsPort
druid.indexer.autoscale.workerVersion
druid.service
druid.storage.disableAcl
druid_audit
druid_config
druid_dataSource
druid_pendingSegments
druid_rules
druid_segments
druid_supervisors
druid_taskLock
druid_taskLog
druid_tasks
DruidQueryRel
durationToRetain
ec2
equalDistribution
extractionFn
file.encoding
fillCapacity
first_location
floatMax
floatAny
floatMin
floatSum
freeSpacePercent
gce
gce-extensions
getCanonicalHostName
groupBy
hdfs
httpRemote
indexTask
info_dir
inlining
java.class.path
java.io.tmpdir
javaOpts
javaOptsArray
leastBytesUsed
loadList
loadqueuepeon
loadspec
localStorage
maxHeaderSize
maxQueuedBytes
maxSize
middlemanager
minTimeMs
minmax
mins
nullable
orderby
orderbys
org.apache.druid
org.apache.druid.jetty.RequestLog
org.apache.hadoop
overlord.html
pendingSegments
pre-flight
preloaded
queryType
remoteTaskRunnerConfig
rendezvousHash
replicants
resultsets
roundRobin
runtime.properties
runtime.properties.
s3
s3a
s3n
slf4j
sql
sqlQuery
successfulSending
taskBlackListCleanupPeriod
tasklogs
timeBoundary
timestampSpec
tmp
tmpfs
truststore
tuningConfig
unioning
useIndexes
user.timezone
v0.12.0
versionReplacementString
workerId
yyyy-MM-dd
taskType
index_kafka
c1
c2
ds1
equalDistributionWithCategorySpec
fillCapacityWithCategorySpec
WorkerCategorySpec
workerCategorySpec
CategoryConfig
 - ../docs/design/index.md
logsearch
 - ../docs/ingestion/index.md
2000-01-01T01
DateTimeFormat
JsonPath
autodetect
createBitmapIndex
dimensionExclusions
expr
jackson-jq
missingValue
skipBytesInMemoryOverheadCheck
spatialDimensions
useFieldDiscovery
 - ../docs/tutorials/index.md
4CPU
cityName
countryIsoCode
countryName
isAnonymous
isMinor
isNew
isRobot
isUnpatrolled
metroCode
regionIsoCode
regionName
4GiB
512GiB
 - ../docs/development/extensions-core/druid-ranger-security.md
json
metastore
UserGroupInformation
CVE-2019-17571
CVE-2019-12399
CVE-2018-17196
bin.tar.gz
 - ../docs/configuration/human-readable-byte.md
0s
1T
3G
1_000
1_000_000
1_000_000_000
1_000_000_000_000
1_000_000_000_000_000
Giga
Tera
Peta
KiB
MiB
GiB
TiB
PiB
protobuf
Golang
multiValueHandling
_n_
100TB
 - ../docs/querying/sql-functions.md
ANY_VALUE 
APPROX_COUNT_DISTINCT_DS_HLL 
APPROX_COUNT_DISTINCT_DS_THETA 
APPROX_QUANTILE_DS 
APPROX_QUANTILE_FIXED_BUCKETS 
ARRAY_CONCAT_AGG 
BIT_AND 
BIT_OR 
BIT_XOR 
BITWISE_AND 
BITWISE_COMPLEMENT 
BITWISE_CONVERT_DOUBLE_TO_LONG_BITS 
BITWISE_CONVERT_LONG_BITS_TO_DOUBLE 
BITWISE_OR 
BITWISE_SHIFT_LEFT 
BITWISE_SHIFT_RIGHT 
BITWISE_XOR 
BLOOM_FILTER 
BTRIM 
CHAR_LENGTH 
CHARACTER_LENGTH 
CURRENT_DATE 
CURRENT_TIMESTAMP 
DATE_TRUNC 
DS_CDF 
DS_GET_QUANTILE 
DS_GET_QUANTILES 
DS_HISTOGRAM 
DS_HLL 
DS_QUANTILE_SUMMARY 
DS_QUANTILES_SKETCH 
DS_RANK 
DS_THETA 
EARLIEST_BY 
_e_
HLL_SKETCH_ESTIMATE 
HLL_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS 
HLL_SKETCH_TO_STRING 
HLL_SKETCH_UNION 
LATEST_BY 
base-10
MV_APPEND 
MV_CONCAT 
MV_CONTAINS 
MV_FILTER_NONE 
MV_FILTER_ONLY 
MV_LENGTH 
MV_OFFSET 
MV_OFFSET_OF 
MV_ORDINAL 
MV_ORDINAL_OF 
MV_OVERLAP 
MV_PREPEND 
MV_SLICE 
MV_TO_STRING 
NULLIF 
_n_th
STDDEV_POP 
STDDEV_SAMP 
STRING_FORMAT 
STRING_TO_MV 
SUBSTR 
TDIGEST_GENERATE_SKETCH 
TDIGEST_QUANTILE 
TEXTCAT 
THETA_SKETCH_ESTIMATE 
THETA_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS 
THETA_SKETCH_INTERSECT 
THETA_SKETCH_NOT 
THETA_SKETCH_UNION 
TIME_CEIL 
TIME_EXTRACT 
TIME_FLOOR 
TIME_FORMAT 
TIME_IN_INTERVAL 
TIMESTAMP_TO_MILLIS 
TIMESTAMPADD 
TIMESTAMPDIFF 
TRUNC 
VAR_POP 
VAR_SAMP
KTable
Aotearoa
Czechia
Zeelund
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								# Licensed to the Apache Software Foundation (ASF) under one or more
 								# contributor license agreements.  See the NOTICE file distributed with
 								# this work for additional information regarding copyright ownership.
 								# The ASF licenses this file to You under the Apache License, Version 2.0
 								# (the "License"); you may not use this file except in compliance with
 								# the License.  You may obtain a copy of the License at
 								#
 								#     http://www.apache.org/licenses/LICENSE-2.0
 								#
 								# Unless required by applicable law or agreed to in writing, software
 								# distributed under the License is distributed on an "AS IS" BASIS,
 								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								# See the License for the specific language governing permissions and
 								# limitations under the License.
 								# markdown-spellcheck spelling configuration file
 								# Format - lines beginning # are comments
 								# global dictionary is at the start, file overrides afterwards
 								# one word per line, to define a file override use ' - filename'
 								# where filename is relative to this configuration file
 -bit
-												Add maxNumFiles to splitHintSpec (#10243)

* Add maxNumFiles to splitHintSpec

* missing link

* fix build failure; use maxNumFiles for integration tests

* spelling

* lower default

* Update docs/ingestion/native-batch.md

Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>

* address comments; change default maxSplitSize

* spelling

* typos and doc

* same change for segments splitHintSpec

* fix build

* fix build

Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
											
										
										
											2020-08-21 12:43:58 -04:00
+MiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+-bit
 								ACL
-												Allow user to set group.id for Kafka ingestion task (#11147)

* allow user to set group.id for Kafka ingestion task

* fix test coverage by removing deprecated code and add doc

* fix typo

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: frank chen <frankchen@apache.org>

Co-authored-by: frank chen <frankchen@apache.org>
											
										
										
											2021-05-08 23:56:19 -04:00
+								ACLs
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								APIs
-												Add Pig-specific file handling to Avro parser (#9258)

* Add processing for data files from AvroStorage

* Add words to spellings file

											
										
										
											2020-02-11 00:53:11 -05:00
+								AvroStorage
-												S3 ingestion can assume role (#10995)

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* tests fix

* spelling fix

* sts fix

Co-authored-by: egor-ryashin <egor.ryashin@rilldata.com>
											
										
										
											2021-06-09 06:32:35 -04:00
+								ARN
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								AWS
-												Add credentials for ECS (#8651)

* Add credentials for ECS

* Fix import order

* Update S3 authentication methods table

* Update .spelling for new documentation

											
										
										
											2019-10-12 12:12:14 -04:00
+								AWS_CONTAINER_CREDENTIALS_RELATIVE_URI
 								AWS_CONTAINER_CREDENTIALS_FULL_URI
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Actian
 								Authorizer
 								Avatica
 								Avro
 								Azul
 								BCP
 								Base64
 								Base64-encoded
 								ByteBuffer
-												Support JsonPath functions in JsonPath expressions (#11722)

* Add jsonPath functions support

* Add jsonPath function test for Avro

* Add jsonPath function length() to Orc

* Add jsonPath function length() to Parquet

* Add more tests to ORC format

* update doc

* Fix exception during ingestion

* Add IT test case

* Revert "Fix exception during ingestion"

This reverts commit 5a5484b9ea9d984622149c8113a566269cc10842.

* update IT test case

* Add 'keys()'

* Commit IT test case

* Fix UT
											
										
										
											2021-12-09 21:53:23 -05:00
+								concat
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								CIDR
 								CORS
-												remove experimental from Kinesis with caveats (#10998)

* remove experimental from Kinesis with caveats

* add suggested known issue

* spelling fixes
											
										
										
											2021-03-29 16:57:58 -04:00
+								CNF
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								CPUs
 								CSVs
 								Ceph
-												remove experimental from Kinesis with caveats (#10998)

* remove experimental from Kinesis with caveats

* add suggested known issue

* spelling fixes
											
										
										
											2021-03-29 16:57:58 -04:00
+								CloudWatch
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								ColumnDescriptor
 								Corretto
 								DDL
 								DML
 								DNS
 								DRUIDVERSION
 								DataSketches
 								DateTime
 								DateType
-												Update automatic compaction docs with consistent terminology (#12416)

* specify automatic compaction where applicable

* Apply suggestions from code review

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* update for style and consistency

* implement suggested feedback

* remove duplicate example

* Apply suggestions from code review

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/ingestion/compaction.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/api-reference.md

* update .spelling

* Adopt review suggestions

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>
											
										
										
											2022-05-03 19:22:25 -04:00
+								dimensionsSpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								DimensionSpec
 								DimensionSpecs
 								Dockerfile
 								DogStatsD
 								Double.NEGATIVE_INFINITY
 								Double.NEGATIVE_INFINITY.
 								Double.POSITIVE_INFINITY
 								Double.POSITIVE_INFINITY.
-												Implementing dropwizard emitter for druid (#7363)

* Implementing dropwizard emitter for druid

making metric manager and alert emitters as optional

* Refactor and make things work

more improvements

improve docs

refactrings

* Fix teamcity inspections

* review comments

* more review comments

* add limit to max number of gauges

* update pom version

* fix pom

* review comments

* review comment

* review comments

* fix broken doc link

review comments

review comments

* review comments

* fix checkstyle

* more spell check fixes

* fix travis failures

											
										
										
											2019-10-01 17:59:30 -04:00
+								Dropwizard
 								dropwizard
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								DruidInputSource
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								DruidSQL
-												document DynamicConfigProvider for kafka consumer properties (#10658)

* document DynamicConfigProvider for kafka consumer properties

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: Jihoon Son <jihoonson@apache.org>

* Update docs/development/extensions-core/kafka-ingestion.md

* fix doc build

Co-authored-by: Jihoon Son <jihoonson@apache.org>
											
										
										
											2020-12-10 11:24:33 -05:00
+								DynamicConfigProvider
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								EC2
-												Add credentials for ECS (#8651)

* Add credentials for ECS

* Fix import order

* Update S3 authentication methods table

* Update .spelling for new documentation

											
										
										
											2019-10-12 12:12:14 -04:00
+								EC2ContainerCredentialsProviderWrapper
 								ECS
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								EMR
 								EMRFS
 								ETL
 								Elasticsearch
-												Avro union support (#10505)

* Avro union support

* Document new union support

* Add support for AvroStreamInputFormat and fix checkstyle

* Extend multi-member union test schema and format

* Some additional docs and add Enums to spelling

* Rename explodeUnions -> extractUnions

* explode -> extract

* ByType

* Correct spelling error
											
										
										
											2021-07-07 01:05:41 -04:00
+								Enums
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								FirehoseFactory
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								FlattenSpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Float.NEGATIVE_INFINITY
-												Fixing a few typos and style issues (#11883)

* grammar and format work

* light writing touchup

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2021-11-16 13:13:35 -05:00
+								Float.NEGATIVE_INFINITY.
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Float.POSITIVE_INFINITY
-												Fixing a few typos and style issues (#11883)

* grammar and format work

* light writing touchup

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2021-11-16 13:13:35 -05:00
+								Float.POSITIVE_INFINITY.
-												optionally enable Jetty ForwardedRequestCustomizer (#9010)

* optionally enable Jetty ForwardedRequestCustomizer

* fix doc build

											
										
										
											2019-12-12 20:00:08 -05:00
+								ForwardedRequestCustomizer
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								GC
 								GPG
 								GSSAPI
 								GUIs
 								GroupBy
 								Guice
 								HDFS
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								HDFSFirehose
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								HLL
 								HashSet
 								Homebrew
 								HyperLogLog
-												AWS RDS token based password  provider (#9518)

* refresh db pwd

* aws iam token password provider

* fix analyze-dependencies build

* fix doc build

* add  ut for BasicDataSourceExt

* more doc updates

* more  doc update

* moving aws  token password  provider to new extension

* remove duplicate changes

* make  all config inline

* extension docs

* refresh db  password  in SQL Firehose code path as well

* add ut

* fix build

* add new extension to distribution

* rds lib is not provided

* fix license build

* add version to license

* change parent version to 0.19.0-snapshot

* address review comments

* fix core/ code coverage

* Update server/src/main/java/org/apache/druid/metadata/BasicDataSourceExt.java

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* address review comments

* fix spellchecker

* remove inadvertant website file change

Co-authored-by: Clint Wylie <cjwylie@gmail.com>
											
										
										
											2021-01-07 00:15:29 -05:00
+								IAM
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								IANA
 								IETF
-												Add TrustedDomain Authenticator (#8248)

* Add TrustedDomain Authenticator

update javadoc

Add nullable annotations

Add cautionary note

fix travis failure

* add IP to spell checker

											
										
										
											2019-09-25 14:25:03 -04:00
+								IP
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								IPv4
-												Information schema doc update (#10081)

* add docs for IS_JOINABLE and IS_BROADCAST to INFORMATION_SCHEMA docs

* fixes

* oops

* revert noise

* missed one

* spellbot
											
										
										
											2020-06-30 00:08:13 -04:00
+								IS_BROADCAST
 								IS_JOINABLE
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								IS0
 								ISO-8601
 								ISO8601
 								IndexSpec
 								IndexTask
 								InfluxDB
-												add parquet support to native batch (#8883)

* add parquet support to native batch

* cleanup

* implement toJson for sampler support

* better binaryAsString test

* docs

* i hate spellcheck

* refactor toMap conversion so can be shared through flattenerMaker, default impls should be good enough for orc+avro, fixup for merge with latest

* add comment, fix some stuff

* adjustments

* fix accident

* tweaks

											
										
										
											2019-11-22 13:49:16 -05:00
+								InputFormat
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								InputSource
-												Add Sql InputSource (#9449)

* Add Sql InputSource

* Add spelling

* Use separate DruidModule

* Change module name

* Fix docs

* Use sqltestutils for tests

* Add additional tests

* Fix inspection

* Add module test

* Fix md in docs

* Remove annotation

Co-authored-by: Atul Mohan <atulmohan@yahoo-inc.com>
											
										
										
											2020-06-09 15:55:20 -04:00
+								InputSources
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Integer.MAX_VALUE
-												Update automatic compaction docs with consistent terminology (#12416)

* specify automatic compaction where applicable

* Apply suggestions from code review

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* update for style and consistency

* implement suggested feedback

* remove duplicate example

* Apply suggestions from code review

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/ingestion/compaction.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/api-reference.md

* update .spelling

* Adopt review suggestions

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>
											
										
										
											2022-05-03 19:22:25 -04:00
+								ioConfig
-												fix spelling errors triggered by another doc PR (#8653)


											
										
										
											2019-10-09 02:43:58 -04:00
+								JBOD
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								JDBC
 								JDK
 								JDK7
 								JDK8
 								JKS
-												Implementing dropwizard emitter for druid (#7363)

* Implementing dropwizard emitter for druid

making metric manager and alert emitters as optional

* Refactor and make things work

more improvements

improve docs

refactrings

* Fix teamcity inspections

* review comments

* more review comments

* add limit to max number of gauges

* update pom version

* fix pom

* review comments

* review comment

* review comments

* fix broken doc link

review comments

review comments

* review comments

* fix checkstyle

* more spell check fixes

* fix travis failures

											
										
										
											2019-10-01 17:59:30 -04:00
+								JMX
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								JRE
 								JS
 								JSON
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								JsonPath
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								JSONPath
-												Security overview documentation (#10339)

* initial file

* initial file

* security overview added

* ldap added

* spacing adjustments

* nits

* security graphics and doc review

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-user-auth.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* updates frm review

* review comments

* finish up review and light edits

* broken links

* spell check

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>
											
										
										
											2020-11-19 18:24:58 -05:00
+								JSSE
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								JVM
 								JVMs
 								Joda
 								JsonProperty
-												Add details to the Docker tutorial (#11463)

* Add details to the Docker tutorial

Added links, explanations and other details to the Docker
tutorial to make it easier for first-time users.

* Fix spelling error

And add "Jupyter" to the spelling dictionary.

* Update docs/tutorials/docker.md

* Update docs/tutorials/docker.md

Co-authored-by: sthetland <steve.hetland@imply.io>

* Update docs/tutorials/docker.md

Co-authored-by: sthetland <steve.hetland@imply.io>

* Update docs/tutorials/docker.md

* Update docs/tutorials/docker.md

Co-authored-by: sthetland <steve.hetland@imply.io>

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
Co-authored-by: sthetland <steve.hetland@imply.io>
											
										
										
											2021-08-24 11:49:29 -04:00
+								Jupyter
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								KMS
 								Kerberized
 								Kerberos
-												Update security overview with additional recommendations (#11016)

* updatee security overview with additional recommendations for improved security

* address first set of review questions

* Update docs/operations/security-overview.md

* Update docs/operations/security-overview.md

* apply changes from review

* Update docs/operations/security-overview.md

Co-authored-by: Suneet Saldanha <suneet@apache.org>

* Update docs/operations/security-overview.md

Co-authored-by: Suneet Saldanha <suneet@apache.org>

* Update docs/operations/security-overview.md

Co-authored-by: Suneet Saldanha <suneet@apache.org>

* Update security-overview.md

fix additional comments & typos cc: @suneet-s, @jihoonsoon

Co-authored-by: Suneet Saldanha <suneet@apache.org>
											
										
										
											2021-04-14 11:58:17 -04:00
+								KeyStores
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Kinesis
-												add documentation for druid docker and k8s operator (#8802)

* add documentation for druid docker and k8s operator

* address review comment and add Kubernetes to spelling file

											
										
										
											2019-11-06 15:56:21 -05:00
+								Kubernetes
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								LRU
 								LZ4
 								LZO
 								LimitSpec
 								Long.MAX_VALUE
-												Fixing a few typos and style issues (#11883)

* grammar and format work

* light writing touchup

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2021-11-16 13:13:35 -05:00
+								Long.MAX_VALUE.
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Long.MIN_VALUE
-												Fixing a few typos and style issues (#11883)

* grammar and format work

* light writing touchup

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2021-11-16 13:13:35 -05:00
+								Long.MIN_VALUE.
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Lucene
 								MapBD
 								MapDB
-												MySQL extension with MariaDB connector docs (#11608)

* add docs for mariadb support via mysql extensions

* add logging so you know what druid knows

* homogenize

* spelling

* missed a couple
											
										
										
											2021-08-19 04:52:26 -04:00
+								MariaDB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								MiddleManager
 								MiddleManagers
 								Montréal
 								Murmur3
-												Minor edits to architecture page to improve flow (#11465)

* Minor edits to architecture page to improve flow

* Fixed spelling issue
											
										
										
											2021-08-09 10:48:29 -04:00
+								MVCC
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								NFS
-												Add support for Avro OCF using InputFormat (#9671)

* Add AvroOCFInputFormat

* Support supplying a reader schema in AvroOCFInputFormat

* Add docs for Avro OCF input format

* Address review comments

* Address second round of review
											
										
										
											2020-05-16 17:09:12 -04:00
+								OCF
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								OLAP
 								OOMs
 								OpenJDK
-												Security overview documentation (#10339)

* initial file

* initial file

* security overview added

* ldap added

* spacing adjustments

* nits

* security graphics and doc review

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-user-auth.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* updates from review

* review comments

* finish up review and light edits

* broken links

* spell check

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>
											
										
										
											2020-11-19 18:24:58 -05:00
+								OpenLDAP
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								OpenTSDB
 								OutputStream
 								ParAccel
 								ParseSpec
 								ParseSpecs
 								Protobuf
-												Add more guidelines on the use of aliyun-oss-extensions (#11420)

* Add more description

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Update prefixes usage and Add troubleshooting section

* Add endpoint configuration recommendation

* Fix link

* resolve review comments


											
										
										
											2021-08-09 20:27:35 -04:00
+								pull-deps
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								RDBMS
 								RDDs
-												AWS RDS token based password  provider (#9518)

* refresh db pwd

* aws iam token password provider

* fix analyze-dependencies build

* fix doc build

* add  ut for BasicDataSourceExt

* more doc updates

* more  doc update

* moving aws  token password  provider to new extension

* remove duplicate changes

* make  all config inline

* extension docs

* refresh db  password  in SQL Firehose code path as well

* add ut

* fix build

* add new extension to distribution

* rds lib is not provided

* fix license build

* add version to license

* change parent version to 0.19.0-snapshot

* address review comments

* fix core/ code coverage

* Update server/src/main/java/org/apache/druid/metadata/BasicDataSourceExt.java

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* address review comments

* fix spellchecker

* remove inadvertent website file change

Co-authored-by: Clint Wylie <cjwylie@gmail.com>
											
										
										
											2021-01-07 00:15:29 -05:00
+								RDS
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Rackspace
 								Redis
 								S3
 								SDK
 								SIGAR
 								SPNEGO
-												Add Sql InputSource (#9449)

* Add Sql InputSource

* Add spelling

* Use separate DruidModule

* Change module name

* Fix docs

* Use sqltestutils for tests

* Add additional tests

* Fix inspection

* Add module test

* Fix md in docs

* Remove annotation

Co-authored-by: Atul Mohan <atulmohan@yahoo-inc.com>
											
										
										
											2020-06-09 15:55:20 -04:00
+								SqlInputSource
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								SQLServer
 								SSD
 								SSDs
 								SSL
 								Samza
 								Splunk
 								SqlFirehose
-												sql support for dynamic parameters (#6974)

* sql support for dynamic parameters

* fixup

* javadocs

* fixup from merge

* formatting

* fixes

* fix it

* doc fix

* remove druid fallback self-join parameterized test

* unused imports

* ignore test for now

* fix imports

* fixup

* fix merge

* merge fixup

* fix test that cannot vectorize

* fixup and more better

* dependency thingo

* fix docs

* tweaks

* fix docs

* spelling

* unused imports after merge

* review stuffs

* add comment

* add ignore text

* review stuffs

											
										
										
											2020-02-19 16:09:20 -05:00
+								SqlParameter
-												Security overview documentation (#10339)

* initial file

* initial file

* security overview added

* ldap added

* spacing adjustments

* nits

* security graphics and doc review

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-user-auth.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* updates from review

* review comments

* finish up review and light edits

* broken links

* spell check

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>
											
										
										
											2020-11-19 18:24:58 -05:00
+								SslContextFactory
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								StatsD
-												add optional system schema authorization (#11720)

* add optional system schema authorization

* remove unused

* adjust docs

* doc fixes, missing ldap config change for integration tests

* style
											
										
										
											2021-09-21 16:28:26 -04:00
+								SYSTEM_TABLE
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								TCP
 								TGT
 								TLS
 								TopN
 								TopNs
 								UI
 								UIs
 								URI
 								URIs
 								UTF-16
 								UTF-8
 								UTF8
 								XMLs
 								ZK
-												Adding supported compression formats for native batch ingestion (#10306)

* Adding supported compression formats for native batch ingestion

* Update docs/ingestion/native-batch.md

Co-authored-by: sthetland <steve.hetland@imply.io>

* fix spellcheck

Co-authored-by: Suneet Saldanha <suneet@apache.org>
Co-authored-by: sthetland <steve.hetland@imply.io>
											
										
										
											2020-08-26 15:39:48 -04:00
+								ZSTD
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								accessor
 								ad-hoc
 								aggregator
 								aggregators
 								ambari
 								analytics
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								arrayElement
-												S3 ingestion can assume role (#10995)

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* feature s3 assume role

* tests fix

* spelling fix

* sts fix

Co-authored-by: egor-ryashin <egor.ryashin@rilldata.com>
											
										
										
											2021-06-09 06:32:35 -04:00
+								assumeRoleArn
 								assumeRoleExternalId
-												parallel broker merges on fork join pool (#8578)

* sketch of broker parallel merges done in small batches on fork join pool

* fix non-terminating sequences, auto compute parallelism

* adjust benches

* adjust benchmarks

* now hella more faster, fixed dumb

* fix

* remove comments

* log.info for debug

* javadoc

* safer block for sequence to yielder conversion

* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool

* smooth yield rate adjustment, more logs to help tune

* cleanup, less logs

* error handling, bug fixes, on by default, more parallel, more tests

* remove unused var

* comments

* timeboundary mergeFn

* simplify, more javadoc

* formatting

* pushdown config

* use nanos consistently, move logs back to debug level, bit more javadoc

* static terminal result batch

* javadoc for nullability of createMergeFn

* cleanup

* oops

* fix race, add docs

* spelling, remove todo, add unhandled exception log

* cleanup, revert unintended change

* another unintended change

* review stuff

* add ParallelMergeCombiningSequenceBenchmark, fixes

* hyper-threading is the enemy

* fix initial start delay, lol

* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2

* fix those important style issues with the benchmarks code

* lazy sequence creation for benchmarks

* more benchmark comments

* stable sequence generation time

* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs

* add jmh thread based benchmarks, cleanup some stuff

* oops

* style

* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose

* retool benchmark to allow modeling more typical heterogenous heavy workloads

* spelling

* fix

* refactor benchmarks

* formatting

* docs

* add maxThreadStartDelay parameter to threaded benchmark

* why does catch need to be on its own line but else doesnt

											
										
										
											2019-11-07 14:58:46 -05:00
+								async
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								authorizer
 								authorizers
 								autocomplete
 								autodiscovery
 								autoscaler
 								autoscaling
 								averager
 								averagers
 								backend
 								backfills
 								backpressure
 								base64
 								big-endian
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								bigint
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								blobstore
 								boolean
 								breakpoint
 								broadcasted
 								checksums
 								classpath
 								clickstream
-												S3 Ingestion from non-default endpoints (#11798)

* Add endpoint support for s3inputsource

* Changes to tests

* Fix docs

* Fix config

* Fix inspections

* Fix spelling

* Remove password from toString
											
										
										
											2022-07-15 14:03:34 -04:00
+								clientConfig
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								codebase
 								codec
 								colocated
 								colocation
 								compactable
-												First refactor of compaction (#10935)

* first pass compaction refactor. includes updated behavior for queryGranularity. removes duplicated doc

* fix links, typos, some reorganization

* fix spelling. TBD still there for work in progress

* updates tutorial examples, adds more clarification around compaction use cases

* add granularity spec to automatic compaction config

* final edits

* spelling fixes

* apply suggestions from review

* updates from review

* last edits

* move note

* clarify null

* fix links & spelling

* latest review

* edits to auto-compaction config

* add back rollup

* fix links & spelling

* Update compaction.md

add granularityspec to example
											
										
										
											2021-03-24 14:41:44 -04:00
+								compactionTask
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								config
 								configs
-												document DynamicConfigProvider for kafka consumer properties (#10658)

* document DynamicConfigProvider for kafka consumer properties

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: Jihoon Son <jihoonson@apache.org>

* Update docs/development/extensions-core/kafka-ingestion.md

* fix doc build

Co-authored-by: Jihoon Son <jihoonson@apache.org>
											
										
										
											2020-12-10 11:24:33 -05:00
+								consumerProperties
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								cron
 								csv
 								customizable
 								dataset
 								datasets
 								datasketches
 								datasource
 								datasources
 								dbcp
-												option to use deep storage for storing shuffle data (#11507)

Fixes #11297.
Description

Description and design in the proposal #11297
Key changed/added classes in this PR

    *DataSegmentPusher
    *ShuffleClient
    *PartitionStat
    *PartitionLocation
    *IntermediaryDataManager

											
										
										
											2021-08-13 16:40:25 -04:00
+								deepstore
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								denormalization
 								denormalize
 								denormalized
-												threshold based automatic query prioritization (#9493)

* threshold based automatic query prioritization

* fixes

* spelling and fixes

* fix docs

* spelling

* checkstyle

* adjustments

* doc fix
											
										
										
											2020-03-13 04:41:54 -04:00
+								deprioritization
 								deprioritizes
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								dequeued
 								deserialization
 								deserialize
 								deserialized
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								deserializes
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								downtimes
-												druid extension for OpenID Connect auth using pac4j lib (#8992)

* druid pac4j security extension for OpenID Connect OAuth 2.0 authentication

* update version in druid-pac4j pom

* introducing unauthorized resource filter

* authenticated but authorized /unified-webconsole.html

* use httpReq.getRequestURI() for matching callback path

* add documentation

* minor doc addition

* license file updates

* make dependency analyze succeed

* fix doc build

* hopefully fixes doc build

* hopefully fixes license check build

* yet another try on fixing license build

* revert unintentional changes to website folder

* update version to 0.18.0-SNAPSHOT

* check session and its expiry on each request

* add crypto service

* code for encrypting the cookie

* update doc with cookiePassphrase

* update license yaml

* make sessionstore in Pac4jFilter private non static

* make Pac4jFilter fields final

* okta: use sha256 for hmac

* remove incubating

* add UTs for crypto util and session store impl

* use standard charsets

* add license header

* remove unused file

* add org.objenesis.objenesis to license.yaml

* a bit of nit changes  in CryptoService  and embedding EncryptionResult for clarity

* rename alg  to cipherAlgName

* take cipher alg name, mode and padding as input

* add java doc  for CryptoService  and make it more understandable

* another  UT for CryptoService

* cache pac4j Config

* use generics clearly in Pac4jSessionStore

* update cookiePassphrase doc to mention PasswordProvider

* mark stuff Nullable where appropriate in Pac4jSessionStore

* update doc to mention jdbc

* add error log on reaching callback resource

* javadoc  for Pac4jCallbackResource

* introduce NOOP_HTTP_ACTION_ADAPTER

* add correct module name in license file

* correct extensions folder name in licenses.yaml

* replace druid-kubernetes-extensions to druid-pac4j

* cache SecureRandom instance

* rename UnauthorizedResourceFilter to AuthenticationOnlyResourceFilter
											
										
										
											2020-03-23 21:15:45 -04:00
+								druid
-												kubernetes based discovery druid extension to run Druid on K8S without Zookeeper (#10544)

* honor zk enablement config in more places in druid code

* kubernetes based discovery module

* fix spotbugs check

* fix intellij checks error

* fix doc link to kubernetes.md from extension

* make spellchecker happy

* update license.yaml

* fix dependency check errors

* update extension coverage

* UTs for BaseNodeRoleWatcher

* fix forbidden-api check

* update k8s module coverage ignores

* add Bouncy Castle License being same as MIT License for license checking purposes

* further update licenses.yaml

* label/annotation pre-existence assumption

* address review comment
											
										
										
											2020-12-15 00:10:31 -05:00
+								druid–kubernetes-extensions
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								e.g.
 								encodings
 								endian
-												S3 Ingestion from non-default endpoints (#11798)

* Add endpoint support for s3inputsource

* Changes to tests

* Fix docs

* Fix config

* Fix inspections

* Fix spelling

* Remove password from toString
											
										
										
											2022-07-15 14:03:34 -04:00
+								endpointConfig
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								enum
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								expectedType
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are located in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								expr
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								failover
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are located in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								featureSpec
 								findColumnsFromHeader
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								filenames
 								filesystem
 								firefox
 								firehose
 								firehoses
-												Add Pig-specific file handling to Avro parser (#9258)

* Add processing for data files from AvroStorage

* Add words to spellings file

											
										
										
											2020-02-11 00:53:11 -05:00
+								fromPigAvroStorage
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								frontends
 								granularities
-												First refactor of compaction (#10935)

* first pass compaction refactor. includes updated behavior for queryGranularity. removes duplicated doc

* fix links, typos, some reorganization

* fix spelling. TBD still there for work in progress

* updates tutorial examples, adds more clarification around compaction use cases

* add granularity spec to automatic compaction config

* final edits

* spelling fixes

* apply suggestions from review

* updates from review

* last edits

* move note

* clarify null

* fix links & spelling

* latest review

* edits to auto-compaction config

* add back rollup

* fix links & spelling

* Update compaction.md

add granularityspec to example
											
										
										
											2021-03-24 14:41:44 -04:00
+								granularitySpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								gzip
 								gzipped
 								hadoop
 								hasher
 								hashtable
-												Add setProcessingThreadNames context parameter. (#12514)

setting thread names takes a measurable amount of time in the case where segment scans are very quick. In high-QPS testing we found a slight performance boost from turning off processing thread renaming. This option makes that possible.
											
										
										
											2022-05-16 04:12:00 -04:00
+								high-QPS
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								historicals
 								hostname
 								hostnames
 								http
 								https
-												Update dictionary for spell check (#10152)


											
										
										
											2020-07-08 02:12:39 -04:00
+								idempotency
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								i.e.
 								influxdb
-												Add support for optional aws credentials for s3 for ingestion (#9375)

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* fix build failure

* fix failing build

* fix failing build

* Code cleanup

* fix failing test

* Removed CloudConfigProperties and make specific class for each cloudInputSource

* Removed CloudConfigProperties and make specific class for each cloudInputSource

* pass s3ConfigProperties for split

* lazy init s3client

* update docs

* fix docs check

* address comments

* add ServerSideEncryptingAmazonS3.Builder

* fix failing checkstyle

* fix typo

* wrap the ServerSideEncryptingAmazonS3.Builder in a provider

* added java docs for S3InputSource constructor

* added java docs for S3InputSource constructor

* remove wrap the ServerSideEncryptingAmazonS3.Builder in a provider

											
										
										
											2020-02-25 23:59:53 -05:00
+								ingestionSpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								injective
 								inlined
-												Convert inQueryThreshold into query context parameter. (#12357)

Added Calcites InQueryThreshold as a query context parameter. Setting this parameter appropriately reduces the time taken for queries with large number of values in their IN conditions.
											
										
										
											2022-03-22 09:03:57 -04:00
+								inSubQueryThreshold
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								interruptible
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								isAllowList
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are located in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								jackson-jq
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								javadoc
-												Information schema doc update (#10081)

* add docs for IS_JOINABLE and IS_BROADCAST to INFORMATION_SCHEMA docs

* fixes

* oops

* revert noise

* missed one

* spellbot
											
										
										
											2020-06-30 00:08:13 -04:00
+								joinable
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								json_keys
 								json_object
 								json_paths
 								json_query
 								json_value
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								kerberos
 								keystore
-												Update security overview with additional recommendations (#11016)

* update security overview with additional recommendations for improved security

* address first set of review questions

* Update docs/operations/security-overview.md

* Update docs/operations/security-overview.md

* apply changes from review

* Update docs/operations/security-overview.md

Co-authored-by: Suneet Saldanha <suneet@apache.org>

* Update docs/operations/security-overview.md

Co-authored-by: Suneet Saldanha <suneet@apache.org>

* Update docs/operations/security-overview.md

Co-authored-by: Suneet Saldanha <suneet@apache.org>

* Update security-overview.md

fix additional comments & typos cc: @suneet-s, @jihoonsoon

Co-authored-by: Suneet Saldanha <suneet@apache.org>
											
										
										
											2021-04-14 11:58:17 -04:00
+								keytool
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								keytab
-												kubernetes based discovery druid extension to run Druid on K8S without Zookeeper (#10544)

* honor zk enablement config in more places in druid code

* kubernetes based discovery module

* fix spotbugs check

* fix intellij checks error

* fix doc link to kubernetes.md from extension

* make spellchecker happy

* update license.yaml

* fix dependency check errors

* update extension coverage

* UTs for BaseNodeRoleWatcher

* fix forbidden-api check

* update k8s module coverage ignores

* add Bouncy Castle License being same as MIT License for license checking purposes

* further update licenses.yaml

* label/annotation pre-existence assumption

* address review comment
											
										
										
											2020-12-15 00:10:31 -05:00
+								kubernetes
-												query laning and load shedding (#9407)

* prototype

* merge QueryScheduler and QueryManager

* everything in its right place

* adjustments

* docs

* fixes

* doc fixes

* use resilience4j instead of semaphore

* more tests

* simplify

* checkstyle

* spelling

* oops heh

* remove unused

* simplify

* concurrency tests

* add SqlResource tests, refactor error response

* add json config tests

* use LongAdder instead of AtomicLong

* remove test only stuffs from scheduler

* javadocs, etc

* style

* partial review stuffs

* adjust

* review stuffs

* more javadoc

* error response documentation

* spelling

* preserve user specified lane for NoSchedulingStrategy

* more test, why not

* doc adjustment

* style

* missed review for make a thing a constant

* fixes and tests

* fix test

* Update docs/configuration/index.md

Co-Authored-By: sthetland <steve.hetland@imply.io>

* doc update

Co-authored-by: sthetland <steve.hetland@imply.io>
											
										
										
											2020-03-10 05:57:16 -04:00
+								laning
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								lifecycle
 								localhost
 								log4j
 								log4j2
 								log4j2.xml
 								lookback
 								lookups
 								mapreduce
 								masse
-												Moving in filter check to broker (#12195)

* Moving in filter check to broker

* Adding more unit tests, making error message meaningful

* Spelling and doc changes

* Updating default to -1 and making this feature hidden by default. The number of IN filters can grow up to a max limit of 100

* Removing upper limit of 100, updated docs

* Making documentation more meaningful

* Moving check outside to PlannerConfig, updating test cases and adding back max limit

* Updated with some additional code comments

* Missed removing one line during the checkin

* Addressing doc changes and one forbidden API correction

* Final doc change

* Adding a speling exception, correcting a testcase

* Reading entire filter tree to address combinations of ANDs and ORs

* Specifying in docs that, this case works only for ORs

* Revert "Reading entire filter tree to address combinations of ANDs and ORs"

This reverts commit 81ca8f8496777eec41907899957b39ca99ccbada.

* Covering a class cast exception and updating docs

* Counting changed

Co-authored-by: Jihoon Son <jihoonson@apache.org>
											
										
										
											2022-02-15 23:45:07 -05:00
+								maxNumericInFilters
-												Add maxNumFiles to splitHintSpec (#10243)

* Add maxNumFiles to splitHintSpec

* missing link

* fix build failure; use maxNumFiles for integration tests

* spelling

* lower default

* Update docs/ingestion/native-batch.md

Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>

* address comments; change default maxSplitSize

* spelling

* typos and doc

* same change for segments splitHintSpec

* fix build

* fix build

Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
											
										
										
											2020-08-21 12:43:58 -04:00
+								maxNumFiles
 								maxNumSegments
-												fix spelling errors triggered by another doc PR (#8653)


											
										
										
											2019-10-09 02:43:58 -04:00
+								max_map_count
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								memcached
 								mergeable
 								metadata
 								millis
 								misconfiguration
-												First refactor of compaction (#10935)

* first pass compaction refactor. includes updated behavior for queryGranularity. removes duplicated doc

* fix links, typos, some reorganization

* fix spelling. TBD still there for work in progress

* updates tutorial examples, adds more clarification around compaction use cases

* add granularity spec to automatic compaction config

* final edits

* spelling fixes

* apply suggestions from review

* upadtes from review

* last edits

* move note

* clarify null

* fix links & spelling

* latest review

* edits to auto-compaction config

* add back rollup

* fix links & spelling

* Update compaction.md

add granularityspec to example
											
										
										
											2021-03-24 14:41:44 -04:00
+								misconfigured
-												Add MostAvailableSizeStorageLocationSelectorStrategy (#8879)

* Add MostAvailableSize LocationSelectorStrategy

* Add doc for mostAvailableSize strategy

* Fix docs for mostAvailableSize

											
										
										
											2020-01-23 16:42:03 -05:00
+								mostAvailableSize
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								multitenancy
 								multitenant
 								mysql
 								namespace
 								namespaced
 								namespaces
 								natively
 								netflow
 								non-nullable
 								noop
 								numerics
-												First refactor of compaction (#10935)

* first pass compaction refactor. includes updated behavior for queryGranularity. removes duplicated doc

* fix links, typos, some reorganization

* fix spelling. TBD still there for work in progress

* updates tutorial examples, adds more clarification around compaction use cases

* add granularity spec to automatic compaction config

* final edits

* spelling fixes

* apply suggestions from review

* upadtes from review

* last edits

* move note

* clarify null

* fix links & spelling

* latest review

* edits to auto-compaction config

* add back rollup

* fix links & spelling

* Update compaction.md

add granularityspec to example
											
										
										
											2021-03-24 14:41:44 -04:00
+								numShards
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								parameterized
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								parse_json
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								parseable
 								partitioner
-												Store hash partition function in dataSegment and allow segment pruning only when hash partition function is provided (#10288)

* Store hash partition function in dataSegment and allow segment pruning only when hash partition function is provided

* query context

* fix tests; add more test

* javadoc

* docs and more tests

* remove default and hadoop tests

* consistent name and fix javadoc

* spelling and field name

* default function for partitionsSpec

* other comments

* address comments

* fix tests and spelling

* test

* doc
											
										
										
											2020-09-24 19:32:56 -04:00
+								partitionFunction
-												Integration tests and docs for auto compaction with different partitioning (#10354)

* Working

* add test

* doc

* fix test

* split other integration test

* exclude other-index from other tests

* doc anchor fix

* adjust task slots and number of merge tasks

* spell check

* reduce maxNumConcurrentSubTasks to 1

* maxNumConcurrentSubtasks for range partitinoing

* reduce memory for historical

* change group name
											
										
										
											2020-09-15 14:28:09 -04:00
+								partitionsSpec
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								pathParts
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								performant
 								plaintext
 								pluggable
 								postgres
 								postgresql
 								pre-aggregated
 								pre-aggregates
 								pre-aggregating
 								pre-aggregation
 								pre-computation
 								pre-compute
 								pre-computing
 								pre-configured
-												document useFilterCNF query context parameter (#9647)

* document useFilterCNF query context parameter

* move context key to QueryContexts

* Update .spelling
											
										
										
											2020-04-17 01:12:20 -04:00
+								pre-filtered
 								pre-filtering
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								pre-generated
 								pre-made
 								pre-processing
 								preemptible
 								prefetch
 								prefetched
 								prefetching
 								prepend
 								prepended
 								prepending
 								prepends
-												Druid Quickstart refactor and update (#9766)

* Update data-formats.md

Per Suneet, "Since you're editing this file can you also fix the json on line 177 please - it's missing a comma after the }"

* Light text cleanup

* Removing discussion of sample data, since it's repeated in the data loading tutorial, and not immediately relevant here.

* Update index.md

* original quickstart full first pass

* original quickstart full first pass

* first pass all the way through

* straggler

* image touchups and finished old tutorial

* a bit of finishing up

* Review comments

* fixing links

* spell checking gymnastics
											
										
										
											2020-04-30 15:07:28 -04:00
+								prepopulated
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								preprocessing
 								priori
-												Fix documentation for Kinesis fetchThreads. (#10156)

* Fix documentation for Kinesis fetchThreads

The default was changed in #9819, but the documentation wasn't updated.

* Add 'procs' to spelling.
											
										
										
											2020-07-08 22:47:09 -04:00
+								procs
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								processFromRaw
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								programmatically
 								proto
 								proxied
-												S3 Ingestion from non-default endpoints (#11798)

* Add endpoint support for s3inputsource

* Changes to tests

* Fix docs

* Fix config

* Fix inspections

* Fix spelling

* Remove password from toString
											
										
										
											2022-07-15 14:03:34 -04:00
+								proxyConfig
-												Add setProcessingThreadNames context parameter. (#12514)

setting thread names takes a measurable amount of time in the case where segment scans are very quick. In high-QPS testing we found a slight performance boost from turning off processing thread renaming. This option makes that possible.
											
										
										
											2022-05-16 04:12:00 -04:00
+								QPS
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								quantile
 								quantiles
 								queryable
 								quickstart
 								realtime
 								rebalance
 								redis
 								regexes
 								reimported
 								reindex
 								reindexing
 								reingest
 								reingesting
 								reingestion
 								repo
-												Allow list for JDBC connection properties to address CVE-2021-26919 (#11047)

* Allow list for JDBC connection properties to address CVE-2021-26919

* fix tests for java 11
											
										
										
											2021-04-01 20:30:47 -04:00
+								requireSSL
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								rollup
 								rollups
 								rsync
 								runtime
 								schemas
-												Document config for ingesting null columns (#12389)

* config for ingesting null columns

* add link

* edit .spelling

* what happens if storeEmptyColumns is disabled
											
										
										
											2022-04-05 12:15:42 -04:00
+								schemaless
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								searchable
-												Store hash partition function in dataSegment and allow segment pruning only when hash partition function is provided (#10288)

* Store hash partition function in dataSegment and allow segment pruning only when hash partition function is provided

* query context

* fix tests; add more test

* javadoc

* docs and more tests

* remove default and hadoop tests

* consistent name and fix javadoc

* spelling and field name

* default function for partitionsSpec

* other comments

* address comments

* fix tests and spelling

* test

* doc
											
										
										
											2020-09-24 19:32:56 -04:00
+								secondaryPartitionPruning
-												Update dictionary for spell check (#10152)


											
										
										
											2020-07-08 02:12:39 -04:00
+								seekable-stream
-												optionally enable Jetty ForwardedRequestCustomizer (#9010)

* optionally enable Jetty ForwardedRequestCustomizer

* fix doc build

											
										
										
											2019-12-12 20:00:08 -05:00
+								servlet
-												Add setProcessingThreadNames context parameter. (#12514)

setting thread names takes a measurable amount of time in the case where segment scans are very quick. In high-QPS testing we found a slight performance boost from turning off processing thread renaming. This option makes that possible.
											
										
										
											2022-05-16 04:12:00 -04:00
+								setProcessingThreadNames
-												druid-pac4j: add ability to use custom ssl trust store while talking to auth  server (#9637)

* druid-pac4j: add ability for custom ssl trust store for talking to auth
server

* fix nimbusds DefaultResourceRetriever name in comment
											
										
										
											2020-04-10 21:01:59 -04:00
+								simple-client-sslcontext
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								sharded
 								sharding
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								skipHeaderRows
-												Remove the time bit, fix headings (#12808)

* Remove the time bit, fix headings

* Adopt review suggestions

* Edits

* Update smoosh file description

* Adopt review suggestions

* Update spelling
											
										
										
											2022-07-20 18:37:57 -04:00
+								Smoosh
 								smoosh
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								smooshed
 								splittable
-												Allow list for JDBC connection properties to address CVE-2021-26919 (#11047)

* Allow list for JDBC connection properties to address CVE-2021-26919

* fix tests for java 11
											
										
										
											2021-04-01 20:30:47 -04:00
+								ssl
 								sslmode
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								stdout
 								storages
 								stringified
 								subarray
 								subnet
 								subqueries
 								subquery
 								subsecond
 								substring
-												Add maxNumFiles to splitHintSpec (#10243)

* Add maxNumFiles to splitHintSpec

* missing link

* fix build failure; use maxNumFiles for integration tests

* spelling

* lower default

* Update docs/ingestion/native-batch.md

Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>

* address comments; change default maxSplitSize

* spelling

* typos and doc

* same change for segments splitHintSpec

* fix build

* fix build

Co-authored-by: Abhishek Agarwal <1477457+abhishekagarwal87@users.noreply.github.com>
											
										
										
											2020-08-21 12:43:58 -04:00
+								subtask
-												Update dictionary for spell check (#10152)


											
										
										
											2020-07-08 02:12:39 -04:00
+								subtasks
-												Add shuffle metrics for parallel indexing (#10359)

* Add shuffle metrics for parallel indexing

* javadoc and concurrency test

* concurrency

* fix javadoc

* Feature flag

* doc

* fix doc and add a test

* checkstyle

* add tests

* fix build and address comments
											
										
										
											2020-10-10 22:35:17 -04:00
+								supervisorTaskId
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								symlink
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								syntaxes
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								tiering
 								timeseries
 								timestamp
 								timestamps
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								to_json_string
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								tradeoffs
-												Support filtering data in Auto Compaction (#11922)

* add impl

* fix checkstyle

* add test

* add test

* add unit tests

* fix unit tests

* fix unit tests

* fix unit tests

* add IT

* add IT

* add comments

* fix spelling
											
										
										
											2021-11-24 13:56:38 -05:00
+								transformSpec
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								try_parse_json
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								tsv
-												fix spelling errors triggered by another doc PR (#8653)


											
										
										
											2019-10-09 02:43:58 -04:00
+								ulimit
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								unannounce
 								unannouncements
 								unary
 								unassign
 								uncomment
 								underutilization
 								unintuitive
 								unioned
 								unmergeable
 								unmerged
-												option to use deep storage for storing shuffle data (#11507)

Fixes #11297.
Description

Description and design in the proposal #11297
Key changed/added classes in this PR

    *DataSegmentPusher
    *ShuffleClient
    *PartitionStat
    *PartitionLocation
    *IntermediaryDataManager

											
										
										
											2021-08-13 16:40:25 -04:00
+								UNNEST
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								unparseable
 								unparsed
-												Druid Quickstart refactor and update (#9766)

* Update data-formats.md

Per Suneet, "Since you're editing this file can you also fix the json on line 177 please - it's missing a comma after the }"

* Light text cleanup

* Removing discussion of sample data, since it's repeated in the data loading tutorial, and not immediately relevant here.

* Update index.md

* original quickstart full first pass

* original quickstart full first pass

* first pass all the way through

* straggler

* image touchups and finished old tutorial

* a bit of finishing up

* Review comments

* fixing links

* spell checking gymnastics
											
										
										
											2020-04-30 15:07:28 -04:00
+								unsetting
-												Security overview documentation (#10339)

* initial file

* initial file

* security overview added

* ldap added

* spacing adjustments

* nits

* security graphics and doc review

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-user-auth.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* Update docs/operations/security-overview.md

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>

* updates frm review

* review comments

* finish up review and light edits

* broken links

* spell check

Co-authored-by: Jonathan Wei <jon-wei@users.noreply.github.com>
											
										
										
											2020-11-19 18:24:58 -05:00
+								untrusted
-												document useFilterCNF query context parameter (#9647)

* document useFilterCNF query context parameter

* move context key to QueryContexts

* Update .spelling
											
										
										
											2020-04-17 01:12:20 -04:00
+								useFilterCNF
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								useJqSyntax
-												Allow list for JDBC connection properties to address CVE-2021-26919 (#11047)

* Allow list for JDBC connection properties to address CVE-2021-26919

* fix tests for java 11
											
										
										
											2021-04-01 20:30:47 -04:00
+								useSSL
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								uptime
-												add google cloud storage InputSource for native batch (#8907)

* add google cloud storage InputSource for native batch

* rename

* checkstyle

* fix

* fix spelling

* review comments

											
										
										
											2019-11-19 22:49:43 -05:00
+								uris
-												Add config and header support for confluent schema registry.  (#10314)

* Add config and header support for confluent schema registry. (porting code from https://github.com/apache/druid/pull/9096)

* Add Eclipse Public License 2.0 to license check

* Update licenses.yaml, revert changes to check-licenses.py and dependencies for integration-tests

* Add spelling exception and remove unused dependency

* Use non-deprecated getSchemaById() and remove duplicated license entry

* Update docs/ingestion/data-formats.md

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* Added check for schema being null, as per Confluent code

* Missing imports and whitespace

* Updated unit tests with AvroSchema

Co-authored-by: Sergio Spinatelli <sergio.spinatelli.extern@7-tv.de>
Co-authored-by: Sergio Spinatelli <sergio.spinatelli.extern@joyn.de>
Co-authored-by: Clint Wylie <cjwylie@gmail.com>
											
										
										
											2021-02-27 17:25:35 -05:00
+								urls
-												Doc update for the new input source and the new input format (#9171)

* Doc update for new input source and input format.

- The input source and input format are promoted in all docs under docs/ingestion
- All input sources including core extension ones are located in docs/ingestion/native-batch.md
- All input formats and parsers including core extension ones are localted in docs/ingestion/data-formats.md
- New behavior of the parallel task with different partitionsSpecs are documented in docs/ingestion/native-batch.md

* parquet

* add warning for range partitioning with sequential mode

* hdfs + s3, gs

* add fs impl for gs

* address comments

* address comments

* gcs

											
										
										
											2020-01-17 18:52:05 -05:00
+								useFieldDiscovery
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								v1
 								v2
 								vCPUs
 								validator
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								varchar
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								vectorizable
 								vectorize
-												add vectorizeVirtualColumns query context parameter (#10432)

* add vectorizeVirtualColumns query context parameter

* oops

* spelling

* default to false, more docs

* fix test

* fix spelling
											
										
										
											2020-09-28 21:48:34 -04:00
+								vectorizeVirtualColumns
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								versioning
-												document virtualColumns in native query documentation, fix some redirects (#12917)

* document virtualColumns in native query documentation, fix some redirects

* after all that, forgot to run spellcheck locally

* review stuff
											
										
										
											2022-08-18 23:49:23 -04:00
+								virtualColumns
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								w.r.t.
 								whitelist
 								whitelisted
 								whitespace
 								wildcard
-												Add HDFS firehose (#8754)

* Add HDFS firehose.

* Tests, support for lists of paths.

* Fixups.

* Update list of firehoses.

* Wildcards is a word.

											
										
										
											2019-10-28 11:07:38 -04:00
+								wildcards
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								xml
 								znode
 								znodes
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								APPROX_COUNT_DISTINCT
 								APPROX_QUANTILE
 								ARRAY_AGG
 								BIGINT
 								CATALOG_NAME
 								CHARACTER_MAXIMUM_LENGTH
 								CHARACTER_OCTET_LENGTH
 								CHARACTER_SET_NAME
 								COLLATION_NAME
 								COLUMN_DEFAULT
 								COLUMN_NAME
 								Concats
 								DATA_TYPE
 								DATETIME_PRECISION
 								DEFAULT_CHARACTER_SET_CATALOG
 								DEFAULT_CHARACTER_SET_NAME
 								DEFAULT_CHARACTER_SET_SCHEMA
 								ISODOW
 								ISOYEAR
 								IS_NULLABLE
 								JDBC_TYPE
 								MIDDLE_MANAGER
-												Add TIME_IN_INTERVAL SQL operator. (#12662)

* Add TIME_IN_INTERVAL SQL operator.

The operator is implemented as a convertlet rather than an
OperatorConversion, because this allows it to be equivalent to using
the >= and < operators directly.

* SqlParserPos cannot be null here.

* Remove unused import.

* Doc updates.

* Add words to dictionary.
											
										
										
											2022-06-21 16:05:37 -04:00
+								MILLIS_TO_TIMESTAMP
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								NULLable
 								NUMERIC_PRECISION
 								NUMERIC_PRECISION_RADIX
 								NUMERIC_SCALE
 								ORDINAL_POSITION
-												Add TIME_IN_INTERVAL SQL operator. (#12662)

* Add TIME_IN_INTERVAL SQL operator.

The operator is implemented as a convertlet rather than an
OperatorConversion, because this allows it to be equivalent to using
the >= and < operators directly.

* SqlParserPos cannot be null here.

* Remove unused import.

* Doc updates.

* Add words to dictionary.
											
										
										
											2022-06-21 16:05:37 -04:00
+								POSIX
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								PT1M
 								PT5M
 								SCHEMA_NAME
 								SCHEMA_OWNER
 								SERVER_SEGMENTS
 								SMALLINT
 								SQL_PATH
 								STRING_AGG
 								SYSTEM_TABLE
 								TABLE_CATALOG
 								TABLE_NAME
 								TABLE_SCHEMA
 								TABLE_TYPE
 								TIME_PARSE
 								TIME_SHIFT
 								TINYINT
 								VARCHAR
 								avg_num_rows
 								avg_size
 								created_time
 								current_size
 								detailed_state
 								druid.server.maxSize
 								druid.server.tier
 								druid.sql.planner.maxSemiJoinRowsInMemory
 								druid.sql.planner.sqlTimeZone
 								druid.sql.planner.useApproximateCountDistinct
 								druid.sql.planner.useApproximateTopN
-												Update .spelling (#12940)


											
										
										
											2022-08-22 21:47:40 -04:00
+								druid.sql.planner.useGroupingSetForExactDistinct
 								druid.sql.planner.useNativeQueryExplain
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								error_msg
 								exprs
 								group_id
 								interval_expr
-												SQL: Add is_active to sys.segments, update examples and docs. (#11550)

* SQL: Add is_active to sys.segments, update examples and docs.

is_active is short for:

  (is_published = 1 AND is_overshadowed = 0) OR is_realtime = 1

It's important because this represents "all the segments that should
be queryable, whether or not they actually are right now". Most of the
time, this is the set of segments that people will want to look at.

The web console already adds this filter to a lot of its queries,
proving its usefulness.

This patch also reworks the caveat at the bottom of the sys.segments
section, so its information is mixed into the description of each result
field. This should make it more likely for people to see the information.

* Wording updates.

* Adjustments for spellcheck.

* Adjust IT.
											
										
										
											2022-05-19 17:23:28 -04:00
+								is_active
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								is_available
 								is_leader
 								is_overshadowed
 								is_published
 								is_realtime
 								java.sql.Types
 								last_compaction_state
 								max_size
 								num_replicas
 								num_rows
 								num_segments
 								partition_num
 								plaintext_port
 								queue_insertion_time
 								runner_status
 								segment_id
 								server_type
 								shard_spec
 								sqlTimeZone
 								supervisor_id
 								sys
 								sys.segments
 								task_id
 								timestamp_expr
 								tls_port
 								total_size
 								useApproximateCountDistinct
 								useGroupingSetForExactDistinct
 								useApproximateTopN
 								wikipedia
-												Add feature flag for sql planning of TimeBoundary queries (#12491)

* Add feature flag for sql planning of TimeBoundary queries

* fixup! Add feature flag for sql planning of TimeBoundary queries

* Add documentation for enableTimeBoundaryPlanning

* fixup! Add documentation for enableTimeBoundaryPlanning
											
										
										
											2022-05-10 05:53:42 -04:00
+								enableTimeBoundaryPlanning
 								TimeBoundary
 								druid.query.default.context.enableTimeBoundaryPlanning
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								IEC
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/comparisons/druid-vs-elasticsearch.md
 x
 								 - ../docs/configuration/logging.md
 								_common
-												Enforce console logging for peon process (#12067)

Currently all Druid processes share the same log4j2 configuration file located in _common directory. Since peon processes are spawned by middle manager process, they derivate the environment variables from the middle manager. These variables include those in the log4j2.xml controlling to which file the logger writes the log.

But current task logging mechanism requires the peon processes to output the log to console so that the middle manager can redirect the console output to a file and upload this file to task log storage.

So, this PR imposes this requirement to peon processes, whatever the configuration is in the shared log4j2.xml, peon processes always write the log to console.


											
										
										
											2022-05-16 05:37:21 -04:00
+								appenders
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/dependencies/deep-storage.md
 								druid-hdfs-storage
 								druid-s3-extensions
-												Moving in filter check to broker (#12195)

* Moving in filter check to broker

* Adding more unit tests, making error message meaningful

* Spelling and doc changes

* Updating default to -1 and making this feature hide by default. The number of IN filters can grow upto a max limit of 100

* Removing upper limit of 100, updated docs

* Making documentation more meaningful

* Moving check outside to PlannerConfig, updating test cases and adding back max limit

* Updated with some additional code comments

* Missed removing one line during the checkin

* Addressing doc changes and one forbidden API correction

* Final doc change

* Adding a speling exception, correcting a testcase

* Reading entire filter tree to address combinations of ANDs and ORs

* Specifying in docs that, this case works only for ORs

* Revert "Reading entire filter tree to address combinations of ANDs and ORs"

This reverts commit 81ca8f8496777eec41907899957b39ca99ccbada.

* Covering a class cast exception and updating docs

* Counting changed

Co-authored-by: Jihoon Son <jihoonson@apache.org>
											
										
										
											2022-02-15 23:45:07 -05:00
+								druid.sql.planner.maxNumericInFilters
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/dependencies/metadata-storage.md
 								BasicDataSource
 								 - ../docs/dependencies/zookeeper.md
-												Update .spelling (#11977)


											
										
										
											2021-11-23 01:28:51 -05:00
+								LeaderLatch
-												remove ZooKeeper 3.4 support + pass tests with Java 15 (#11073)

With this change, Druid will only support ZooKeeper 3.5.x and later.

In order to support Java 15 we need to switch to ZK 3.5.x client libraries and drop support for ZK 3.4.x
(see #10780 for the detailed reasons) 

* remove ZooKeeper 3.4.x compatibility
* exclude additional ZK 3.5.x netty dependencies to ensure we use our version
* keep ZooKeeper version used for integration tests in sync with client library version
* remove the need to specify ZK version at runtime for docker
* add support to run integration tests with JDK 15
* build and run unit tests with Java 15 in travis
											
										
										
											2021-05-25 15:49:49 -04:00
+.5.x
 .4.x
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/design/auth.md
 								AllowAll
 								AuthenticationResult
 								AuthorizationLoadingLookupTest
 								HttpClient
 								allowAll
 								authenticatorChain
 								defaultUser
 								 - ../docs/design/coordinator.md
 								inputSegmentSizeBytes
 								skipOffsetFromLatest
 								 - ../docs/design/router.md
-												Select broker based on query context parameter `brokerService` (#11495)

This change allows the selection of a specific broker service (or broker tier) by the Router.

The newly added ManualTieredBrokerSelectorStrategy works as follows:

Check for the parameter brokerService in the query context. If this is a valid broker service, use it.
Check if the field defaultManualBrokerService has been set in the strategy. If this is a valid broker service, use it.
Move on to the next strategy
											
										
										
											2021-07-27 11:26:05 -04:00
+								brokerService
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								c3.2xlarge
-												Select broker based on query context parameter `brokerService` (#11495)

This change allows the selection of a specific broker service (or broker tier) by the Router.

The newly added ManualTieredBrokerSelectorStrategy works as follows:

Check for the parameter brokerService in the query context. If this is a valid broker service, use it.
Check if the field defaultManualBrokerService has been set in the strategy. If this is a valid broker service, use it.
Move on to the next strategy
											
										
										
											2021-07-27 11:26:05 -04:00
+								defaultManualBrokerService
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								maxPriority
 								minPriority
 								runtime.properties
 								timeBoundary
 								 - ../docs/design/segments.md
 x0
 x9
 GB
 mb-700mb
 								Bieber
 								IndexTask-based
 								Ke
 								datasource_intervalStart_intervalEnd_version_partitionNum
 								partitionNum
 								v9
 								 - ../docs/development/build.md
 .x
 u92
 								DskipTests
 								Papache-release
 								Pdist
-												add some details to the build doc (#9885)

* update initial build command

* add some details for building

* fix spelling check errors

* fix spelling check warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-05-21 15:35:54 -04:00
+								Ddruid.console.skip
 								yaml
-												Support for hadoop 3  via maven profiles (#11794)

Add support for hadoop 3 profiles . Most of the details are captured in #11791 .
We use a combination of maven profiles and resource filtering to achieve this. Hadoop2 is supported by default and a new maven profile with the name hadoop3 is created. This will allow the user to choose the profile which is best suited for the use case.
											
										
										
											2021-10-30 13:16:24 -04:00
+								Phadoop3
 								dist-hadoop3
 								hadoop3
 								hadoop2
 .x.x
 .x.x
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-contrib/ambari-metrics-emitter.md
 								ambari-metrics
 								metricName
 								trustStore
-												Move Azure extension into Core (#9394)

* Move Azure extension into Core

Moving the azure extension into Core.

* * Fix build failure

* * Add The MIT License (MIT) to list of compatible licenses

* * Address review comments

* * change reference to contrib azure to core azure

* * Fix spelling mistakes.

											
										
										
											2020-02-25 20:49:16 -05:00
+								 - ../docs/development/extensions-core/azure.md
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								StaticAzureBlobStoreFirehose
 								StaticS3Firehose
 								fetchTimeout
 								gz
 								maxCacheCapacityBytes
 								maxFetchCapacityBytes
 								maxFetchRetry
 								prefetchTriggerBytes
 								shardSpecs
-												Adding Shared Access resource support for azure (#12266)

Azure Blob storage has multiple modes of authentication. One of them is Shared access resource
. This is very useful in cases when we do not want to add the account key in the druid properties .


											
										
										
											2022-02-22 07:57:43 -05:00
+								sharedAccessStorageToken
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-contrib/cloudfiles.md
 								StaticCloudFilesFirehose
 								cloudfiles
 								rackspace-cloudfiles-uk
 								rackspace-cloudfiles-us
-												Move Azure extension into Core (#9394)

* Move Azure extension into Core

Moving the azure extension into Core.

* * Fix build failure

* * Add The MIT License (MIT) to list of compatible licenses

* * Address review comments

* * change reference to contrib azure to core azure

* * Fix spelling mistakes.

											
										
										
											2020-02-25 20:49:16 -05:00
+								StaticAzureBlobStoreFirehose
 								gz
 								shardSpecs
 								maxCacheCapacityBytes
 								maxFetchCapacityBytes
 								fetchTimeout
 								maxFetchRetry
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-contrib/distinctcount.md
 								distinctCount
 								groupBy
 								maxIntermediateRows
 								numValuesPerPass
 								queryGranularity
 								segmentGranularity
 								topN
 								visitor_id
 								 - ../docs/development/extensions-contrib/influx.md
 								cpu
 								web_requests
 								 - ../docs/development/extensions-contrib/influxdb-emitter.md
 								_
 								druid_
 								druid_cache_total
 								druid_hits
 								druid_query
 								historical001
 								 - ../docs/development/extensions-contrib/materialized-view.md
 								HadoopTuningConfig
 								TuningConfig
 								base-dataSource's
 								baseDataSource
 								baseDataSource-hashCode
 								classpathPrefix
 								derivativeDataSource
 								druid.extensions.hadoopDependenciesDir
 								hadoopDependencyCoordinates
 								maxTaskCount
 								metricsSpec
 								queryType
 								tuningConfig
 								 - ../docs/development/extensions-contrib/momentsketch-quantiles.md
 								arcsinh
 								fieldName
 								momentSketchMerge
 								momentsketch
 								 - ../docs/development/extensions-contrib/moving-average-query.md
 -minutes
 								MeanNoNulls
 								P1D
 								cycleSize
 								doubleMax
-												Vectorized ANY aggregators (#10338)

* WIP vectorized ANY aggregators

* tests

* fix aggs

* cleanup

* code review + tests

* docs

* use NilVectorSelector when needed

* fix spellcheck

* dont instantiate vectors

* cleanup
											
										
										
											2020-09-14 22:44:58 -04:00
+								doubleAny
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								doubleMean
 								doubleMeanNoNulls
 								doubleMin
-												update druid expression docs to indicate that array functions do not work at indexing time (#8734)

* update druid expression docs to indicate that array functions are not supported in transformSpec

* fix unrelated spelling check

											
										
										
											2019-10-25 01:04:08 -04:00
+								doubleSum
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								druid.generic.useDefaultValueForNull
-												Introducing a new config to ignore nulls while computing String Cardinality (#12345)

* Counting nulls in String cardinality with a config

* Adding tests for the new config

* Wrapping the vectorize part to allow backward compatibility

* Adding different tests, cleaning the code and putting the check at the proper position, handling hasRow() and hasValue() changes

* Updating testcase and code

* Adding null handling test to improve coverage

* Checkstyle fix

* Adding 1 more change in docs

* Making docs clearer
											
										
										
											2022-03-29 17:31:36 -04:00
+								druid.generic.ignoreNullsForStringCardinality
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								limitSpec
 								longMax
-												Vectorized ANY aggregators (#10338)

* WIP vectorized ANY aggregators

* tests

* fix aggs

* cleanup

* code review + tests

* docs

* use NilVectorSelector when needed

* fix spellcheck

* dont instantiate vectors

* cleanup
											
										
										
											2020-09-14 22:44:58 -04:00
+								longAny
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								longMean
 								longMeanNoNulls
 								longMin
-												update druid expression docs to indicate that array functions do not work at indexing time (#8734)

* update druid expression docs to indicate that array functions are not supported in transformSpec

* fix unrelated spelling check

											
										
										
											2019-10-25 01:04:08 -04:00
+								longSum
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								movingAverage
 								postAggregations
 								postAveragers
-												Improve doc of movingAverage (#11262)

* Make doc more directive

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Add limitation

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Suppress spelling check error
											
										
										
											2021-05-28 01:10:55 -04:00
+								pull-deps
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-contrib/opentsdb-emitter.md
 								defaultMetrics.json
-												Add config option for namespacePrefix (#9372)

* Add config option for namespacePrefix

opentsdb emitter sends metric names to opentsdb verbatim as what druid
names them, for example "query.count", this doesn't fit well with a
central opentsdb server which might have namespaced metrics, for example
"druid.query.count". This adds support for adding an optional prefix.

The prefix also gets a trailing dot (.), after it, so the metric name
becomes <namespacePrefix>.<metricname>

configureable as "druid.emitter.opentsdb.namespacePrefix", as
documented.

Co-authored-by: Martin Gerholm <martin.gerholm@deltaprojects.com>
Signed-off-by: Martin Gerholm <martin.gerholm@deltaprojects.com>
Signed-off-by: Björn Zettergren <bjorn.zettergren@deltaprojects.com>

* Spelling for PR #9372

Added "namespacePrefix" to .spelling exceptions, it's a variable name
used in documentation for opentsdb-emitter.

* fixing tests for PR #9372

changed naming of variables to be more descriptive
added test of prefix being an empty string: "".
added a conditional to buildNamespacePrefix to check for empty string
being fed if EventConverter called without OpentsdbEmitterConfig
instance.

* fixing checkstyle errors for PR #9372

used == to compare literal string, should be equals()

* cleaned up and updated PR #9372

Created a buildMetric function as suggested by clintropolis, and
removed redundant tests for empty strings as they're only used when
calling EventConverter directly without going through
OpentsdbEmitterConfig.

* consistent naming of tests PR #9372

Changed names of tests in files to match better with what it was
actually testing

changed check for Strings.isNullOrEmpty to just check for `null`, as
empty string valued `namespacePrefix` is handled in
OpentsdbEmitterConfig.

Co-authored-by: Martin Gerholm <inspector-martin@users.noreply.github.com>

											
										
										
											2020-02-20 17:01:41 -05:00
+								namespacePrefix
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								src
 								 - ../docs/development/extensions-contrib/redis-cache.md
 								loadList
-												Redis cache extension enhancement (#10240)

* support redis cluster

* add 'password', 'database' properties

* test cases passed

* update doc

* some improvements

* fix CI

* add more test cases to improve branch coverage

* fix dependency check for test

* resolve review comments
											
										
										
											2020-08-23 22:29:04 -04:00
+								pull-deps
 								PT2S
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-contrib/sqlserver.md
 								com.microsoft.sqlserver.jdbc.SQLServerDriver
 								sqljdbc
 								 - ../docs/development/extensions-contrib/statsd.md
 								convertRange
-												prometheus metric exporter (#10412)

* prometheus-emitter

* use existing jetty server to expose prometheus collection endpoint

* unused variables

* better variable names

* removed unused dependencies

* more metric definitions

* reorganize

* use prometheus HTTPServer instead of hooking into Jetty server

* temporary empty help string

* temporary non-empty help.  fix incorrect dimension value in JSON (also updated statsd json)

* added full help text.  added metric conversion factor for timers that are not using seconds. Correct metric dimension name in documentation

* added documentation for prometheus emitter

* safety for invalid labelNames

* fix travis checks

* Unit test and better sanitization of metrics names and label values

* add precondition to check namespace against regex

* use precompiled regex

* remove static imports. fix metric types

* better docs. fix possible NPE in PrometheusEmitterConfig. Guard against multiple calls to PrometheusEmitter.start()

* Update regex for label-value replacements to allow internal numeric values.  Additional tests

* Adds missing license header
updates website/.spelling to add words used in prometheus-emitter docs.
updates docs/operations/metrics.md to correct the spelling of
bufferPoolName

* fixes version in extensions-contrib/prometheus-emitter

* fix style guide errors

* update import ordering

* add another word to website/.spelling

* remove unthrown declared exception

* remove unused import

* Pushgateway strategy for metrics

* typo

* Format fix and nullable strategy

* Update pom file for prometheus-emitter

* code review comments. Counter to gauge for cache metrics, periodical task to pushGateway

* Syntax fix

* Dimension label regex include numeric character back, fix previous commit

* bump prometheus-emitter pom dev version

* Remove scheduled task inside poen that push metrics

* Fix checkstyle

* Unit test coverage

* Unit test coverage

* Spelling

* Doc fix

* spelling

Co-authored-by: Michael Schiff <michael.schiff@tubemogul.com>
Co-authored-by: Michael Schiff <schiff.michael@gmail.com>
Co-authored-by: Tianxin Zhao <tianxin.zhao@tubemogul.com>
Co-authored-by: Tianxin Zhao <tizhao@adobe.com>
											
										
										
											2021-03-09 17:37:31 -05:00
+								- ../docs/development/extensions-contrib/prometheus.md
 								HTTPServer
 								conversionFactor
 								prometheus
 								Pushgateway
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-contrib/tdigestsketch-quantiles.md
 								postAggregator
 								quantileFromTDigestSketch
 								quantilesFromTDigestSketch
 								tDigestSketch
 								 - ../docs/development/extensions-contrib/thrift.md
 								HadoopDruidIndexer
 								LzoThriftBlock
 								SequenceFile
 								classname
 								hadoop-lzo
 								inputFormat
 								inputSpec
 								ioConfig
 								parseSpec
 								thriftClass
 								thriftJar
 								 - ../docs/development/extensions-contrib/time-min-max.md
 								timeMax
 								timeMin
-												support Aliyun OSS service as deep storage (#9898)

* init commit, all tests passed

* fix format

Signed-off-by: frank chen <frank.chen021@outlook.com>

* data stored successfully

* modify config path

* add doc

* add aliyun-oss extension to project

* remove descriptor deletion code to avoid warning message output by aliyun client

* fix warnings reported by lgtm-com

* fix ci warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix errors reported by intellj inspection check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc spelling check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix dependency warnings reported by ci

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix warnings reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add package configuration to support showing extension info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add IT test cases and fix bugs

Signed-off-by: frank chen <frank.chen021@outlook.com>

* 1. code review comments adopted
2. change schema from 'aliyun-oss' to 'oss'

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add license info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc

Signed-off-by: frank chen <frank.chen021@outlook.com>

* exclude execution of IT testcases of OSS extension from CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* put the extensions under contrib group and add to distribution

* fix names in test cases

* add unit test to cover OssInputSource

* fix names in test cases

* fix dependency problem reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-07-02 01:20:53 -04:00
+								 - ../docs/development/extensions-contrib/aliyun-oss-extensions.md
-												Add more guidelines on the use of aliyun-oss-extensions (#11420)

* Add more description

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Update prefixes usage and Add troubleshooting section

* Add endpoint configuration recommendation

* Fix link

* resolve review comments


											
										
										
											2021-08-09 20:27:35 -04:00
+								Alibaba
-												support Aliyun OSS service as deep storage (#9898)

* init commit, all tests passed

* fix format

Signed-off-by: frank chen <frank.chen021@outlook.com>

* data stored successfully

* modify config path

* add doc

* add aliyun-oss extension to project

* remove descriptor deletion code to avoid warning message output by aliyun client

* fix warnings reported by lgtm-com

* fix ci warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix errors reported by intellj inspection check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc spelling check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix dependency warnings reported by ci

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix warnings reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add package configuration to support showing extension info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add IT test cases and fix bugs

Signed-off-by: frank chen <frank.chen021@outlook.com>

* 1. code review comments adopted
2. change schema from 'aliyun-oss' to 'oss'

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add license info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc

Signed-off-by: frank chen <frank.chen021@outlook.com>

* exclude execution of IT testcases of OSS extension from CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* put the extensions under contrib group and add to distribution

* fix names in test cases

* add unit test to cover OssInputSource

* fix names in test cases

* fix dependency problem reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-07-02 01:20:53 -04:00
+								Aliyun
-												Add more guidelines on the use of aliyun-oss-extensions (#11420)

* Add more description

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Update prefixes usage and Add troubleshooting section

* Add endpoint configuration recommendation

* Fix link

* resolve review comments


											
										
										
											2021-08-09 20:27:35 -04:00
+								aliyun-oss-extensions
-												support Aliyun OSS service as deep storage (#9898)

* init commit, all tests passed

* fix format

Signed-off-by: frank chen <frank.chen021@outlook.com>

* data stored successfully

* modify config path

* add doc

* add aliyun-oss extension to project

* remove descriptor deletion code to avoid warning message output by aliyun client

* fix warnings reported by lgtm-com

* fix ci warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix errors reported by intellj inspection check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc spelling check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix dependency warnings reported by ci

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix warnings reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add package configuration to support showing extension info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add IT test cases and fix bugs

Signed-off-by: frank chen <frank.chen021@outlook.com>

* 1. code review comments adopted
2. change schema from 'aliyun-oss' to 'oss'

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add license info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc

Signed-off-by: frank chen <frank.chen021@outlook.com>

* exclude execution of IT testcases of OSS extension from CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* put the extensions under contrib group and add to distribution

* fix names in test cases

* add unit test to cover OssInputSource

* fix names in test cases

* fix dependency problem reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-07-02 01:20:53 -04:00
+								AccessKey
-												Add more guidelines on the use of aliyun-oss-extensions (#11420)

* Add more description

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Update prefixes usage and Add troubleshooting section

* Add endpoint configuration recommendation

* Fix link

* resolve review comments


											
										
										
											2021-08-09 20:27:35 -04:00
+								accessKey
-												support Aliyun OSS service as deep storage (#9898)

* init commit, all tests passed

* fix format

Signed-off-by: frank chen <frank.chen021@outlook.com>

* data stored successfully

* modify config path

* add doc

* add aliyun-oss extension to project

* remove descriptor deletion code to avoid warning message output by aliyun client

* fix warnings reported by lgtm-com

* fix ci warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix errors reported by intellj inspection check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc spelling check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix dependency warnings reported by ci

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix warnings reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add package configuration to support showing extension info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add IT test cases and fix bugs

Signed-off-by: frank chen <frank.chen021@outlook.com>

* 1. code review comments adopted
2. change schema from 'aliyun-oss' to 'oss'

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add license info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc

Signed-off-by: frank chen <frank.chen021@outlook.com>

* exclude execution of IT testcases of OSS extension from CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* put the extensions under contrib group and add to distribution

* fix names in test cases

* add unit test to cover OssInputSource

* fix names in test cases

* fix dependency problem reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-07-02 01:20:53 -04:00
+								aliyun-oss
-												Add more guidelines on the use of aliyun-oss-extensions (#11420)

* Add more description

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Update prefixes usage and Add troubleshooting section

* Add endpoint configuration recommendation

* Fix link

* resolve review comments


											
										
										
											2021-08-09 20:27:35 -04:00
+								json
 								OSS
-												support Aliyun OSS service as deep storage (#9898)

* init commit, all tests passed

* fix format

Signed-off-by: frank chen <frank.chen021@outlook.com>

* data stored successfully

* modify config path

* add doc

* add aliyun-oss extension to project

* remove descriptor deletion code to avoid warning message output by aliyun client

* fix warnings reported by lgtm-com

* fix ci warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix errors reported by intellj inspection check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc spelling check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix dependency warnings reported by ci

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix warnings reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add package configuration to support showing extension info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add IT test cases and fix bugs

Signed-off-by: frank chen <frank.chen021@outlook.com>

* 1. code review comments adopted
2. change schema from 'aliyun-oss' to 'oss'

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add license info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc

Signed-off-by: frank chen <frank.chen021@outlook.com>

* exclude execution of IT testcases of OSS extension from CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* put the extensions under contrib group and add to distribution

* fix names in test cases

* add unit test to cover OssInputSource

* fix names in test cases

* fix dependency problem reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-07-02 01:20:53 -04:00
+								oss
-												Add more guidelines on the use of aliyun-oss-extensions (#11420)

* Add more description

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Update prefixes usage and Add troubleshooting section

* Add endpoint configuration recommendation

* Fix link

* resolve review comments


											
										
										
											2021-08-09 20:27:35 -04:00
+								secretKey
-												support Aliyun OSS service as deep storage (#9898)

* init commit, all tests passed

* fix format

Signed-off-by: frank chen <frank.chen021@outlook.com>

* data stored successfully

* modify config path

* add doc

* add aliyun-oss extension to project

* remove descriptor deletion code to avoid warning message output by aliyun client

* fix warnings reported by lgtm-com

* fix ci warnings

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix errors reported by intellj inspection check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc spelling check

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix dependency warnings reported by ci

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix warnings reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add package configuration to support showing extension info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add IT test cases and fix bugs

Signed-off-by: frank chen <frank.chen021@outlook.com>

* 1. code review comments adopted
2. change schema from 'aliyun-oss' to 'oss'

Signed-off-by: frank chen <frank.chen021@outlook.com>

* add license info

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix doc

Signed-off-by: frank chen <frank.chen021@outlook.com>

* exclude execution of IT testcases of OSS extension from CI

Signed-off-by: frank chen <frank.chen021@outlook.com>

* put the extensions under contrib group and add to distribution

* fix names in test cases

* add unit test to cover OssInputSource

* fix names in test cases

* fix dependency problem reported by CI

Signed-off-by: frank chen <frank.chen021@outlook.com>
											
										
										
											2020-07-02 01:20:53 -04:00
+								url
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-core/approximate-histograms.md
 								approxHistogram
 								approxHistogramFold
-												Add vectorization for druid-histogram extension (#10304)

* First draft

* Remove redundant code from FixedBucketsHistogramAggregator classes

* Add test cases for new classes

* Fix tests in sql compatible mode

* Typo fix

* Fix comment

* Add spelling

* Vectorize only for supported types

* Rename internal aggregator files

* Fix tests
											
										
										
											2020-09-09 16:56:33 -04:00
+								fixedBucketsHistogram
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								bucketNum
 								lowerLimit
 								numBuckets
 								upperLimit
 								 - ../docs/development/extensions-core/avro.md
 								AVRO-1124
 								Avro-1124
 								SchemaRepo
 								avro
 								avroBytesDecoder
-												add protobuf inputformat (#11018)

* add protobuf inputformat

* repair pom

* alter intermediateRow to type of Dynamicmessage

* add document

* refine test

* fix document

* add protoBytesDecoder

* refine document and add ser test

* add hash

* add schema registry ser test

Co-authored-by: yuanyi <yuanyi@freewheel.tv>
											
										
										
											2021-04-13 01:03:13 -04:00
+								protoBytesDecoder
-												Fix Avro support in Web Console (#10232)

* Fix Avro OCF detection prefix and run formation detection on raw input

* Support Avro Fixed and Enum types correctly

* Check Avro version byte in format detection

* Add test for AvroOCFReader.sample

Ensures that the Sampler doesn't receive raw input that it can't
serialize into JSON.

* Document Avro type handling

* Add TS unit tests for guessInputFormat
											
										
										
											2020-10-08 00:08:22 -04:00
+								flattenSpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								jq
 								org.apache.druid.extensions
 								schemaRepository
 								schema_inline
 								subjectAndIdConverter
 								url
 								 - ../docs/development/extensions-core/bloom-filter.md
 								BloomKFilter
 								bitset
 								outputStream
 								 - ../docs/development/extensions-core/datasketches-hll.md
 								HLLSketchBuild
 								HLLSketchMerge
 								lgK
 								log2
 								tgtHllType
 								 - ../docs/development/extensions-core/datasketches-quantiles.md
 								CDF
 								DoublesSketch
-												Configurable maxStreamLength for doubles sketches (#11574)

* Configurable maxStreamLength for doubles sketches

* fix equals/hashcode and it test failure

* fix test

* fix it test

* benchmark

* doc

* grouping key

* fix comment

* dependency check

* Update docs/development/extensions-core/datasketches-quantiles.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* Update docs/querying/sql.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2021-08-31 17:56:37 -04:00
+								maxStreamLength
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								PMF
 								quantilesDoublesSketch
 								toString
 								 - ../docs/development/extensions-core/datasketches-theta.md
 								isInputThetaSketch
 								thetaSketch
 								user_id
 								 - ../docs/development/extensions-core/datasketches-tuple.md
 								ArrayOfDoublesSketch
 								arrayOfDoublesSketch
 								metricColumns
 								nominalEntries
 								numberOfValues
 								 - ../docs/development/extensions-core/druid-basic-security.md
 								INFORMATION_SCHEMA
 								MyBasicAuthenticator
 								MyBasicAuthorizer
 								authenticatorName
 								authorizerName
 								druid_system
 								pollingPeriod
 								roleName
-												Support LDAP authentication/authorization (#6972)

* Support LDAP authentication/authorization

* fixed integration-tests

* fixed Travis CI build errors related to druid-security module

* fixed failing test

* fixed failing test header

* added comments, force build

* fixes for strict compilation spotbugs checks

* removed authenticator rolling credential update feature

* removed escalator rolling credential update feature

* fixed teamcity inspection deprecated API usage error

* fixed checkstyle execution error, removed unused import

* removed cached config as part of removing authenticator rolling credential update feature

* removed config bundle entity as part of removing authenticator rolling credential update feature

* refactored ldao configuration

* added support for SSLContext configuration and TLSCertificateChecker

* removed check to return authentication failure when user has no group assigned, will be checked and handled by the authorizer

* Separate out authorizer checks between metadata-backed store user and LDAP user/groups

* refactored BasicSecuritySSLSocketFactory usage to fix strict compilation spotbugs checks

* fixes build issue

* final review comments updates

* final review comments updates

* fixed LGTM and spellcheck alerts

* Fixed Avatica auth failure error message check

* Updated metadata credentials validator exception message string, replaced DB with metadata store

											
										
										
											2019-10-08 20:08:27 -04:00
+								LDAP
 								ldap
 								MyBasicMetadataAuthenticator
 								MyBasicLDAPAuthenticator
 								MyBasicMetadataAuthorizer
 								MyBasicLDAPAuthorizer
 								credentialsValidator
 								sAMAccountName
 								objectClass
 								initialAdminRole
 								adminGroupMapping
 								groupMappingName
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-core/druid-kerberos.md
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+KiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								HttpComponents
 								MyKerberosAuthenticator
 								RFC-4559
 								SPNego
 								_HOST
 								 - ../docs/development/extensions-core/druid-lookups.md
 								cacheFactory
 								concurrencyLevel
 								dataFetcher
 								expireAfterAccess
 								expireAfterWrite
 								initialCapacity
 								loadingCacheSpec
 								maxEntriesSize
 								maxStoreSize
 								maximumSize
 								onHeapPolling
 								pollPeriod
 								reverseLoadingCacheSpec
-												druid extension for OpenID Connect auth using pac4j lib (#8992)

* druid pac4j security extension for OpenID Connect OAuth 2.0 authentication

* update version in druid-pac4j pom

* introducing unauthorized resource filter

* authenticated but authorized /unified-webconsole.html

* use httpReq.getRequestURI() for matching callback path

* add documentation

* minor doc addition

* licesne file updates

* make dependency analyze succeed

* fix doc build

* hopefully fixes doc build

* hopefully fixes license check build

* yet another try on fixing license build

* revert unintentional changes to website folder

* update version to 0.18.0-SNAPSHOT

* check session and its expiry on each request

* add crypto service

* code for encrypting the cookie

* update doc with cookiePassphrase

* update license yaml

* make sessionstore in Pac4jFilter private non static

* make Pac4jFilter fields final

* okta: use sha256 for hmac

* remove incubating

* add UTs for crypto util and session store impl

* use standard charsets

* add license header

* remove unused file

* add org.objenesis.objenesis to license.yaml

* a bit of nit changes  in CryptoService  and embedding EncryptionResult for clarity

* rename alg  to cipherAlgName

* take cipher alg name, mode and padding as input

* add java doc  for CryptoService  and make it more understandable

* another  UT for CryptoService

* cache pac4j Config

* use generics clearly in Pac4jSessionStore

* update cookiePassphrase doc to mention PasswordProvider

* mark stuff Nullable where appropriate in Pac4jSessionStore

* update doc to mention jdbc

* add error log on reaching callback resource

* javadoc  for Pac4jCallbackResource

* introduce NOOP_HTTP_ACTION_ADAPTER

* add correct module name in license file

* correct extensions folder name in licenses.yaml

* replace druid-kubernetes-extensions to druid-pac4j

* cache SecureRandom instance

* rename UnauthorizedResourceFilter to AuthenticationOnlyResourceFilter
											
										
										
											2020-03-23 21:15:45 -04:00
+								 - ../docs/development/extensions-core/druid-pac4j.md
 								OAuth
 								Okta
 								OpenID
 								pac4j
-												kubernetes based discovery druid extension to run Druid on K8S without Zookeeper (#10544)

* honor zk enablement config in more places in druid code

* kubernetes based discovery module

* fix spotbugs check

* fix intellij checks error

* fix doc link to kubernetes.md from extension

* make spellchecker happy

* update license.yaml

* fix dependency check errors

* update extension coverage

* UTs for BaseNodeRoleWatcher

* fix forbidden-api check

* update k8s module coverage ignores

* add Bouncy Castle License being same as MIT License for license checking purposes

* further update licenses.yaml

* label/annotation pre-existence assumption

* address review comment
											
										
										
											2020-12-15 00:10:31 -05:00
+								 - ../docs/development/extensions-core/kubernetes.md
 								Env
 								POD_NAME
 								POD_NAMESPACE
 								ConfigMap
 								PT17S
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-core/google.md
 								GCS
 								StaticGoogleBlobStoreFirehose
 								 - ../docs/development/extensions-core/hdfs.md
 								gcs-connector
 								hadoop2
 								hdfs
 								 - ../docs/development/extensions-core/kafka-extraction-namespace.md
-												Change Kafka Lookup Extractor to not register consumer group (#12842)

* change kafka lookups module to not commit offsets

The current behaviour of the Kafka lookup extractor is to not commit
offsets by assigning a unique ID to the consumer group and setting
auto.offset.reset to earliest. This does the job but also pollutes the
Kafka broker with a bunch of "ghost" consumer groups that will never again be
used.

To fix this, we now set enable.auto.commit to false, which prevents the
ghost consumer groups being created in the first place.

* update docs to include new enable.auto.commit setting behaviour

* update kafka-lookup-extractor documentation

Provide some additional detail on functionality and configuration.
Hopefully this will make it clearer how the extractor works for
developers who aren't so familiar with Kafka.

* add comments better explaining the logic of the code

* add spelling exceptions for kafka lookup docs
											
										
										
											2022-08-09 06:44:22 -04:00
+								Aotearoa
 								Czechia
 								KTable
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								LookupExtractorFactory
-												Change Kafka Lookup Extractor to not register consumer group (#12842)

* change kafka lookups module to not commit offsets

The current behaviour of the Kafka lookup extractor is to not commit
offsets by assigning a unique ID to the consumer group and setting
auto.offset.reset to earliest. This does the job but also pollutes the
Kafka broker with a bunch of "ghost" consumer groups that will never again be
used.

To fix this, we now set enable.auto.commit to false, which prevents the
ghost consumer groups being created in the first place.

* update docs to include new enable.auto.commit setting behaviour

* update kafka-lookup-extractor documentation

Provide some additional detail on functionality and configuration.
Hopefully this will make it clearer how the extractor works for
developers who aren't so familiar with Kafka.

* add comments better explaining the logic of the code

* add spelling exceptions for kafka lookup docs
											
										
										
											2022-08-09 06:44:22 -04:00
+								Zeelund
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								zookeeper.connect
 								 - ../docs/development/extensions-core/kafka-ingestion.md
 .11.x.
 Z
 -01-01T11
 -01-01T12
 -01-01T14
 								CONNECTING_TO_STREAM
 								CREATING_TASKS
 								DISCOVERING_INITIAL_TASKS
 								KafkaSupervisorIOConfig
 								KafkaSupervisorTuningConfig
 								LOST_CONTACT_WITH_STREAM
 								OffsetOutOfRangeException
 								P2147483647D
 								PT10M
 								PT10S
 								PT1H
 								PT30M
 								PT30S
 								PT5S
 								PT80S
-												Docs - update dynamic config provider topic (#11795)

* update dynamic config provider

* update topic

* add examples for dynamic config provider:

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/dynamic-config-provider.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/dynamic-config-provider.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/dynamic-config-provider.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/dynamic-config-provider.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/development/extensions-core/kafka-ingestion.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>

* Update docs/operations/dynamic-config-provider.md

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* Update docs/operations/dynamic-config-provider.md

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* Update kafka-ingestion.md

Co-authored-by: Katya Macedo  <38017980+ektravel@users.noreply.github.com>
Co-authored-by: Clint Wylie <cjwylie@gmail.com>
											
										
										
											2021-10-14 20:51:32 -04:00
+								SASL
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								SegmentWriteOutMediumFactory
 								UNABLE_TO_CONNECT_TO_STREAM
 								UNHEALTHY_SUPERVISOR
 								UNHEALTHY_TASKS
 								dimensionCompression
 								earlyMessageRejectionPeriod
 								indexSpec
 								intermediateHandoffPeriod
 								longEncoding
 								maxBytesInMemory
 								maxPendingPersists
 								maxRowsInMemory
 								maxRowsPerSegment
 								maxSavedParseExceptions
 								maxTotalRows
 								metricCompression
 								numKafkaPartitions
 								taskCount
 								taskDuration
 								 - ../docs/development/extensions-core/kinesis-ingestion.md
 .2dist
 								KinesisSupervisorIOConfig
 								KinesisSupervisorTuningConfig
-												Additional Kinesis resharding fixes (#8870)

* Additional Kinesis resharding fixes

* Address PR comments

* Remove unused method

* Adjust SegmentTransactionalInsertAction null handling

* Check for unchanged metadata on empty publish

* Add logs for empty publish

* Fix javadoc

* Clear offset when invalid endOffsets are seen

* Fix LGTM alert

* Fix build

* Add resharding note to Kinesis docs

* Checkstyle

* Spelling

* Address PR comments

* Checkstyle

											
										
										
											2019-11-28 15:59:01 -05:00
+								Resharding
 								resharding
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								LZ4LZFuncompressedLZ4LZ4LZFuncompressednoneLZ4autolongsautolongslongstypeconcisetyperoaringcompressRunOnSerializationtruetypestreamendpointreplicastaskCounttaskCount
 								deaggregate
 								druid-kinesis-indexing-service
 								maxRecordsPerPoll
 								maxRecordsPerPollrecordsPerFetchfetchDelayMillisreplicasfetchDelayMillisrecordsPerFetchfetchDelayMillismaxRecordsPerPollamazon-kinesis-client1
 								numKinesisShards
 								numProcessors
 								q.size
-												More Kinesis resharding adjustments (#8671)

* More Kinesis resharding adjustments

* Fix TC inspection

* Fix comment'

* Adjust comment, small refactor

* Make repartition transition time configurable

* Add spellcheck exclusion

* Spelling fix

											
										
										
											2019-10-16 02:19:17 -04:00
+								repartitionTransitionDuration
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								replicastaskCounttaskCount
 								resetuseEarliestSequenceNumberPOST
 								resumePOST
 								statusrecentErrorsdruid.supervisor.maxStoredExceptionEventsstatedetailedStatestatedetailedStatestatestatePENDINGRUNNINGSUSPENDEDSTOPPINGUNHEALTHY_SUPERVISORUNHEALTHY_TASKSdetailedStatestatedruid.supervisor.unhealthinessThresholddruid.supervisor.taskUnhealthinessThresholdtaskDurationtaskCountreplicasdetailedStatedetailedStateRUNNINGPOST
 								supervisorPOST
 								supervisorfetchThreadsfetchDelayMillisrecordsPerFetchmaxRecordsPerPollpoll
 								suspendPOST
 								taskCounttaskDurationreplicas
 								taskCounttaskDurationtaskDurationPOST
 								taskDurationstartDelayperioduseEarliestSequenceNumbercompletionTimeouttaskDurationlateMessageRejectionPeriodPT1HearlyMessageRejectionPeriodPT1HPT1HrecordsPerFetchfetchDelayMillisawsAssumedRoleArnawsExternalIddeaggregateGET
 								terminatePOST
 								terminatedruid.worker.capacitytaskDurationcompletionTimeoutreplicastaskCountreplicas
-												More Kinesis resharding adjustments (#8671)

* More Kinesis resharding adjustments

* Fix TC inspection

* Fix comment'

* Adjust comment, small refactor

* Make repartition transition time configurable

* Add spellcheck exclusion

* Spelling fix

											
										
										
											2019-10-16 02:19:17 -04:00
+								PT2M
 								kinesis.us
 								amazonaws.com
 								PT6H
 								GetRecords
 								KCL
 								signalled
 								ProvisionedThroughputExceededException
 								Deaggregation
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/development/extensions-core/lookups-cached-global.md
 								baz
 								customJson
 								lookupParseSpec
 								namespaceParseSpec
 								simpleJson
 								 - ../docs/development/extensions-core/orc.md
 								dimensionSpec
 								flattenSpec
 								 - ../docs/development/extensions-core/parquet.md
 								binaryAsString
 								 - ../docs/development/extensions-core/postgresql.md
 								sslFactory's
 								sslMode
 								 - ../docs/development/extensions-core/protobuf.md
 								Proto
 								metrics.desc
 								metrics.desc.
 								metrics.proto.
 								metrics_pb
 								protoMessageType
 								timeAndDims
 								tmp
 								 - ../docs/development/extensions-core/s3.md
 								SigV4
 								jvm.config
 								kms
 								s3
 								s3a
 								s3n
 								uris
 								 - ../docs/development/extensions-core/simple-client-sslcontext.md
 								KeyManager
 								SSLContext
 								TrustManager
 								 - ../docs/development/extensions-core/stats.md
 								GenericUDAFVariance
 								Golub
 								J.L.
 								LeVeque
 								Numer
 								chunk1
 								chunk2
 								stddev
 								t1
 								t2
 								variance1
 								variance2
 								varianceFold
 								variance_pop
 								variance_sample
 								 - ../docs/development/extensions-core/test-stats.md
 								Berry_statbook
 								Berry_statbook_chpt6.pdf
 								S.E.
 								engineering.com
 								jcb0773
 								n1
 								n2
 								p1
 								p2
 								pvalue2tailedZtest
 								sqrt
 								successCount1
 								successCount2
 								www.isixsigma.com
 								www.paypal
 								www.ucs.louisiana.edu
 								zscore
 								zscore2sample
 								ztests
 								 - ../docs/development/extensions.md
 								DistinctCount
 								artifactId
 								com.example
 								common.runtime.properties
-												AWS RDS token based password  provider (#9518)

* refresh db pwd

* aws iam token password provider

* fix analyze-dependencies build

* fix doc build

* add  ut for BasicDataSourceExt

* more doc updates

* more  doc update

* moving aws  token password  provider to new extension

* remove duplicate changes

* make  all config inline

* extension docs

* refresh db  password  in SQL Firehose code path as well

* add ut

* fix build

* add new extension to distribution

* rds lib is not provided

* fix license build

* add version to license

* change parent version to 0.19.0-snapshot

* address review comments

* fix core/ code coverage

* Update server/src/main/java/org/apache/druid/metadata/BasicDataSourceExt.java

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* address review comments

* fix spellchecker

* remove inadvertant website file change

Co-authored-by: Clint Wylie <cjwylie@gmail.com>
											
										
										
											2021-01-07 00:15:29 -05:00
+								druid-aws-rds-extensions
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								druid-cassandra-storage
 								druid-distinctcount
 								druid-ec2-extensions
 								druid-kafka-extraction-namespace
 								druid-kafka-indexing-service
 								druid-opentsdb-emitter
 								druid-protobuf-extensions
 								druid-tdigestsketch
 								druid.apache.org
 								groupId
 								jvm-global
 								kafka-emitter
 								org.apache.druid.extensions.contrib.
 								pull-deps
 								sqlserver-metadata-storage
 								statsd-emitter
 								 - ../docs/development/geo.md
 								coords
 								dimName
 								maxCoords
-												Implementing dropwizard emitter for druid (#7363)

* Implementing dropwizard emitter for druid

making metric manager and alert emitters as optional

* Refactor and make things work

more improvements

improve docs

refactrings

* Fix teamcity inspections

* review comments

* more review comments

* add limit to max number of gauges

* update pom version

* fix pom

* review comments

* review comment

* review comments

* fix broken doc link

review comments

review comments

* review comments

* fix checkstyle

* more spell check fixes

* fix travis failures

											
										
										
											2019-10-01 17:59:30 -04:00
+								Mb
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								minCoords
 								 - ../docs/development/javascript.md
 								Metaspace
 								dev
 								 - ../docs/development/modules.md
 								AggregatorFactory
 								ArchiveTask
 								ComplexMetrics
 								DataSegmentArchiver
 								DataSegmentKiller
 								DataSegmentMover
 								DataSegmentPuller
 								DataSegmentPusher
 								DruidModule
 								ExtractionFns
 								HdfsStorageDruidModule
 								JacksonInject
 								MapBinder
 								MoveTask
 								ObjectMapper
 								PasswordProvider
 								PostAggregators
 								QueryRunnerFactory
 								SegmentMetadataQuery
 								SegmentMetadataQueryQueryToolChest
 								StaticS3FirehoseFactory
 								loadSpec
 								multibind
 								pom.xml
 								 - ../docs/ingestion/data-formats.md
 .6.x
 .7.x
 .7.x.
 								TimeAndDims
 								column2
 								column_1
 								column_n
 								com.opencsv
 								ctrl
-												Kafka Input Format for headers, key and payload parsing (#11630)

### Description

Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.

PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.

We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.

This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.

Lets look at a sample input format from the above discussion

"inputFormat":
{
    "type": "kafka",     // New input format type
    "headerLabelPrefix": "kafka.header.",   // Label prefix for header columns, this will avoid collusions while merging columns
    "recordTimestampLabelPrefix": "kafka.",  // Kafka record's timestamp is made available in case payload does not carry timestamp
    "headerFormat":  // Header parser specifying that values are of type string
    {
        "type": "string"
    },
    "valueFormat": // Value parser from json parsing
    {
        "type": "json",
        "flattenSpec": {
          "useFieldDiscovery": true,
          "fields": [...]
        }
    },
    "keyFormat":  // Key parser also from json parsing
    {
        "type": "json"
    }
}

Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json. 

KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion. 

"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.

Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.

Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".

## KafkaInputFormat Class: 
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.

During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
											
										
										
											2021-10-07 11:56:27 -04:00
+								headerFormat
 								headerLabelPrefix
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								jsonLowercase
-												Kafka Input Format for headers, key and payload parsing (#11630)

### Description

Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.

PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.

We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.

This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.

Lets look at a sample input format from the above discussion

"inputFormat":
{
    "type": "kafka",     // New input format type
    "headerLabelPrefix": "kafka.header.",   // Label prefix for header columns, this will avoid collusions while merging columns
    "recordTimestampLabelPrefix": "kafka.",  // Kafka record's timestamp is made available in case payload does not carry timestamp
    "headerFormat":  // Header parser specifying that values are of type string
    {
        "type": "string"
    },
    "valueFormat": // Value parser from json parsing
    {
        "type": "json",
        "flattenSpec": {
          "useFieldDiscovery": true,
          "fields": [...]
        }
    },
    "keyFormat":  // Key parser also from json parsing
    {
        "type": "json"
    }
}

Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json. 

KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion. 

"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.

Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.

Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".

## KafkaInputFormat Class: 
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.

During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
											
										
										
											2021-10-07 11:56:27 -04:00
+								kafka
 								KafkaStringHeaderFormat
 								kafka.header.
 								kafka.key
 								kafka.timestamp
 								keyColumnName
 								keyFormat
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								listDelimiter
-												Kafka Input Format for headers, key and payload parsing (#11630)

### Description

Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.

PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.

We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.

This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.

Lets look at a sample input format from the above discussion

"inputFormat":
{
    "type": "kafka",     // New input format type
    "headerLabelPrefix": "kafka.header.",   // Label prefix for header columns, this will avoid collusions while merging columns
    "recordTimestampLabelPrefix": "kafka.",  // Kafka record's timestamp is made available in case payload does not carry timestamp
    "headerFormat":  // Header parser specifying that values are of type string
    {
        "type": "string"
    },
    "valueFormat": // Value parser from json parsing
    {
        "type": "json",
        "flattenSpec": {
          "useFieldDiscovery": true,
          "fields": [...]
        }
    },
    "keyFormat":  // Key parser also from json parsing
    {
        "type": "json"
    }
}

Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json. 

KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion. 

"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.

Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.

Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".

## KafkaInputFormat Class: 
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.

During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
											
										
										
											2021-10-07 11:56:27 -04:00
+								timestampColumnName
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								timestampSpec
-												Add config and header support for confluent schema registry.  (#10314)

* Add config and header support for confluent schema registry. (porting code from https://github.com/apache/druid/pull/9096)

* Add Eclipse Public License 2.0 to license check

* Update licenses.yaml, revert changes to check-licenses.py and dependencies for integration-tests

* Add spelling exception and remove unused dependency

* Use non-deprecated getSchemaById() and remove duplicated license entry

* Update docs/ingestion/data-formats.md

Co-authored-by: Clint Wylie <cjwylie@gmail.com>

* Added check for schema being null, as per Confluent code

* Missing imports and whitespace

* Updated unit tests with AvroSchema

Co-authored-by: Sergio Spinatelli <sergio.spinatelli.extern@7-tv.de>
Co-authored-by: Sergio Spinatelli <sergio.spinatelli.extern@joyn.de>
Co-authored-by: Clint Wylie <cjwylie@gmail.com>
											
										
										
											2021-02-27 17:25:35 -05:00
+								urls
-												Kafka Input Format for headers, key and payload parsing (#11630)

### Description

Today we ingest a number of high cardinality metrics into Druid across dimensions. These metrics are rolled up on a per minute basis, and are very useful when looking at metrics on a partition or client basis. Events is another class of data that provides useful information about a particular incident/scenario inside a Kafka cluster. Events themselves are carried inside kafka payload, but nonetheless there are some very useful metadata that is carried in kafka headers that can serve as useful dimension for aggregation and in turn bringing better insights.

PR(https://github.com/apache/druid/pull/10730) introduced support of Kafka headers in InputFormats.

We still need an input format to parse out the headers and translate those into relevant columns in Druid. Until that’s implemented, none of the information available in the Kafka message headers would be exposed. So first there is a need to write an input format that can parse headers in any given format(provided we support the format) like we parse payloads today. Apart from headers there is also some useful information present in the key portion of the kafka record. We also need a way to expose the data present in the key as druid columns. We need a generic way to express at configuration time what attributes from headers, key and payload need to be ingested into druid. We need to keep the design generic enough so that users can specify different parsers for headers, key and payload.

This PR is designed to solve the above by providing wrapper around any existing input formats and merging the data into a single unified Druid row.

Lets look at a sample input format from the above discussion

"inputFormat":
{
    "type": "kafka",     // New input format type
    "headerLabelPrefix": "kafka.header.",   // Label prefix for header columns, this will avoid collusions while merging columns
    "recordTimestampLabelPrefix": "kafka.",  // Kafka record's timestamp is made available in case payload does not carry timestamp
    "headerFormat":  // Header parser specifying that values are of type string
    {
        "type": "string"
    },
    "valueFormat": // Value parser from json parsing
    {
        "type": "json",
        "flattenSpec": {
          "useFieldDiscovery": true,
          "fields": [...]
        }
    },
    "keyFormat":  // Key parser also from json parsing
    {
        "type": "json"
    }
}

Since we have independent sections for header, key and payload, it will enable parsing each section with its own parser, eg., headers coming in as string and payload as json. 

KafkaInputFormat will be the uber class extending inputFormat interface and will be responsible for creating individual parsers for header, key and payload, blend the data resolving conflicts in columns and generating a single unified InputRow for Druid ingestion. 

"headerFormat" will allow users to plug parser type for the header values and will add default header prefix as "kafka.header."(can be overridden) for attributes to avoid collision while merging attributes with payload.

Kafka payload parser will be responsible for parsing the Value portion of the Kafka record. This is where most of the data will come from and we should be able to plugin existing parser. One thing to note here is that if batching is performed, then the code is augmenting header and key values to every record in the batch.

Kafka key parser will handle parsing Key portion of the Kafka record and will ingest the Key with dimension name as "kafka.key".

## KafkaInputFormat Class: 
This is the class that orchestrates sending the consumerRecord to each parser, retrieve rows, merge the columns into one final row for Druid consumption. KafkaInputformat should make sure to release the resources that gets allocated as a part of reader in CloseableIterator<InputRow> during normal and exception cases.

During conflicts in dimension/metrics names, the code will prefer dimension names from payload and ignore the dimension either from headers/key. This is done so that existing input formats can be easily migrated to this new format without worrying about losing information.
											
										
										
											2021-10-07 11:56:27 -04:00
+								valueFormat
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/ingestion/data-management.md
 GB
-												Use hash of Segment IDs instead of a list of explicit segments in auto compaction (#8571)

* IOConfig for compaction task

* add javadoc, doc, unit test

* fix webconsole test

* add spelling

* address comments

* fix build and test

* address comments

											
										
										
											2019-10-09 14:12:00 -04:00
+								IOConfig
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								compactionTask
 								compactionTasks
 								ingestSegmentFirehose
 								numShards
 								 - ../docs/ingestion/faq.md
 								IngestSegment
 								IngestSegmentFirehose
 								maxSizes
 								windowPeriod
 								 - ../docs/ingestion/hadoop.md
 -01-01T00
 -01-03T00
 -01-05T00
 -01-07T00
 MB
 								CombineTextInputFormat
 								HadoopIndexTask
 								InputFormat
 								InputSplit
 								JobHistory
 								a.example.com
 								assumeGrouped
-												Allow client to configure batch ingestion task to wait to complete until segments are confirmed to be available by other (#10676)

* Add ability to wait for segment availability for batch jobs

* IT updates

* fix queries in legacy hadoop IT

* Fix broken indexing integration tests

* address an lgtm flag

* spell checker still flagging for hadoop doc. adding under that file header too

* fix compaction IT

* Updates to wait for availability method

* improve unit testing for patch

* fix bad indentation

* refactor waitForSegmentAvailability

* Fixes based off of review comments

* cleanup to get compile after merging with master

* fix failing test after previous logic update

* add back code that must have gotten deleted during conflict resolution

* update some logging code

* fixes to get compilation working after merge with master

* reset interrupt flag in catch block after code review pointed it out

* small changes following self-review

* fixup some issues brought on by merge with master

* small changes after review

* cleanup a little bit after merge with master

* Fix potential resource leak in AbstractBatchIndexTask

* syntax fix

* Add a Compcation TuningConfig type

* add docs stipulating the lack of support by Compaction tasks for the new config

* Fixup compilation errors after merge with master

* Remove erreneous newline
											
										
										
											2021-04-09 00:03:00 -04:00
+								awaitSegmentAvailabilityTimeoutMillis
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								cleanupOnFailure
 								combineText
 								connectURI
 								dataGranularity
 								datetime
 								f.example.com
 								filePattern
 								forceExtendableShardSpecs
 								ignoreInvalidRows
 								ignoreWhenNoSegments
 								indexSpecForIntermediatePersists
 								index_hadoop
 								inputPath
 								inputSpecs
 								interval1
 								interval2
 								jobProperties
 								leaveIntermediate
 								logParseExceptions
 								mapred.map.tasks
 								mapreduce.job.maps
 								maxParseExceptions
 								maxPartitionSize
 								maxSplitSize
 								metadataUpdateSpec
 								numBackgroundPersistThreads
 								overwriteFiles
 								partitionDimension
 								partitionDimensions
 								partitionSpec
 								pathFormat
 								segmentOutputPath
 								segmentTable
 								shardSpec
 								single_dim
 								targetPartitionSize
-												Rename partition spec fields (#8507)

* Rename partition spec fields

Rename partition spec fields to be consistent across the various types
(hashed, single_dim, dynamic). Specifically, use targetNumRowsPerSegment
and maxRowsPerSegment in favor of targetPartitionSize and
maxSegmentSize. Consistent and clearer names are easier for users to
understand and use.

Also fix various IntelliJ inspection warnings and doc spelling mistakes.

* Fix test

* Improve docs

* Add targetRowsPerSegment to HashedPartitionsSpec

											
										
										
											2019-09-20 16:59:18 -04:00
+								targetRowsPerSegment
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								useCombiner
 								useExplicitVersion
 								useNewAggs
 								useYarnRMJobStatusFallback
 								workingPath
 								z.example.com
 								 - ../docs/ingestion/native-batch.md
 MB
 								CombiningFirehose
 								DataSchema
 								DefaultPassword
 								EnvironmentVariablePasswordProvider
 								HttpFirehose
 								IOConfig
 								InlineFirehose
 								LocalFirehose
 								PartitionsSpec
 								PasswordProviders
-												Auto compaction based on parallel indexing (#8570)

* Auto compaction based on parallel indexing

* javadoc and doc

* typo

* update spell

* addressing comments

* address comments

* fix log

* fix build

* fix test

* increase default max input segment bytes per task

* fix test

											
										
										
											2019-10-18 16:24:14 -04:00
+								SegmentsSplitHintSpec
 								SplitHintSpec
-												Add support for optional aws credentials for s3 for ingestion (#9375)

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* fix build failure

* fix failing build

* fix failing build

* Code cleanup

* fix failing test

* Removed CloudConfigProperties and make specific class for each cloudInputSource

* Removed CloudConfigProperties and make specific class for each cloudInputSource

* pass s3ConfigProperties for split

* lazy init s3client

* update docs

* fix docs check

* address comments

* add ServerSideEncryptingAmazonS3.Builder

* fix failing checkstyle

* fix typo

* wrap the ServerSideEncryptingAmazonS3.Builder in a provider

* added java docs for S3InputSource constructor

* added java docs for S3InputSource constructor

* remove wrap the ServerSideEncryptingAmazonS3.Builder in a provider

											
										
										
											2020-02-25 23:59:53 -05:00
+								accessKeyId
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								appendToExisting
 								baseDir
 								chatHandlerNumRetries
 								chatHandlerTimeout
-												Docs: Add multi-dimension partitioning doc; refactor native batch and separate into smaller topics. (#11983)

Adds documentation for multi-dimension partitioning. cc: @kfaraz
Refactors the native batch partitioning topic as follows:

Native batch ingestion covers parallel-index
Native batch simple task indexing covers index
Native batch input sources covers ioSource
Native batch ingestion with firehose covers deprecated firehose
											
										
										
											2021-12-03 06:07:14 -05:00
+								cityName
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								connectorConfig
-												DruidInputSource: Fix issues in column projection, timestamp handling. (#10267)

* DruidInputSource: Fix issues in column projection, timestamp handling.

DruidInputSource, DruidSegmentReader changes:

1) Remove "dimensions" and "metrics". They are not necessary, because we
   can compute which columns we need to read based on what is going to
   be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
   to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
   the timestamp of the returned InputRows was set to the `__time` column
   of the input datasource.

(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.

(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.

(1) and (3) are breaking changes.

Web console changes:

1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
   compatibility with the new behavior.

Other changes:

1) Add ColumnsFilter, a new class that allows input readers to determine
   which columns they need to read. Currently, it's only used by the
   DruidInputSource, but it could be used by other columnar input sources
   in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
   ColumnsFilter for given timestamp, dimensions, transform, and metrics.
5) Add "getRequiredColumns" method to TransformSpec to support the above.

* Various fixups.

* Uncomment incorrectly commented lines.

* Move TransformSpecTest to the proper module.

* Add druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.

* Fix.

* Fix build.

* Checkstyle.

* Misc fixes.

* Fix test.

* Move config.

* Fix imports.

* Fixup.

* Fix ShuffleResourceTest.

* Add import.

* Smarter exclusions.

* Fixes based on tests.

Also, add TIME_COLUMN constant in the web console.

* Adjustments for tests.

* Reorder test data.

* Update docs.

* Update docs to say Druid 0.22.0 instead of 0.21.0.

* Fix test.

* Fix ITAutoCompactionTest.

* Changes from review & from merging.
											
										
										
											2021-03-25 13:32:21 -04:00
+								countryName
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								dataSchema's
-												Add an option for ingestion task to drop (mark unused) all existing segments that are contained by interval in the ingestionSpec (#11025)

* Auto-Compaction can run indefinitely when segmentGranularity is changed from coarser to finer.

* Add option to drop segments after ingestion

* fix checkstyle

* add tests

* add tests

* add tests

* fix test

* add tests

* fix checkstyle

* fix checkstyle

* add docs

* fix docs

* address comments

* address comments

* fix spelling
											
										
										
											2021-04-01 15:29:36 -04:00
+								dropExisting
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								foldCase
 								forceGuaranteedRollup
 								httpAuthenticationPassword
 								httpAuthenticationUsername
 								ingestSegment
-												Add DruidInputSource (replacement for IngestSegmentFirehose) (#8982)

* Add Druid input source and format

* Inherit dims/metrics from segment

* Add ingest segment firehose reindexing test

* Remove unnecessary module

* Fix unit tests, checkstyle

* Add doc entry

* Fix dimensionExclusions handling, add parallel index integration test

* Add spelling exclusion

* Address some PR comments

* Checkstyle

* wip

* Address rest of PR comments

* Address PR comments

											
										
										
											2019-12-05 19:50:00 -05:00
+								InputSource
 								DruidInputSource
-												Multiphase segment merge for IndexMergerV9 (#10689)

* Multiphase merge for IndexMergerV9

* JSON fix

* Cleanup temp files

* Docs

* Address logging and add IT

* Fix spelling and test unloader datasource name
											
										
										
											2021-01-06 01:19:09 -05:00
+								maxColumnsToMerge
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								maxInputSegmentBytesPerTask
 								maxNumConcurrentSubTasks
 								maxNumSegmentsToMerge
 								maxRetry
 								pushTimeout
 								reportParseExceptions
-												Add support for optional aws credentials for s3 for ingestion (#9375)

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* Add support for optional cloud (aws, gcs, etc.) credentials for s3 for ingestion

* fix build failure

* fix failing build

* fix failing build

* Code cleanup

* fix failing test

* Removed CloudConfigProperties and make specific class for each cloudInputSource

* Removed CloudConfigProperties and make specific class for each cloudInputSource

* pass s3ConfigProperties for split

* lazy init s3client

* update docs

* fix docs check

* address comments

* add ServerSideEncryptingAmazonS3.Builder

* fix failing checkstyle

* fix typo

* wrap the ServerSideEncryptingAmazonS3.Builder in a provider

* added java docs for S3InputSource constructor

* added java docs for S3InputSource constructor

* remove wrap the ServerSideEncryptingAmazonS3.Builder in a provider

											
										
										
											2020-02-25 23:59:53 -05:00
+								secretAccessKey
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								segmentWriteOutMediumFactory
 								sql
 								sqls
-												Auto compaction based on parallel indexing (#8570)

* Auto compaction based on parallel indexing

* javadoc and doc

* typo

* update spell

* addressing comments

* address comments

* fix log

* fix build

* fix test

* increase default max input segment bytes per task

* fix test

											
										
										
											2019-10-18 16:24:14 -04:00
+								splitHintSpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								taskStatusCheckPeriodMs
 								timeChunk
 								totalNumMergeTasks
-												Move Azure extension into Core (#9394)

* Move Azure extension into Core

Moving the azure extension into Core.

* * Fix build failure

* * Add The MIT License (MIT) to list of compatible licenses

* * Address review comments

* * change reference to contrib azure to core azure

* * Fix spelling mistakes.

											
										
										
											2020-02-25 20:49:16 -05:00
+								StaticS3Firehose
 								prefetchTriggerBytes
-												Allow client to configure batch ingestion task to wait to complete until segments are confirmed to be available by other (#10676)

* Add ability to wait for segment availability for batch jobs

* IT updates

* fix queries in legacy hadoop IT

* Fix broken indexing integration tests

* address an lgtm flag

* spell checker still flagging for hadoop doc. adding under that file header too

* fix compaction IT

* Updates to wait for availability method

* improve unit testing for patch

* fix bad indentation

* refactor waitForSegmentAvailability

* Fixes based off of review comments

* cleanup to get compile after merging with master

* fix failing test after previous logic update

* add back code that must have gotten deleted during conflict resolution

* update some logging code

* fixes to get compilation working after merge with master

* reset interrupt flag in catch block after code review pointed it out

* small changes following self-review

* fixup some issues brought on by merge with master

* small changes after review

* cleanup a little bit after merge with master

* Fix potential resource leak in AbstractBatchIndexTask

* syntax fix

* Add a Compaction TuningConfig type

* add docs stipulating the lack of support by Compaction tasks for the new config

* Fixup compilation errors after merge with master

* Remove erroneous newline
											
										
										
											2021-04-09 00:03:00 -04:00
+								awaitSegmentAvailabilityTimeoutMillis
-												Docs: Add multi-dimension partitioning doc; refactor native batch and separate into smaller topics. (#11983)

Adds documentation for multi-dimension partitioning. cc: @kfaraz
Refactors the native batch partitioning topic as follows:

Native batch ingestion covers parallel-index
Native batch simple task indexing covers index
Native batch input sources covers ioSource
Native batch ingestion with firehose covers deprecated firehose
											
										
										
											2021-12-03 06:07:14 -05:00
+								 - ../docs/ingestion/native-batch-firehose.md
 								LocalFirehose
 								baseDir
 								HttpFirehose
 								httpAuthenticationUsername
 								DefaultPassword
 								PasswordProviders
 								EnvironmentVariablePasswordProvider
 								ingestSegment
 								maxInputSegmentBytesPerTask
 MB
 								foldCase
 								sqls
 								connectorConfig
 								InlineFirehose
 								CombiningFirehose
 								httpAuthenticationPassword
 								 - ../docs/ingestion/native-batch-input-source.md
 								accessKeyId
 								secretAccessKey
 								accessKeyId
 								httpAuthenticationPassword
 								countryName
 								 - ../docs/ingestion/native-batch-simple-task.md
 								dataSchema's
 								appendToExisting
 								dropExisting
 								timeChunk
 								PartitionsSpec
 								forceGuaranteedRollup
 								reportParseExceptions
 								pushTimeout
 								segmentWriteOutMediumFactory
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/ingestion/schema-design.md
 								product_category
 								product_id
 								product_name
 								 - ../docs/ingestion/tasks.md
 								BUILD_SEGMENTS
 								DETERMINE_PARTITIONS
 								forceTimeChunkLock
 								taskLockTimeout
 								 - ../docs/misc/math-expr.md
 								DOUBLE_ARRAY
 								DOY
 								DateTimeFormat
 								LONG_ARRAY
 								Los_Angeles
 								P3M
 								PT12H
 								STRING_ARRAY
 								String.format
 								acos
 								args
 								arr1
 								arr2
 								array_append
 								array_concat
-												expression aggregator (#11104)

* add experimental expression aggregator

* add test

* fix lgtm

* fix test

* adjust test

* use not null constant

* array_set_concat docs

* add equals and hashcode and tostring

* fix it

* spelling

* do multi-value magic for expression agg, more javadocs, tests

* formatting

* fix inspection

* more better

* nullable
											
										
										
											2021-04-22 21:30:16 -04:00
+								array_set_add
 								array_set_add_all
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								array_contains
 								array_length
 								array_offset
 								array_offset_of
 								array_ordinal
 								array_ordinal_of
 								array_overlap
 								array_prepend
 								array_slice
 								array_to_string
 								asin
 								atan
 								atan2
-												bitwise math function expressions (#10605)

* expressions: adding bitwise expressions

* double handling and vectorization

* move conversion to Evals

* revert unintended changes

* less magic, split convert functions, fix parser for funny exponent doubles

* fix spelling exceptions list

* more spelling

* fix grammar, add more test, fix docs

* fix docs

Co-authored-by: Max Kaplan <max@maxkaplan.me>
											
										
										
											2021-01-28 14:16:53 -05:00
+								bitwise
 								bitwiseAnd
 								bitwiseComplement
 								bitwiseConvertDoubleToLongBits
 								bitwiseConvertLongBitsToDouble
 								bitwiseOr
 								bitwiseShiftLeft
 								bitwiseShiftRight
 								bitwiseXor
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								bloom_filter_test
 								cartesian_fold
 								cartesian_map
 								case_searched
 								case_simple
 								cbrt
 								concat
 								copysign
 								expm1
 								expr
 								expr1
 								expr2
-												basic docs for nested column query functions (#12922)

* basic docs for nested column query functions
											
										
										
											2022-08-19 20:12:19 -04:00
+								expr3
 								expr4
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								fromIndex
 								getExponent
 								hypot
 								ipv4_match
 								ipv4_parse
-												vectorize logical operators and boolean functions (#11184)

changes:
* adds new config, druid.expressions.useStrictBooleans which makes longs the official boolean type of all expressions
* vectorize logical operators and boolean functions, some only if useStrictBooleans is true

											
										
										
											2021-12-02 19:40:23 -05:00
+								isnull
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								ipv4_stringify
 								java.lang.Math
 								java.lang.String
 								log10
 								log1p
 								lpad
 								ltrim
 								nextUp
 								nextafter
-												vectorize logical operators and boolean functions (#11184)

changes:
* adds new config, druid.expressions.useStrictBooleans which makes longs the official boolean type of all expressions
* vectorize logical operators and boolean functions, some only if useStrictBooleans is true

											
										
										
											2021-12-02 19:40:23 -05:00
+								notnull
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								nvl
 								parse_long
 								regexp_extract
-												Add REGEXP_LIKE, fix bugs in REGEXP_EXTRACT. (#9893)

* Add REGEXP_LIKE, fix empty-pattern bug in REGEXP_EXTRACT.

- Add REGEXP_LIKE function that returns a boolean, and is useful in
  WHERE clauses.
- Fix REGEXP_EXTRACT return type (should be nullable; causes incorrect
  filter elision).
- Fix REGEXP_EXTRACT behavior for empty patterns: should always match
  (previously, they threw errors).
- Improve error behavior when REGEXP_EXTRACT and REGEXP_LIKE are passed
  non-literal patterns.
- Improve documentation of REGEXP_EXTRACT.

* Changes based on PR review.

* Fix arg check.

* Important fixes!

* Add speller.

* wip

* Additional tests.

* Fix up tests.

* Add validation error tests.

* Additional tests.

* Remove useless call.
											
										
										
											2020-06-03 17:31:37 -04:00
+								regexp_like
-												Support SearchQueryDimFilter in sql via new methods (#10350)

* Support SearchQueryDimFilter in sql via new methods

* Contains is a reserved word

* revert unnecessary change

* Fix toDruidExpression method

* rename methods

* java docs

* Add native functions

* revert change in dockerfile

* remove changes from dockerfile

* More tests

* travis fix

* Handle null values better
											
										
										
											2020-09-14 12:57:54 -04:00
+								contains_string
 								icontains_string
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								result1
 								result2
 								rint
 								rpad
 								rtrim
-												Adding safe divide function (#11904)

* IMPLY-4344: Adding safe divide function along with testcases and documentation updates

* Changing based on review comments

* Addressing review comments, fixing coding style, docs and spelling

* Checkstyle passes for all code

* Fixing expected results for infinity

* Revert "Fixing expected results for infinity"

This reverts commit 5fd5cd480dd29706dd6e4b3c736611fe8dc74c85.

* Updating test result and a space in docs
											
										
										
											2021-11-17 11:22:41 -05:00
+								safe_divide
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								scalb
 								signum
 								str1
 								str2
 								string_to_array
-												Vectorized ANY aggregators (#10338)

* WIP vectorized ANY aggregators

* tests

* fix aggs

* cleanup

* code review + tests

* docs

* use NilVectorSelector when needed

* fix spellcheck

* dont instantiate vectors

* cleanup
											
										
										
											2020-09-14 22:44:58 -04:00
+								stringAny
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								strlen
 								strpos
 								timestamp_ceil
 								timestamp_extract
 								timestamp_floor
 								timestamp_format
 								timestamp_parse
 								timestamp_shift
 								todegrees
 								toradians
 								ulp
 								unix_timestamp
 								value1
 								value2
 								valueOf
-												Add SQL functions to format numbers into human readable format (#10635)

* add binary_byte_format/decimal_byte_format/decimal_format

* clean code

* fix doc

* fix review comments

* add spelling check rules

* remove extra param

* improve type handling and null handling

* remove extra zeros

* fix tests and add space between unit suffix and number as most size-format functions do

* fix tests

* add examples

* change function names according to review comments

* fix merge

Signed-off-by: frank chen <frank.chen021@outlook.com>

* no need to configure NullHandling explicitly for tests

Signed-off-by: frank chen <frank.chen021@outlook.com>

* fix tests in SQL-Compatible mode

Signed-off-by: frank chen <frank.chen021@outlook.com>

* Resolve review comments

* Update SQL test case to check null handling

* Fix intellij inspections

* Add more examples

* Fix example
											
										
										
											2021-08-13 13:27:49 -04:00
+								IEC
 								human_readable_binary_byte_format
 								human_readable_decimal_byte_format
 								human_readable_decimal_format
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/misc/papers-and-talks.md
 								RADStack
 								 - ../docs/operations/api-reference.md
 .000Z
 -09-12T03
 -09-12T05
 -06-27_2016-06-28
 								Param
 								SupervisorSpec
 								dropRule
 								druid.query.segmentMetadata.defaultHistory
 								isointerval
 								json
 								loadRule
 								maxTime
 								minTime
 								numCandidates
 								param
 								segmentId1
 								segmentId2
 								taskId
 								taskid
 								un
 								 - ../docs/operations/basic-cluster-tuning.md
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+MiB
 MiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+ms
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+.5MiB
 GiB
 MiB
 GiB-60GiB
 GiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+MB
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+KiB
 GiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								G1GC
 								GroupBys
 								QoS-type
 								 - ../docs/operations/dump-segment.md
 								DumpSegment
 								SegmentMetadata
 								__time
 								bitmapSerdeFactory
 								columnName
 								index.zip
 								time-iso8601
 								 - ../docs/operations/export-metadata.md
 								hadoopStorageDirectory
 								 - ../docs/operations/insert-segment-to-db.md
 .14.x
-												Improved Java 17 support and Java runtime docs. (#12839)

* Improved Java 17 support and Java runtime docs.

1) Add a "Java runtime" doc page with information about supported
   Java versions, garbage collection, and strong encapsulation.

2) Update asm and equalsverifier to versions that support Java 17.

3) Add additional "--add-opens" lines to surefire configuration, so
   tests can pass successfully under Java 17.

4) Switch openjdk15 tests to openjdk17.

5) Update FrameFile to specifically mention Java runtime incompatibility
   as the cause of not being able to use Memory.map.

6) Update SegmentLoadDropHandler to log an error for Errors too, not
   just Exceptions. This is important because an IllegalAccessError is
   encountered when the correct "--add-opens" line is not provided,
   which would otherwise be silently ignored.

7) Update example configs to use druid.indexer.runner.javaOptsArray
   instead of druid.indexer.runner.javaOpts. (The latter is deprecated.)

* Adjustments.

* Use run-java in more places.

* Add run-java.

* Update .gitignore.

* Exclude hadoop-client-api.

Brought in when building on Java 17.

* Swap one more usage of java.

* Fix the run-java script.

* Fix flag.

* Include link to Temurin.

* Spelling.

* Update examples/bin/run-java

Co-authored-by: Xavier Léauté <xl+github@xvrl.net>

Co-authored-by: Xavier Léauté <xl+github@xvrl.net>
											
										
										
											2022-08-04 02:16:05 -04:00
+								 - ../docs/operations/java.md
 								G1
 								Temurin
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/operations/metrics.md
 .14.x
 s
 								Bufferpool
 								EventReceiverFirehose
 								EventReceiverFirehoseMonitor
-												Improved Java 17 support and Java runtime docs. (#12839)

* Improved Java 17 support and Java runtime docs.

1) Add a "Java runtime" doc page with information about supported
   Java versions, garbage collection, and strong encapsulation.

2) Update asm and equalsverifier to versions that support Java 17.

3) Add additional "--add-opens" lines to surefire configuration, so
   tests can pass successfully under Java 17.

4) Switch openjdk15 tests to openjdk17.

5) Update FrameFile to specifically mention Java runtime incompatibility
   as the cause of not being able to use Memory.map.

6) Update SegmentLoadDropHandler to log an error for Errors too, not
   just Exceptions. This is important because an IllegalAccessError is
   encountered when the correct "--add-opens" line is not provided,
   which would otherwise be silently ignored.

7) Update example configs to use druid.indexer.runner.javaOptsArray
   instead of druid.indexer.runner.javaOpts. (The latter is deprecated.)

* Adjustments.

* Use run-java in more places.

* Add run-java.

* Update .gitignore.

* Exclude hadoop-client-api.

Brought in when building on Java 17.

* Swap one more usage of java.

* Fix the run-java script.

* Fix flag.

* Include link to Temurin.

* Spelling.

* Update examples/bin/run-java

Co-authored-by: Xavier Léauté <xl+github@xvrl.net>

Co-authored-by: Xavier Léauté <xl+github@xvrl.net>
											
										
										
											2022-08-04 02:16:05 -04:00
+								Filesystem
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								JVMMonitor
 								QueryCountStatsMonitor
 								RealtimeMetricsMonitor
 								Sys
 								SysMonitor
 								TaskCountStatsMonitor
-												Adding task slot count metrics to Druid Overlord (#10379)

* Adding more worker metrics to Druid Overlord

* Changing the nomenclature from worker to peon as that represents the metrics that we want to monitor better

* A few more instances of worker usage replaced with peon

* Modifying the peon idle count logic to only use eligible workers available capacity

* Changing the naming to task slot count instead of peon

* Adding some unit test coverage for the new test runner apis

* Addressing Review Comments

* Modifying the TaskSlotCountStatsProvider apis so that overlords which are not leader do not emit these metrics

* Fixing the spelling issue in the docs

* Setting the annotation Nullable on the TaskSlotCountStatsProvider methods
											
										
										
											2020-09-29 02:50:38 -04:00
+								TaskSlotCountStatsMonitor
-												Worker level task metrics (#12446)

* * fix metric name inconsistency

* * add task slot metrics for middle managers

* * add new WorkerTaskCountStatsMonitor to report task count metrics
  from worker

* * more stuff

* * remove unused variable

* * more stuff

* * add javadocs

* * fix checkstyle

* * fix hadoop test failure

* * cleanup

* * add more code coverage in tests

* * fix test failure

* * add docs

* * increase code coverage

* * fix spelling

* * fix failing tests

* * remove dead code

* * fix spelling
											
										
										
											2022-04-26 12:44:44 -04:00
+								WorkerTaskCountStatsMonitor
-												* fix duplicate dimension (#12778)


											
										
										
											2022-07-14 01:09:03 -04:00
+								workerVersion
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								bufferCapacity
-												fix dropwizard emitter jvm bufferpoolName metric (#10075)

* fix dropwizard emitter jvm bufferpoolName metric

* fixes
											
										
										
											2020-06-25 15:20:25 -04:00
+								bufferpoolName
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								cms
 								cpuName
 								cpuTime
-												Add more metrics for Jetty server thread pool usage (#11113)

Add more metrics for jetty server thread pool usage so we know if we have allocated enough http threads to handle requests.


											
										
										
											2021-11-07 06:21:44 -05:00
+								druid.server.http.numThreads
 								druid.server.http.queueSize
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								fsDevName
 								fsDirName
 								fsOptions
 								fsSysTypeName
 								fsTypeName
 								g1
 								gcGen
 								gcName
 								handoffed
 								hasFilters
 								memKind
 								nativeQueryIds
 								netAddress
 								netHwaddr
 								netName
-												add ingest/notices/queueSize metric to give visibility into supervisor notices queue size (#11417)


											
										
										
											2021-07-30 10:59:26 -04:00
+								noticeType
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								numComplexMetrics
 								numDimensions
 								numMetrics
 								poolKind
 								poolName
 								remoteAddress
-												Add new metric that quantifies how long batch ingest jobs waited for segment availability and whether or not that wait was successful (#12002)

* add a unit test that tests that new metric is emitted

* remove unused import

* clarify in doc that this is for batch tasks

* fix IndexTaskTest
											
										
										
											2021-12-10 12:40:52 -05:00
+								segmentAvailabilityConfirmed
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								serviceName
-												Emit state of replace and append for native batch tasks (#12488)

* Emit state of replace and append for native batch tasks

* Emit count of one depending on batch ingestion mode (APPEND, OVERWRITE, REPLACE)

* Add metric to compaction job

* Avoid null ptr exc when null emitter

* Coverage

* Emit tombstone & segment counts

* Tasks need a type

* Spelling

* Integrate BatchIngestionMode in batch ingestion tasks functionality

* Typos

* Remove batch ingestion type from metric since it is already in a dimension. Move IngestionMode to AbstractTask to facilitate having mode as a dimension. Add metrics to streaming. Add missing coverage.

* Avoid inner class referenced by sub-class inspection. Refactor computation of IngestionMode to make it more robust to null IOConfig and fix test.

* Spelling

* Avoid polluting the Task interface

* Rename computeCompaction methods to avoid ambiguous java compiler error if they are passed null. Other minor cleanup.
											
										
										
											2022-05-23 15:32:47 -04:00
+								taskIngestionMode
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								taskStatus
 								taskType
-												Add more metrics for Jetty server thread pool usage (#11113)

Add more metrics for jetty server thread pool usage so we know if we have allocated enough http threads to handle requests.


											
										
										
											2021-11-07 06:21:44 -05:00
+								threadPoolNumBusyThreads.
 								threadPoolNumIdleThreads
 								threadPoolNumTotalThreads.
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/operations/other-hadoop.md
 								CDH
 								Classloader
 								assembly.sbt
 								build.sbt
 								classloader
 								druid_build
 								mapred-default
 								mapred-site
 								sbt
 								scala-2
 								 - ../docs/operations/pull-deps.md
 								org.apache.hadoop
 								proxy.com.
 								remoteRepository
 								 - ../docs/operations/recommendations.md
 								JBOD
 								druid.processing.buffer.sizeBytes.
 								druid.processing.numMergeBuffers
 								druid.processing.numThreads
 								tmpfs
 								 - ../docs/operations/rule-configuration.md
 								broadcastByInterval
 								broadcastByPeriod
 								broadcastForever
 								colocatedDataSources
 								dropBeforeByPeriod
 								dropByInterval
 								dropByPeriod
 								dropForever
 								loadByInterval
 								loadByPeriod
 								loadForever
 								 - ../docs/operations/segment-optimization.md
 MB
 								 - ../docs/operations/single-server.md
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+GiB
 GiB
 GiB
 GiB
 GiB
 GiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								Nano-Quickstart
 								i3
 								i3.16xlarge
 								i3.2xlarge
 								i3.4xlarge
 								i3.8xlarge
 								 - ../docs/operations/tls-support.md
 								CN
 								subjectAltNames
 								 - ../docs/querying/aggregations.md
 								HyperUnique
 								hyperUnique
 								longSum
 								 - ../docs/querying/datasource.md
 								groupBys
 								 - ../docs/querying/datasourcemetadataquery.md
 								dataSourceMetadata
 								 - ../docs/querying/dimensionspecs.md
 								ExtractionDimensionSpec
 								SimpleDateFormat
 								bar_1
 								dimensionSpecs
 								isWhitelist
 								joda
 								nullHandling
 								product_1
 								product_3
 								registeredLookup
 								timeFormat
 								tz
 								v3
 								weekyears
 								 - ../docs/querying/filters.md
 								___bar
 								caseSensitive
 								extractionFn
 								insensitive_contains
 								last_name
 								lowerStrict
 								upperStrict
 								 - ../docs/querying/granularities.md
 -01-01T00
 								P2W
 								PT0.750S
 								PT1H30M
 								TimeseriesQuery
 								 - ../docs/querying/groupbyquery.md
 								D1
 								D2
 								D3
 								druid.query.groupBy.defaultStrategy
-												GroupBy: Cap dictionary-building selector memory usage. (#12309)

* GroupBy: Cap dictionary-building selector memory usage.

New context parameter "maxSelectorDictionarySize" controls when the
per-segment processing code should return early and trigger a trip
to the merge buffer.

Includes:

- Vectorized and nonvectorized implementations.
- Adjustments to GroupByQueryRunnerTest to exercise this code in
  the v2SmallDictionary suite. (Both the selector dictionary and
  the merging dictionary will be small in that suite.)
- Tests for the new config parameter.

* Fix issues from tests.

* Add "pre-existing" to dictionary.

* Simplify GroupByColumnSelectorStrategy interface by removing one of the writeToKeyBuffer methods.

* Adjustments from review comments.
											
										
										
											2022-03-08 16:13:11 -05:00
+								druid.query.groupBy.maxSelectorDictionarySize
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								druid.query.groupBy.maxMergingDictionarySize
 								druid.query.groupBy.maxOnDiskStorage
 								druid.query.groupBy.maxResults.
 								groupByStrategy
 								maxOnDiskStorage
 								maxResults
 								orderby
 								orderbys
 								outputName
-												GroupBy: Cap dictionary-building selector memory usage. (#12309)

* GroupBy: Cap dictionary-building selector memory usage.

New context parameter "maxSelectorDictionarySize" controls when the
per-segment processing code should return early and trigger a trip
to the merge buffer.

Includes:

- Vectorized and nonvectorized implementations.
- Adjustments to GroupByQueryRunnerTest to exercise this code in
  the v2SmallDictionary suite. (Both the selector dictionary and
  the merging dictionary will be small in that suite.)
- Tests for the new config parameter.

* Fix issues from tests.

* Add "pre-existing" to dictionary.

* Simplify GroupByColumnSelectorStrategy interface by removing one of the writeToKeyBuffer methods.

* Adjustments from review comments.
											
										
										
											2022-03-08 16:13:11 -05:00
+								pre-existing
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								pushdown
 								row1
 								subtotalsSpec
-												ConcurrentGrouper: Add mergeThreadLocal option, fix bug around the switch to spilling. (#12513)

* ConcurrentGrouper: Add option to always slice up merge buffers thread-locally.

Normally, the ConcurrentGrouper shares merge buffers across processing
threads until spilling starts, and then switches to a thread-local model.
This minimizes memory use and reduces likelihood of spilling, which is
good, but it creates thread contention. The new mergeThreadLocal option
causes a query to start in thread-local mode immediately, and allows us
to experiment with the relative performance of the two modes.

* Fix grammar in docs.

* Fix race in ConcurrentGrouper.

* Fix issue with timeouts.

* Remove unused import.

* Add "tradeoff" to dictionary.
											
										
										
											2022-05-21 13:28:54 -04:00
+								tradeoff
-												Adding new config for disabling group by on multiValue column (#12253)

As part of #12078 one of the followup's was to have a specific config which does not allow accidental unnesting of multi value columns if such columns become part of the grouping key.
Added a config groupByEnableMultiValueUnnesting which can be set in the query context.

The default value of groupByEnableMultiValueUnnesting is true, therefore it does not change the current engine behavior.
If groupByEnableMultiValueUnnesting is set to false, the query will fail if it encounters a multi-value column in the grouping key.
											
										
										
											2022-02-16 10:23:26 -05:00
+								unnested
 								unnesting
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/querying/having.md
 								HavingSpec
 								HavingSpecs
 								dimSelector
 								equalTo
 								greaterThan
 								lessThan
 								 - ../docs/querying/hll-old.md
 								DefaultDimensionSpec
 								druid-hll
 								isInputHyperUnique
 								 - ../docs/querying/joins.md
 								pre-join
 								 - ../docs/querying/limitspec.md
 								DefaultLimitSpec
 								OrderByColumnSpec
 								OrderByColumnSpecs
 								dimensionOrder
 								 - ../docs/querying/lookups.md
 _000
 								kafka-extraction-namespace
 								mins
 								tierName
 								 - ../docs/querying/multi-value-dimensions.md
 								row2
 								row3
 								row4
 								t3
 								t4
 								t5
-												Adding new config for disabling group by on multiValue column (#12253)

As part of #12078 one of the followup's was to have a specific config which does not allow accidental unnesting of multi value columns if such columns become part of the grouping key.
Added a config groupByEnableMultiValueUnnesting which can be set in the query context.

The default value of groupByEnableMultiValueUnnesting is true, therefore it does not change the current engine behavior.
If groupByEnableMultiValueUnnesting is set to false, the query will fail if it encounters a multi-value column in the grouping key.
											
										
										
											2022-02-16 10:23:26 -05:00
+								groupByEnableMultiValueUnnesting
 								unnesting
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/querying/multitenancy.md
 ms
 								tenant_id
 								 - ../docs/querying/post-aggregations.md
 								fieldAccess
 								finalizingFieldAccess
 								hyperUniqueCardinality
 								 - ../docs/querying/query-context.md
-												Select broker based on query context parameter `brokerService` (#11495)

This change allows the selection of a specific broker service (or broker tier) by the Router.

The newly added ManualTieredBrokerSelectorStrategy works as follows:

Check for the parameter brokerService in the query context. If this is a valid broker service, use it.
Check if the field defaultManualBrokerService has been set in the strategy. If this is a valid broker service, use it.
Move on to the next strategy
											
										
										
											2021-07-27 11:26:05 -04:00
+								brokerService
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								bySegment
 								doubleSum
 								druid.broker.cache.populateCache
 								druid.broker.cache.populateResultLevelCache
 								druid.broker.cache.useCache
 								druid.broker.cache.useResultLevelCache
 								druid.historical.cache.populateCache
 								druid.historical.cache.useCache
-												parallel broker merges on fork join pool (#8578)

* sketch of broker parallel merges done in small batches on fork join pool

* fix non-terminating sequences, auto compute parallelism

* adjust benches

* adjust benchmarks

* now hella more faster, fixed dumb

* fix

* remove comments

* log.info for debug

* javadoc

* safer block for sequence to yielder conversion

* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool

* smooth yield rate adjustment, more logs to help tune

* cleanup, less logs

* error handling, bug fixes, on by default, more parallel, more tests

* remove unused var

* comments

* timeboundary mergeFn

* simplify, more javadoc

* formatting

* pushdown config

* use nanos consistently, move logs back to debug level, bit more javadoc

* static terminal result batch

* javadoc for nullability of createMergeFn

* cleanup

* oops

* fix race, add docs

* spelling, remove todo, add unhandled exception log

* cleanup, revert unintended change

* another unintended change

* review stuff

* add ParallelMergeCombiningSequenceBenchmark, fixes

* hyper-threading is the enemy

* fix initial start delay, lol

* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2

* fix those important style issues with the benchmarks code

* lazy sequence creation for benchmarks

* more benchmark comments

* stable sequence generation time

* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs

* add jmh thread based benchmarks, cleanup some stuff

* oops

* style

* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose

* retool benchmark to allow modeling more typical heterogenous heavy workloads

* spelling

* fix

* refactor benchmarks

* formatting

* docs

* add maxThreadStartDelay parameter to threaded benchmark

* why does catch need to be on its own line but else doesnt

											
										
										
											2019-11-07 14:58:46 -05:00
+								enableParallelMerge
-												Ingestion metrics doc fix (#12066)

* Ingestion metrics doc fix.

* Fixing typo

* Adding missed keywords in ignore list
											
										
										
											2021-12-15 02:21:53 -05:00
+								enableJoinLeftTableScanDirect
 								enableJoinFilterPushDown
 								enableJoinFilterRewrite
-												Enable conversion of join to filter by default (#12868)


											
										
										
											2022-08-13 11:07:43 -04:00
+								enableRewriteJoinToFilter
-												Ingestion metrics doc fix (#12066)

* Ingestion metrics doc fix.

* Fixing typo

* Adding missed keywords in ignore list
											
										
										
											2021-12-15 02:21:53 -05:00
+								enableJoinFilterRewriteValueColumnFilters
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								floatSum
-												Ingestion metrics doc fix (#12066)

* Ingestion metrics doc fix.

* Fixing typo

* Adding missed keywords in ignore list
											
										
										
											2021-12-15 02:21:53 -05:00
+								joinFilterRewriteMaxSize
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								maxQueuedBytes
 								maxScatterGatherBytes
 								minTopNThreshold
-												parallel broker merges on fork join pool (#8578)

* sketch of broker parallel merges done in small batches on fork join pool

* fix non-terminating sequences, auto compute parallelism

* adjust benches

* adjust benchmarks

* now hella more faster, fixed dumb

* fix

* remove comments

* log.info for debug

* javadoc

* safer block for sequence to yielder conversion

* refactor LifecycleForkJoinPool into LifecycleForkJoinPoolProvider which wraps a ForkJoinPool

* smooth yield rate adjustment, more logs to help tune

* cleanup, less logs

* error handling, bug fixes, on by default, more parallel, more tests

* remove unused var

* comments

* timeboundary mergeFn

* simplify, more javadoc

* formatting

* pushdown config

* use nanos consistently, move logs back to debug level, bit more javadoc

* static terminal result batch

* javadoc for nullability of createMergeFn

* cleanup

* oops

* fix race, add docs

* spelling, remove todo, add unhandled exception log

* cleanup, revert unintended change

* another unintended change

* review stuff

* add ParallelMergeCombiningSequenceBenchmark, fixes

* hyper-threading is the enemy

* fix initial start delay, lol

* parallelism computer now balances partition sizes to partition counts using sqrt of sequence count instead of sequence count by 2

* fix those important style issues with the benchmarks code

* lazy sequence creation for benchmarks

* more benchmark comments

* stable sequence generation time

* update defaults to use 100ms target time, 4096 batch size, 16384 initial yield, also update user docs

* add jmh thread based benchmarks, cleanup some stuff

* oops

* style

* add spread to jmh thread benchmark start range, more comments to benchmarks parameters and purpose

* retool benchmark to allow modeling more typical heterogenous heavy workloads

* spelling

* fix

* refactor benchmarks

* formatting

* docs

* add maxThreadStartDelay parameter to threaded benchmark

* why does catch need to be on its own line but else doesnt

											
										
										
											2019-11-07 14:58:46 -05:00
+								parallelMergeInitialYieldRows
 								parallelMergeParallelism
 								parallelMergeSmallBatchRows
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								populateCache
 								populateResultLevelCache
 								queryId
 								row-matchers
 								serializeDateTimeAsLong
 								serializeDateTimeAsLongInner
 								skipEmptyBuckets
 								useCache
 								useResultLevelCache
 								vectorSize
-												Removing unused processing threadpool on broker (#12070)

* Thread pool for broker

* Updating two tests to improve coverage for new method added

* Updating druidProcessingConfigTest to cover coverage

* Adding missed spelling errors caused in doc

* Adding test to cover lines of new function added
											
										
										
											2021-12-21 16:07:53 -05:00
+								enableJoinLeftTableScanDirect
 								enableJoinFilterPushDown
 								enableJoinFilterRewrite
 								enableJoinFilterRewriteValueColumnFilters
 								joinFilterRewriteMaxSize
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/querying/querying.md
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+KiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								DatasourceMetadata
 								TimeBoundary
 								errorClass
 								errorMessage
 								x-jackson-smile
 								 - ../docs/querying/scan-query.md
 								batchSize
 								compactedList
 								druid.query.scan.legacy
 								druid.query.scan.maxRowsQueuedForOrdering
 								druid.query.scan.maxSegmentPartitionsOrderedInMemory
 								maxRowsQueuedForOrdering
 								maxSegmentPartitionsOrderedInMemory
 								resultFormat
 								valueVector
 								 - ../docs/querying/searchquery.md
 								SearchQuerySpec
 								cursorOnly
 								druid.query.search.searchStrategy
 								queryableIndexSegment
 								searchDimensions
 								searchStrategy
 								useIndexes
 								 - ../docs/querying/searchqueryspec.md
 								ContainsSearchQuerySpec
 								FragmentSearchQuerySpec
 								InsensitiveContainsSearchQuerySpec
 								RegexSearchQuerySpec
 								 - ../docs/querying/segmentmetadataquery.md
 								analysisType
 								analysisTypes
 								lenientAggregatorMerge
 								minmax
 								segmentMetadata
 								toInclude
 								 - ../docs/querying/select-query.md
 								PagingSpec
 								fromNext
 								pagingSpec
 								 - ../docs/querying/sorting-orders.md
 								BoundFilter
 								GroupByQuery's
 								SearchQuery
 								TopNMetricSpec
 								compareTo
 								file12
 								file2
-												Refactor SQL docs (#12239)

* refactor and link fixes

* add sql docs to left nav

* code format for needle

* updated web console script

* link fixes

* update earliest/latest functions

* edits for grammar and style

* more link fixes

* another link

* update with #12226

* update .spelling file
											
										
										
											2022-02-11 17:43:30 -05:00
+								 - ../docs/querying/sql-operators.md
 								_x_
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/querying/timeseriesquery.md
 								fieldName1
 								fieldName2
 								 - ../docs/querying/topnmetricspec.md
 								DimensionTopNMetricSpec
 								metricSpec
 								previousStop
 								 - ../docs/querying/topnquery.md
 								GroupByQuery
 								top500
 								 - ../docs/querying/virtual-columns.md
 								outputType
 								 - ../docs/tutorials/cluster.md
 .9TB
 CPU
 								WebUpd8
 								m5.2xlarge
 								metadata.storage.
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+GiB
 GiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/tutorials/tutorial-batch-hadoop.md
 								PATH_TO_DRUID
 								namenode
 								 - ../docs/tutorials/tutorial-delete-data.md
 								segmentID
 								segmentIds
 								 - ../docs/tutorials/tutorial-ingestion-spec.md
 								dstIP
 								dstPort
 								srcIP
 								srcPort
 								 - ../docs/tutorials/tutorial-kerberos-hadoop.md
 								common_runtime_properties
 								druid.extensions.directory
 								druid.extensions.loadList
 								druid.hadoop.security.kerberos.keytab
 								druid.hadoop.security.kerberos.principal
 								druid.indexer.logs.directory
 								druid.indexer.logs.type
 								druid.storage.storageDirectory
 								druid.storage.type
 								hdfs.headless.keytab
 								indexing_log
 								keytabs
 								 - ../docs/tutorials/tutorial-query.md
 								dsql
 								 - ../docs/tutorials/tutorial-retention.md
 -09-12T12
-												Tutorial on ingesting and querying Theta sketches (#12723)

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2022-08-24 12:23:22 -04:00
+								 - ../docs/tutorials/tutorial-sketches-theta.md
 								clickstreams
 								uid
 								_k_
 								Bridgerton
 								Hellmar
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/tutorials/tutorial-update-data.md
 								bear-111
 								 - ../docs/configuration/index.md
-												Improve doc and exception message for invalid user configurations (#10598)

* improve doc and exception message

* add spelling check rules and remove unused import

* add a test to improve test coverage
											
										
										
											2020-11-23 18:03:13 -05:00
+KiB
 GiB
 KiB
-												Support unit on byte-related properties (#10203)

* support unit suffix on byte-related properties

* add doc

* change default value of byte-related properites in example files

* fix coding style

* fix doc

* fix CI

* suppress spelling errors

* improve code according to comments

* rename Bytes to HumanReadableBytes

* add getBytesInInt to get value safely

* improve doc

* fix problem reported by CI

* fix problem reported by CI

* resolve code review comments

* improve error message

* improve code & doc according to comments

* fix CI problem

* improve doc

* suppress spelling check errors
											
										
										
											2020-07-30 21:58:48 -04:00
+GiB
 								KiB
 								GiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+.000Z
 ms
 ms
 GB
 _000_000
 -01-01T00
 GB
 _000
 								524288000L
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+MiB
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+u60
 								Autoscaler
-												Add druid.sql.approxCountDistinct.function property. (#11181)

* Add druid.sql.approxCountDistinct.function property.

The new property allows admins to configure the implementation for
APPROX_COUNT_DISTINCT and COUNT(DISTINCT expr) in approximate mode.

The motivation for adding this setting is to enable site admins to
switch the default HLL implementation to DataSketches.

For example, an admin can set:

  druid.sql.approxCountDistinct.function = APPROX_COUNT_DISTINCT_DS_HLL

* Fixes

* Fix tests.

* Remove erroneous cannotVectorize.

* Remove unused import.

* Remove unused test imports.
											
										
										
											2021-10-25 15:16:21 -04:00
+								APPROX_COUNT_DISTINCT_BUILTIN
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								AvaticaConnectionBalancer
 								EventReceiverFirehose
 								File.getFreeSpace
 								File.getTotalSpace
 								ForkJoinPool
-												Adding support for autoscaling in GCE (#8987)

* Adding support for autoscaling in GCE

* adding extra google deps also in gce pom

* fix link in doc

* remove unused deps

* adding terms to spelling file

* version in pom 0.17.0-incubating-SNAPSHOT --> 0.18.0-SNAPSHOT

* GCEXyz -> GceXyz in naming for consistency

* add preconditions

* add VisibleForTesting annotation

* typos in comments

* use StringUtils.format instead of String.format

* use custom exception instead of exit

* factorize interval time between retries

* making literal value a constant

* iter all network interfaces

* use provided on google (non api) deps

* adding missing dep

* removing unneded this and use Objects methods instead o 3-way if in hash and comparison

* adding import

* adding retries around getRunningInstances and adding limit for operation end waiting

* refactor GceEnvironmentConfig.hashCode

* 0.18.0-SNAPSHOT -> 0.19.0-SNAPSHOT

* removing unused config

* adding tests to hash and equals

* adding nullable to waitForOperationEnd

* adding testTerminate

* adding unit tests for createComputeService

* increasing retries in unrelated integration-test to prevent sporadic failure (hopefully)

* reverting queryResponseTemplate change

* adding comment for Compute.Builder.build() returning null
											
										
										
											2020-04-28 06:13:39 -04:00
+								GCE
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								HadoopIndexTasks
 								HttpEmitter
 								HttpPostEmitter
 								InetAddress.getLocalHost
-												Make dropExisting flag for Compaction configurable and add warning documentations (#11070)

* Make dropExisting flag for Compaction configurable

* fix checkstyle

* fix checkstyle

* fix test

* add tests

* fix spelling

* fix docs

* add IT

* fix test

* fix doc

* fix doc
											
										
										
											2021-04-09 03:12:28 -04:00
+								IOConfig
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								JRE8u60
 								KeyManager
 								L1
 								L2
-												Adding support for autoscaling in GCE (#8987)

* Adding support for autoscaling in GCE

* adding extra google deps also in gce pom

* fix link in doc

* remove unused deps

* adding terms to spelling file

* version in pom 0.17.0-incubating-SNAPSHOT --> 0.18.0-SNAPSHOT

* GCEXyz -> GceXyz in naming for consistency

* add preconditions

* add VisibleForTesting annotation

* typos in comments

* use StringUtils.format instead of String.format

* use custom exception instead of exit

* factorize interval time between retries

* making literal value a constant

* iter all network interfaces

* use provided on google (non api) deps

* adding missing dep

* removing unneded this and use Objects methods instead o 3-way if in hash and comparison

* adding import

* adding retries around getRunningInstances and adding limit for operation end waiting

* refactor GceEnvironmentConfig.hashCode

* 0.18.0-SNAPSHOT -> 0.19.0-SNAPSHOT

* removing unused config

* adding tests to hash and equals

* adding nullable to waitForOperationEnd

* adding testTerminate

* adding unit tests for createComputeService

* increasing retries in unrelated integration-test to prevent sporadic failure (hopefully)

* reverting queryResponseTemplate change

* adding comment for Compute.Builder.build() returning null
											
										
										
											2020-04-28 06:13:39 -04:00
+								ListManagedInstances
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								LoadSpec
 								LoggingEmitter
 								Los_Angeles
 								MDC
 								NoopServiceEmitter
-												Minor processor quota computation fix + docs (#11783)

* cpu/cpuset cgroup and procfs data gathering

* Renames and default values

* Formatting

* Trigger Build

* Add cgroup monitors

* Return 0 if no period

* Update

* Minor processor quota computation fix + docs

* Address comments

* Address comments

* Fix spellcheck

Co-authored-by: arunramani-imply <84351090+arunramani-imply@users.noreply.github.com>
											
										
										
											2021-10-08 23:52:03 -04:00
+								NUMA
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								ONLY_EVENTS
 								P1D
 								P1W
 								PT-1S
 								PT0.050S
 								PT10M
 								PT10S
 								PT15M
 								PT1800S
 								PT1M
 								PT1S
 								PT24H
 								PT300S
 								PT30S
-												Add feature to automatically remove audit logs based on retention period (#11084)

* add docs

* add impl

* fix checkstyle

* fix test

* add test

* fix checkstyle

* fix checkstyle

* fix test

* Address comments

* Address comments

* fix spelling

* fix docs
											
										
										
											2021-04-20 20:10:43 -04:00
+								PT3600S
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								PT5M
 								PT5S
 								PT60S
 								PT90M
 								Param
 								Runtime.maxMemory
 								SSLContext
 								SegmentMetadata
 								SegmentWriteOutMediumFactory
 								ServiceEmitter
 								System.getProperty
 								TLSv1.2
 								TrustManager
 								TuningConfig
 								_N_
 								_default
 								_default_tier
 								addr
 								affinityConfig
 								allowAll
-												Refresh query docs. (#9704)

* Refresh query docs.

Larger changes:

- New doc: querying/datasource.md describes the various kinds of
datasources you can use, and has examples for both SQL and native.
- New doc: querying/query-execution.md describes how native queries
are executed at a high level. It doesn't go into the details of specific
query engines or how queries run at a per-segment level. But I think it
would be good to add or link that content here in the future.
- Refreshed doc: querying/sql.md updated to refer to joins, reformatted
a bit, added a new "Query translation" section that explains how
queries are translated from SQL to native, and removed configuration
details (moved to configuration/index.md).
- Refreshed doc: querying/joins.md updated to refer to join datasources.

Smaller changes:

- Add helpful banners to the top of query documentation pages telling
people whether a given page describes SQL, native, or both.
- Add SQL metrics to operations/metrics.md.
- Add some color and cross-links in various places.
- Add native query component docs to the sidebar, and renamed them so
they look nicer.
- Remove Select query from the sidebar.
- Fix Broker SQL configs in configuration/index.md. Remove them from
querying/sql.md.
- Combined querying/searchquery.md and querying/searchqueryspec.md.

* Updates.

* Fix numbering.

* Fix glitches.

* Add new words to spellcheck file.

* Assorted changes.

* Further adjustments.

* Add missing punctuation.
											
										
										
											2020-04-15 19:12:20 -04:00
+								ANDed
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								array_mod
-												Adding support for autoscaling in GCE (#8987)

* Adding support for autoscaling in GCE

* adding extra google deps also in gce pom

* fix link in doc

* remove unused deps

* adding terms to spelling file

* version in pom 0.17.0-incubating-SNAPSHOT --> 0.18.0-SNAPSHOT

* GCEXyz -> GceXyz in naming for consistency

* add preconditions

* add VisibleForTesting annotation

* typos in comments

* use StringUtils.format instead of String.format

* use custom exception instead of exit

* factorize interval time between retries

* making literal value a constant

* iter all network interfaces

* use provided on google (non api) deps

* adding missing dep

* removing unneded this and use Objects methods instead o 3-way if in hash and comparison

* adding import

* adding retries around getRunningInstances and adding limit for operation end waiting

* refactor GceEnvironmentConfig.hashCode

* 0.18.0-SNAPSHOT -> 0.19.0-SNAPSHOT

* removing unused config

* adding tests to hash and equals

* adding nullable to waitForOperationEnd

* adding testTerminate

* adding unit tests for createComputeService

* increasing retries in unrelated integration-test to prevent sporadic failure (hopefully)

* reverting queryResponseTemplate change

* adding comment for Compute.Builder.build() returning null
											
										
										
											2020-04-28 06:13:39 -04:00
+								autoscale
 								autoscalers
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								batch_index_task
 								cgroup
 								classloader
 								com.metamx
 								common.runtime.properties
 								cpuacct
 								dataSourceName
 								datetime
 								defaultHistory
 								doubleMax
 								doubleMin
 								doubleSum
 								druid.enableTlsPort
 								druid.indexer.autoscale.workerVersion
 								druid.service
 								druid.storage.disableAcl
 								druid_audit
 								druid_config
 								druid_dataSource
 								druid_pendingSegments
 								druid_rules
 								druid_segments
 								druid_supervisors
 								druid_taskLock
 								druid_taskLog
 								druid_tasks
-												Refresh query docs. (#9704)

* Refresh query docs.

Larger changes:

- New doc: querying/datasource.md describes the various kinds of
datasources you can use, and has examples for both SQL and native.
- New doc: querying/query-execution.md describes how native queries
are executed at a high level. It doesn't go into the details of specific
query engines or how queries run at a per-segment level. But I think it
would be good to add or link that content here in the future.
- Refreshed doc: querying/sql.md updated to refer to joins, reformatted
a bit, added a new "Query translation" section that explains how
queries are translated from SQL to native, and removed configuration
details (moved to configuration/index.md).
- Refreshed doc: querying/joins.md updated to refer to join datasources.

Smaller changes:

- Add helpful banners to the top of query documentation pages telling
people whether a given page describes SQL, native, or both.
- Add SQL metrics to operations/metrics.md.
- Add some color and cross-links in various places.
- Add native query component docs to the sidebar, and renamed them so
they look nicer.
- Remove Select query from the sidebar.
- Fix Broker SQL configs in configuration/index.md. Remove them from
querying/sql.md.
- Combined querying/searchquery.md and querying/searchqueryspec.md.

* Updates.

* Fix numbering.

* Fix glitches.

* Add new words to spellcheck file.

* Assorted changes.

* Further adjustments.

* Add missing punctuation.
											
										
										
											2020-04-15 19:12:20 -04:00
+								DruidQueryRel
-												Allow coordinator to be configured to kill segments in future (#10877)

Allow a Druid cluster to kill segments whose interval_end is a date in the future. This can be done by setting druid.coordinator.kill.durationToRetain to a negative period. For example PT-24H would allow segments to be killed if their interval_end date was 24 hours or less into the future at the time that the kill task is generated by the system.

A cluster operator can also disregard the druid.coordinator.kill.durationToRetain entirely by setting a new configuration, druid.coordinator.kill.ignoreDurationToRetain=true. This ignores interval_end date when looking for segments to kill, and instead is capable of killing any segment marked unused. This new configuration is off by default, and a cluster operator should fully understand and accept the risks if they enable it.
											
										
										
											2022-05-10 22:05:15 -04:00
+								durationToRetain
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								ec2
 								equalDistribution
 								extractionFn
 								file.encoding
 								fillCapacity
 								first_location
 								floatMax
-												Vectorized ANY aggregators (#10338)

* WIP vectorized ANY aggregators

* tests

* fix aggs

* cleanup

* code review + tests

* docs

* use NilVectorSelector when needed

* fix spellcheck

* dont instantiate vectors

* cleanup
											
										
										
											2020-09-14 22:44:58 -04:00
+								floatAny
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								floatMin
 								floatSum
 								freeSpacePercent
-												Adding support for autoscaling in GCE (#8987)

* Adding support for autoscaling in GCE

* adding extra google deps also in gce pom

* fix link in doc

* remove unused deps

* adding terms to spelling file

* version in pom 0.17.0-incubating-SNAPSHOT --> 0.18.0-SNAPSHOT

* GCEXyz -> GceXyz in naming for consistency

* add preconditions

* add VisibleForTesting annotation

* typos in comments

* use StringUtils.format instead of String.format

* use custom exception instead of exit

* factorize interval time between retries

* making literal value a constant

* iter all network interfaces

* use provided on google (non api) deps

* adding missing dep

* removing unneeded this and use Objects methods instead of 3-way if in hash and comparison

* adding import

* adding retries around getRunningInstances and adding limit for operation end waiting

* refactor GceEnvironmentConfig.hashCode

* 0.18.0-SNAPSHOT -> 0.19.0-SNAPSHOT

* removing unused config

* adding tests to hash and equals

* adding nullable to waitForOperationEnd

* adding testTerminate

* adding unit tests for createComputeService

* increasing retries in unrelated integration-test to prevent sporadic failure (hopefully)

* reverting queryResponseTemplate change

* adding comment for Compute.Builder.build() returning null
											
										
										
											2020-04-28 06:13:39 -04:00
+								gce
 								gce-extensions
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								getCanonicalHostName
 								groupBy
 								hdfs
 								httpRemote
 								indexTask
 								info_dir
-												Refresh query docs. (#9704)

* Refresh query docs.

Larger changes:

- New doc: querying/datasource.md describes the various kinds of
datasources you can use, and has examples for both SQL and native.
- New doc: querying/query-execution.md describes how native queries
are executed at a high level. It doesn't go into the details of specific
query engines or how queries run at a per-segment level. But I think it
would be good to add or link that content here in the future.
- Refreshed doc: querying/sql.md updated to refer to joins, reformatted
a bit, added a new "Query translation" section that explains how
queries are translated from SQL to native, and removed configuration
details (moved to configuration/index.md).
- Refreshed doc: querying/joins.md updated to refer to join datasources.

Smaller changes:

- Add helpful banners to the top of query documentation pages telling
people whether a given page describes SQL, native, or both.
- Add SQL metrics to operations/metrics.md.
- Add some color and cross-links in various places.
- Add native query component docs to the sidebar, and renamed them so
they look nicer.
- Remove Select query from the sidebar.
- Fix Broker SQL configs in configuration/index.md. Remove them from
querying/sql.md.
- Combined querying/searchquery.md and querying/searchqueryspec.md.

* Updates.

* Fix numbering.

* Fix glitches.

* Add new words to spellcheck file.

* Assorted changes.

* Further adjustments.

* Add missing punctuation.
											
										
										
											2020-04-15 19:12:20 -04:00
+								inlining
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								java.class.path
 								java.io.tmpdir
 								javaOpts
 								javaOptsArray
-												Making optimal usage of multiple segment cache locations (#8038)

* #7641 - Changing segment distribution algorithm to distribute segments to multiple segment cache locations

* Fixing indentation

* WIP

* Adding interface for location strategy selection, least bytes used strategy impl, round-robin strategy impl, locationSelectorStrategy config with least bytes used strategy as the default strategy

* fixing code style

* Fixing test

* Adding a method visible only for testing, fixing tests

* 1. Changing the method contract to return an iterator of locations instead of a single best location. 2. Check style fixes

* fixing the conditional statement

* Added testSegmentDistributionUsingLeastBytesUsedStrategy, fixed testSegmentDistributionUsingRoundRobinStrategy

* to trigger CI build

* Add documentation for the selection strategy configuration

* to re trigger CI build

* updated docs as per review comments, made LeastBytesUsedStorageLocationSelectorStrategy.getLocations a synchronized method, other minor fixes

* In checkLocationConfigForNull method, using getLocations() to check for null instead of directly referring to the locations variable so that tests overriding getLocations() method do not fail

* Implementing review comments. Added tests for StorageLocationSelectorStrategy

* Checkstyle fixes

* Adding java doc comments for StorageLocationSelectorStrategy interface

* checkstyle

* empty commit to retrigger build

* Empty commit

* Adding suppressions for words leastBytesUsed and roundRobin of ../docs/configuration/index.md file

* Impl review comments including updating docs as suggested

* Removing checkLocationConfigForNull(), @NotEmpty annotation serves the purpose

* Round robin iterator to keep track of the no. of iterations, impl review comments, added tests for round robin strategy

* Fixing the round robin iterator

* Removed numLocationsToTry, updated java docs

* changing property attribute value from tier to type

* Fixing assert messages

											
										
										
											2019-09-28 02:17:44 -04:00
+								leastBytesUsed
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								loadList
 								loadqueuepeon
 								loadspec
 								localStorage
 								maxHeaderSize
 								maxQueuedBytes
 								maxSize
 								middlemanager
 								minTimeMs
 								minmax
 								mins
-												Refresh query docs. (#9704)

* Refresh query docs.

Larger changes:

- New doc: querying/datasource.md describes the various kinds of
datasources you can use, and has examples for both SQL and native.
- New doc: querying/query-execution.md describes how native queries
are executed at a high level. It doesn't go into the details of specific
query engines or how queries run at a per-segment level. But I think it
would be good to add or link that content here in the future.
- Refreshed doc: querying/sql.md updated to refer to joins, reformatted
a bit, added a new "Query translation" section that explains how
queries are translated from SQL to native, and removed configuration
details (moved to configuration/index.md).
- Refreshed doc: querying/joins.md updated to refer to join datasources.

Smaller changes:

- Add helpful banners to the top of query documentation pages telling
people whether a given page describes SQL, native, or both.
- Add SQL metrics to operations/metrics.md.
- Add some color and cross-links in various places.
- Add native query component docs to the sidebar, and renamed them so
they look nicer.
- Remove Select query from the sidebar.
- Fix Broker SQL configs in configuration/index.md. Remove them from
querying/sql.md.
- Combined querying/searchquery.md and querying/searchqueryspec.md.

* Updates.

* Fix numbering.

* Fix glitches.

* Add new words to spellcheck file.

* Assorted changes.

* Further adjustments.

* Add missing punctuation.
											
										
										
											2020-04-15 19:12:20 -04:00
+								nullable
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								orderby
 								orderbys
 								org.apache.druid
 								org.apache.druid.jetty.RequestLog
 								org.apache.hadoop
 								overlord.html
 								pendingSegments
 								pre-flight
-												Refresh query docs. (#9704)

* Refresh query docs.

Larger changes:

- New doc: querying/datasource.md describes the various kinds of
datasources you can use, and has examples for both SQL and native.
- New doc: querying/query-execution.md describes how native queries
are executed at a high level. It doesn't go into the details of specific
query engines or how queries run at a per-segment level. But I think it
would be good to add or link that content here in the future.
- Refreshed doc: querying/sql.md updated to refer to joins, reformatted
a bit, added a new "Query translation" section that explains how
queries are translated from SQL to native, and removed configuration
details (moved to configuration/index.md).
- Refreshed doc: querying/joins.md updated to refer to join datasources.

Smaller changes:

- Add helpful banners to the top of query documentation pages telling
people whether a given page describes SQL, native, or both.
- Add SQL metrics to operations/metrics.md.
- Add some color and cross-links in various places.
- Add native query component docs to the sidebar, and renamed them so
they look nicer.
- Remove Select query from the sidebar.
- Fix Broker SQL configs in configuration/index.md. Remove them from
querying/sql.md.
- Combined querying/searchquery.md and querying/searchqueryspec.md.

* Updates.

* Fix numbering.

* Fix glitches.

* Add new words to spellcheck file.

* Assorted changes.

* Further adjustments.

* Add missing punctuation.
											
										
										
											2020-04-15 19:12:20 -04:00
+								preloaded
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								queryType
 								remoteTaskRunnerConfig
 								rendezvousHash
-												Create dynamic config that can limit number of non-primary replicants loaded per coordination cycle (#11135)

* lay the groundwork for throttling replicant loads per RunRules execution

* Add dynamic coordinator config to control new replicant threshold.

* remove redundant line

* add some unit tests

* fix checkstyle error

* add documentation for new dynamic config

* improve docs and logs

* Alter how null is handled for new config. If null, manually set as default
											
										
										
											2021-05-05 08:39:36 -04:00
+								replicants
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								resultsets
-												Making optimal usage of multiple segment cache locations (#8038)

* #7641 - Changing segment distribution algorithm to distribute segments to multiple segment cache locations

* Fixing indentation

* WIP

* Adding interface for location strategy selection, least bytes used strategy impl, round-robin strategy impl, locationSelectorStrategy config with least bytes used strategy as the default strategy

* fixing code style

* Fixing test

* Adding a method visible only for testing, fixing tests

* 1. Changing the method contract to return an iterator of locations instead of a single best location. 2. Check style fixes

* fixing the conditional statement

* Added testSegmentDistributionUsingLeastBytesUsedStrategy, fixed testSegmentDistributionUsingRoundRobinStrategy

* to trigger CI build

* Add documentation for the selection strategy configuration

* to re trigger CI build

* updated docs as per review comments, made LeastBytesUsedStorageLocationSelectorStrategy.getLocations a synchronized method, other minor fixes

* In checkLocationConfigForNull method, using getLocations() to check for null instead of directly referring to the locations variable so that tests overriding getLocations() method do not fail

* Implementing review comments. Added tests for StorageLocationSelectorStrategy

* Checkstyle fixes

* Adding java doc comments for StorageLocationSelectorStrategy interface

* checkstyle

* empty commit to retrigger build

* Empty commit

* Adding suppressions for words leastBytesUsed and roundRobin of ../docs/configuration/index.md file

* Impl review comments including updating docs as suggested

* Removing checkLocationConfigForNull(), @NotEmpty annotation serves the purpose

* Round robin iterator to keep track of the no. of iterations, impl review comments, added tests for round robin strategy

* Fixing the round robin iterator

* Removed numLocationsToTry, updated java docs

* changing property attribute value from tier to type

* Fixing assert messages

											
										
										
											2019-09-28 02:17:44 -04:00
+								roundRobin
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								runtime.properties
 								runtime.properties.
 								s3
 								s3a
 								s3n
 								slf4j
 								sql
 								sqlQuery
 								successfulSending
 								taskBlackListCleanupPeriod
 								tasklogs
 								timeBoundary
-												DruidInputSource: Fix issues in column projection, timestamp handling. (#10267)

* DruidInputSource: Fix issues in column projection, timestamp handling.

DruidInputSource, DruidSegmentReader changes:

1) Remove "dimensions" and "metrics". They are not necessary, because we
   can compute which columns we need to read based on what is going to
   be used by the timestamp, transform, dimensions, and metrics.
2) Start using ColumnsFilter (see below) to decide which columns we need
   to read.
3) Actually respect the "timestampSpec". Previously, it was ignored, and
   the timestamp of the returned InputRows was set to the `__time` column
   of the input datasource.

(1) and (2) together fix a bug in which the DruidInputSource would not
properly read columns that are used as inputs to a transformSpec.

(3) fixes a bug where the timestampSpec would be ignored if you attempted
to set the column to something other than `__time`.

(1) and (3) are breaking changes.

Web console changes:

1) Remove "Dimensions" and "Metrics" from the Druid input source.
2) Set timestampSpec to `{"column": "__time", "format": "millis"}` for
   compatibility with the new behavior.

Other changes:

1) Add ColumnsFilter, a new class that allows input readers to determine
   which columns they need to read. Currently, it's only used by the
   DruidInputSource, but it could be used by other columnar input sources
   in the future.
2) Add a ColumnsFilter to InputRowSchema.
3) Remove the metric names from InputRowSchema (they were unused).
4) Add InputRowSchemas.fromDataSchema method that computes the proper
   ColumnsFilter for given timestamp, dimensions, transform, and metrics.
5) Add "getRequiredColumns" method to TransformSpec to support the above.

* Various fixups.

* Uncomment incorrectly commented lines.

* Move TransformSpecTest to the proper module.

* Add druid.indexer.task.ignoreTimestampSpecForDruidInputSource setting.

* Fix.

* Fix build.

* Checkstyle.

* Misc fixes.

* Fix test.

* Move config.

* Fix imports.

* Fixup.

* Fix ShuffleResourceTest.

* Add import.

* Smarter exclusions.

* Fixes based on tests.

Also, add TIME_COLUMN constant in the web console.

* Adjustments for tests.

* Reorder test data.

* Update docs.

* Update docs to say Druid 0.22.0 instead of 0.21.0.

* Fix test.

* Fix ITAutoCompactionTest.

* Changes from review & from merging.
											
										
										
											2021-03-25 13:32:21 -04:00
+								timestampSpec
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								tmp
 								tmpfs
 								truststore
 								tuningConfig
-												Refresh query docs. (#9704)

* Refresh query docs.

Larger changes:

- New doc: querying/datasource.md describes the various kinds of
datasources you can use, and has examples for both SQL and native.
- New doc: querying/query-execution.md describes how native queries
are executed at a high level. It doesn't go into the details of specific
query engines or how queries run at a per-segment level. But I think it
would be good to add or link that content here in the future.
- Refreshed doc: querying/sql.md updated to refer to joins, reformatted
a bit, added a new "Query translation" section that explains how
queries are translated from SQL to native, and removed configuration
details (moved to configuration/index.md).
- Refreshed doc: querying/joins.md updated to refer to join datasources.

Smaller changes:

- Add helpful banners to the top of query documentation pages telling
people whether a given page describes SQL, native, or both.
- Add SQL metrics to operations/metrics.md.
- Add some color and cross-links in various places.
- Add native query component docs to the sidebar, and renamed them so
they look nicer.
- Remove Select query from the sidebar.
- Fix Broker SQL configs in configuration/index.md. Remove them from
querying/sql.md.
- Combined querying/searchquery.md and querying/searchqueryspec.md.

* Updates.

* Fix numbering.

* Fix glitches.

* Add new words to spellcheck file.

* Assorted changes.

* Further adjustments.

* Add missing punctuation.
											
										
										
											2020-04-15 19:12:20 -04:00
+								unioning
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								useIndexes
 								user.timezone
 								v0.12.0
 								versionReplacementString
 								workerId
 								yyyy-MM-dd
-												Support assign tasks to run on different categories of MiddleManagers (#7066)

* Support assign tasks to run on different tiers of MiddleManagers

* address comments

* address comments

* rename tier to category and docs

* doc

* fix doc

* fix spelling errors

* docs

											
										
										
											2019-10-17 15:57:19 -04:00
+								taskType
 								index_kafka
 								c1
 								c2
 								ds1
 								equalDistributionWithCategorySpec
 								fillCapacityWithCategorySpec
 								WorkerCategorySpec
 								workerCategorySpec
 								CategoryConfig
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								 - ../docs/design/index.md
 								logsearch
 								 - ../docs/ingestion/index.md
 -01-01T01
 								DateTimeFormat
 								JsonPath
 								autodetect
 								createBitmapIndex
 								dimensionExclusions
 								expr
 								jackson-jq
 								missingValue
-												Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead (#10740)

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* fix checkstyle

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* fix test

* fix test

* add log

* Fix byte calculation for maxBytesInMemory to take into account of Sink/Hydrant Object overhead

* address comments

* fix checkstyle

* fix checkstyle

* add config to skip overhead memory calculation

* add test for the skipBytesInMemoryOverheadCheck config

* add docs

* fix checkstyle

* fix checkstyle

* fix spelling

* address comments

* fix travis

* address comments
											
										
										
											2021-01-27 03:34:56 -05:00
+								skipBytesInMemoryOverheadCheck
-												Spellcheck docs (#8548)

* Spellcheck docs

Fix spelling mistakes in docs and add CI job for running spellcheck on
docs.

* Add missing license header

											
										
										
											2019-09-17 15:47:30 -04:00
+								spatialDimensions
 								useFieldDiscovery
 								 - ../docs/tutorials/index.md
 CPU
 								cityName
 								countryIsoCode
 								countryName
 								isAnonymous
 								isMinor
 								isNew
 								isRobot
 								isUnpatrolled
 								metroCode
 								regionIsoCode
-												Add Apache Ranger Authorization (#9579)


											
										
										
											2020-04-04 12:02:24 -04:00
+								regionName
-												Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
											
										
										
											2021-06-30 16:42:45 -04:00
+GiB
 GiB
-												Add Apache Ranger Authorization (#9579)


											
										
										
											2020-04-04 12:02:24 -04:00
+								 - ../docs/development/extensions-core/druid-ranger-security.md
 								json
 								metastore
-												Document possible vulnerabilities for the druid-ranger-security (#9649)

* Document possible vulnerabilities for the druid-ranger-security

In certain configurations the ranger plugin can expose vulnerabilities due
to some of its dependencies having CVEs.

* Spelling checker is a bit tight
											
										
										
											2020-04-09 13:43:11 -04:00
+								UserGroupInformation
 								CVE-2019-17571
 								CVE-2019-12399
 								CVE-2018-17196
-												Update dictionary for spell check (#10152)


											
										
										
											2020-07-08 02:12:39 -04:00
+								bin.tar.gz
-												Support unit on byte-related properties (#10203)

* support unit suffix on byte-related properties

* add doc

* change default value of byte-related properties in example files

* fix coding style

* fix doc

* fix CI

* suppress spelling errors

* improve code according to comments

* rename Bytes to HumanReadableBytes

* add getBytesInInt to get value safely

* improve doc

* fix problem reported by CI

* fix problem reported by CI

* resolve code review comments

* improve error message

* improve code & doc according to comments

* fix CI problem

* improve doc

* suppress spelling check errors
											
										
										
											2020-07-30 21:58:48 -04:00
+								 - ../docs/configuration/human-readable-byte.md
 s
 T
 G
 _000
 _000_000
 _000_000_000
 _000_000_000_000
 _000_000_000_000_000
 								Giga
 								Tera
 								Peta
 								KiB
 								MiB
 								GiB
 								TiB
 								PiB
-												Add Calcite Avatica protobuf handler (#10543)


											
										
										
											2021-03-31 15:46:25 -04:00
+								protobuf
 								Golang
-												remove ZooKeeper 3.4 support + pass tests with Java 15 (#11073)

With this change, Druid will only support ZooKeeper 3.5.x and later.

In order to support Java 15 we need to switch to ZK 3.5.x client libraries and drop support for ZK 3.4.x
(see #10780 for the detailed reasons) 

* remove ZooKeeper 3.4.x compatibility
* exclude additional ZK 3.5.x netty dependencies to ensure we use our version
* keep ZooKeeper version used for integration tests in sync with client library version
* remove the need to specify ZK version at runtime for docker
* add support to run integration tests with JDK 15
* build and run unit tests with Java 15 in travis
											
										
										
											2021-05-25 15:49:49 -04:00
+								multiValueHandling
-												Fixing a few typos and style issues (#11883)

* grammar and format work

* light writing touchup

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
											
										
										
											2021-11-16 13:13:35 -05:00
+								_n_
-												Update default value of `inputSegmentSizeBytes` in configuration docs (#12678)


											
										
										
											2022-06-21 23:35:03 -04:00
+TB
-												Docs: Index page with all SQL functions (#12771)

* list of all functions

* add function names to spelling file
											
										
										
											2022-07-13 21:59:55 -04:00
+								 - ../docs/querying/sql-functions.md
 								ANY_VALUE
 								APPROX_COUNT_DISTINCT_DS_HLL
 								APPROX_COUNT_DISTINCT_DS_THETA
 								APPROX_QUANTILE_DS
 								APPROX_QUANTILE_FIXED_BUCKETS
 								ARRAY_CONCAT_AGG
 								BIT_AND
 								BIT_OR
 								BIT_XOR
 								BITWISE_AND
 								BITWISE_COMPLEMENT
 								BITWISE_CONVERT_DOUBLE_TO_LONG_BITS
 								BITWISE_CONVERT_LONG_BITS_TO_DOUBLE
 								BITWISE_OR
 								BITWISE_SHIFT_LEFT
 								BITWISE_SHIFT_RIGHT
 								BITWISE_XOR
 								BLOOM_FILTER
 								BTRIM
 								CHAR_LENGTH
 								CHARACTER_LENGTH
 								CURRENT_DATE
 								CURRENT_TIMESTAMP
 								DATE_TRUNC
 								DS_CDF
 								DS_GET_QUANTILE
 								DS_GET_QUANTILES
 								DS_HISTOGRAM
 								DS_HLL
 								DS_QUANTILE_SUMMARY
 								DS_QUANTILES_SKETCH
 								DS_RANK
 								DS_THETA
 								EARLIEST_BY
 								_e_
 								HLL_SKETCH_ESTIMATE
 								HLL_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS
 								HLL_SKETCH_TO_STRING
 								HLL_SKETCH_UNION
 								LATEST_BY
 								base-10
 								MV_APPEND
 								MV_CONCAT
 								MV_CONTAINS
 								MV_FILTER_NONE
 								MV_FILTER_ONLY
 								MV_LENGTH
 								MV_OFFSET
 								MV_OFFSET_OF
 								MV_ORDINAL
 								MV_ORDINAL_OF
 								MV_OVERLAP
 								MV_PREPEND
 								MV_SLICE
 								MV_TO_STRING
 								NULLIF
 								_n_th
 								STDDEV_POP
 								STDDEV_SAMP
 								STRING_FORMAT
 								STRING_TO_MV
 								SUBSTR
 								TDIGEST_GENERATE_SKETCH
 								TDIGEST_QUANTILE
 								TEXTCAT
 								THETA_SKETCH_ESTIMATE
 								THETA_SKETCH_ESTIMATE_WITH_ERROR_BOUNDS
 								THETA_SKETCH_INTERSECT
 								THETA_SKETCH_NOT
 								THETA_SKETCH_UNION
 								TIME_CEIL
 								TIME_EXTRACT
 								TIME_FLOOR
 								TIME_FORMAT
 								TIME_IN_INTERVAL
 								TIMESTAMP_TO_MILLIS
 								TIMESTAMPADD
 								TIMESTAMPDIFF
 								TRUNC
 								VAR_POP
 								VAR_SAMP
-												Change Kafka Lookup Extractor to not register consumer group (#12842)

* change kafka lookups module to not commit offsets

The current behaviour of the Kafka lookup extractor is to not commit
offsets by assigning a unique ID to the consumer group and setting
auto.offset.reset to earliest. This does the job but also pollutes the
Kafka broker with a bunch of "ghost" consumer groups that will never again be
used.

To fix this, we now set enable.auto.commit to false, which prevents the
ghost consumer groups being created in the first place.

* update docs to include new enable.auto.commit setting behaviour

* update kafka-lookup-extractor documentation

Provide some additional detail on functionality and configuration.
Hopefully this will make it clearer how the extractor works for
developers who aren't so familiar with Kafka.

* add comments better explaining the logic of the code

* add spelling exceptions for kafka lookup docs
											
										
										
											2022-08-09 06:44:22 -04:00
+								KTable
 								Aotearoa
 								Czechia
-												Update .spelling (#12940)


											
										
										
											2022-08-22 21:47:40 -04:00
+								Zeelund