From 906a704c5546a984a03e10f18f38f1506e4196ad Mon Sep 17 00:00:00 2001 From: frank chen Date: Thu, 1 Jul 2021 04:42:45 +0800 Subject: [PATCH] Eliminate ambiguities of KB/MB/GB in the doc (#11333) * GB ---> GiB * suppress spelling check * MB --> MiB, KB --> KiB * Use IEC binary prefix * Add reference link * Fix doc style --- .../CachingClusteredClientBenchmark.java | 2 +- .../apache/druid/data/input/InputEntity.java | 2 +- .../data/input/MaxSizeSplitHintSpec.java | 2 +- .../data/input/impl/prefetch/FetchConfig.java | 4 +- .../apache/druid/utils/CompressionUtils.java | 2 +- .../druid/java/util/common/FileUtilsTest.java | 2 +- .../io/smoosh/SmooshedFileMapperTest.java | 2 +- docs/configuration/index.md | 8 ++-- .../extensions-core/druid-kerberos.md | 2 +- .../extensions-core/druid-lookups.md | 2 +- docs/operations/basic-cluster-tuning.md | 34 ++++++++-------- docs/operations/single-server.md | 14 +++---- docs/querying/querying.md | 2 +- docs/tutorials/cluster.md | 22 +++++----- docs/tutorials/docker.md | 2 +- docs/tutorials/index.md | 6 +-- .../cache/loading/OffHeapLoadingCache.java | 2 +- .../org/apache/druid/indexer/JobHelper.java | 2 +- integration-tests/README.md | 6 +-- .../writeout/FileWriteOutBytesTest.java | 8 ++-- .../client/cache/MemcachedCacheBenchmark.java | 2 +- website/.spelling | 40 ++++++++++--------- 22 files changed, 86 insertions(+), 82 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java index ffdded1397f..fe89751c32f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java @@ -148,7 +148,7 @@ import java.util.concurrent.TimeUnit; public class CachingClusteredClientBenchmark { private static final Logger LOG = new Logger(CachingClusteredClientBenchmark.class); - private static final int PROCESSING_BUFFER_SIZE = 10 * 1024 * 1024; // ~10MB + private static final int PROCESSING_BUFFER_SIZE = 10 * 1024 * 1024; // ~10MiB private static final String DATA_SOURCE = "ds"; public static final ObjectMapper JSON_MAPPER; diff --git a/core/src/main/java/org/apache/druid/data/input/InputEntity.java b/core/src/main/java/org/apache/druid/data/input/InputEntity.java index 70fd4d2d5f0..a253ac600cb 100644 --- a/core/src/main/java/org/apache/druid/data/input/InputEntity.java +++ b/core/src/main/java/org/apache/druid/data/input/InputEntity.java @@ -43,7 +43,7 @@ public interface InputEntity { Logger LOG = new Logger(InputEntity.class); - int DEFAULT_FETCH_BUFFER_SIZE = 4 * 1024; // 4 KB + int DEFAULT_FETCH_BUFFER_SIZE = 4 * 1024; // 4 KiB int DEFAULT_MAX_NUM_FETCH_TRIES = 3; // 3 tries including the initial try /** diff --git a/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java b/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java index d680615f4a7..0a7e36d8b7c 100644 --- a/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java +++ b/core/src/main/java/org/apache/druid/data/input/MaxSizeSplitHintSpec.java @@ -53,7 +53,7 @@ public class MaxSizeSplitHintSpec implements SplitHintSpec * - 'jute.maxbuffer' in ZooKeeper. This system property controls the max size of ZNode. As its default is 500KB, * task allocation can fail if the serialized ingestion spec is larger than this limit. * - 'max_allowed_packet' in MySQL. 
This is the max size of a communication packet sent to a MySQL server. - * The default is either 64MB or 4MB depending on MySQL version. Updating metadata store can fail if the serialized + * The default is either [64MiB](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_max_allowed_packet) or 4MiB depending on MySQL version. Updating metadata store can fail if the serialized * ingestion spec is larger than this limit. * * The default is conservatively chosen as 1000. diff --git a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java index c85b9b7bdd3..c645d73bead 100644 --- a/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java +++ b/core/src/main/java/org/apache/druid/data/input/impl/prefetch/FetchConfig.java @@ -27,8 +27,8 @@ import java.util.concurrent.TimeUnit; */ public class FetchConfig { - private static final long DEFAULT_MAX_CACHE_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GB - private static final long DEFAULT_MAX_FETCH_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GB + private static final long DEFAULT_MAX_CACHE_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GiB + private static final long DEFAULT_MAX_FETCH_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GiB private static final long DEFAULT_FETCH_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(60); private static final int DEFAULT_MAX_FETCH_RETRY = 3; diff --git a/core/src/main/java/org/apache/druid/utils/CompressionUtils.java b/core/src/main/java/org/apache/druid/utils/CompressionUtils.java index 932ec57a154..124628fa460 100644 --- a/core/src/main/java/org/apache/druid/utils/CompressionUtils.java +++ b/core/src/main/java/org/apache/druid/utils/CompressionUtils.java @@ -367,7 +367,7 @@ public class CompressionUtils { final int otherAvailable = super.available(); // Hack. Docs say available() should return an estimate, - // so we estimate about 1KB to work around available == 0 bug in GZIPInputStream + // so we estimate about 1KiB to work around available == 0 bug in GZIPInputStream return otherAvailable == 0 ? 
1 << 10 : otherAvailable; } } diff --git a/core/src/test/java/org/apache/druid/java/util/common/FileUtilsTest.java b/core/src/test/java/org/apache/druid/java/util/common/FileUtilsTest.java index a20784f3b42..325008929ac 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/FileUtilsTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/FileUtilsTest.java @@ -45,7 +45,7 @@ public class FileUtilsTest long buffersMemoryBefore = BufferUtils.totalMemoryUsedByDirectAndMappedBuffers(); try (RandomAccessFile raf = new RandomAccessFile(dataFile, "rw")) { raf.write(42); - raf.setLength(1 << 20); // 1 MB + raf.setLength(1 << 20); // 1 MiB } try (MappedByteBufferHandler mappedByteBufferHandler = FileUtils.map(dataFile)) { Assert.assertEquals(42, mappedByteBufferHandler.get().get(0)); diff --git a/core/src/test/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapperTest.java b/core/src/test/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapperTest.java index 46851bbe1ff..b0cd5f6b61d 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapperTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/io/smoosh/SmooshedFileMapperTest.java @@ -183,7 +183,7 @@ public class SmooshedFileMapperTest try (FileSmoosher smoosher = new FileSmoosher(baseDir)) { File dataFile = folder.newFile("data.bin"); try (RandomAccessFile raf = new RandomAccessFile(dataFile, "rw")) { - raf.setLength(1 << 20); // 1 MB + raf.setLength(1 << 20); // 1 MiB } smoosher.add(dataFile); } diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 2c09b89303e..a5e9d9aa239 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -393,7 +393,7 @@ The Druid servers [emit various metrics](../operations/metrics.md) and alerts vi |`druid.emitter.http.flushTimeOut`|The timeout after which an event should be sent to the endpoint, even if internal buffers are not filled, in milliseconds.|not specified = no timeout| |`druid.emitter.http.batchingStrategy`|The strategy of how the batch is formatted. "ARRAY" means `[event1,event2]`, "NEWLINES" means `event1\nevent2`, ONLY_EVENTS means `event1event2`.|ARRAY| |`druid.emitter.http.maxBatchSize`|The maximum batch size, in bytes.|the minimum of (10% of JVM heap size divided by 2) or (5242880 (i. e. 5 MiB))| -|`druid.emitter.http.batchQueueSizeLimit`|The maximum number of batches in emitter queue, if there are problems with emitting.|the maximum of (2) or (10% of the JVM heap size divided by 5MB)| +|`druid.emitter.http.batchQueueSizeLimit`|The maximum number of batches in emitter queue, if there are problems with emitting.|the maximum of (2) or (10% of the JVM heap size divided by 5MiB)| |`druid.emitter.http.minHttpTimeoutMillis`|If the speed of filling batches imposes timeout smaller than that, not even trying to send batch to endpoint, because it will likely fail, not being able to send the data that fast. Configure this depending based on emitter/successfulSending/minTimeMs metric. Reasonable values are 10ms..100ms.|0| |`druid.emitter.http.recipientBaseUrl`|The base URL to emit messages to. Druid will POST JSON to be consumed at the HTTP endpoint specified by this property.|none, required config| @@ -1433,7 +1433,7 @@ Druid uses Jetty to serve HTTP requests. |Property|Description|Default| |--------|-----------|-------| -|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB) for the storage of intermediate results. 
The computation engine in the Indexer processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GB)| +|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB) for the storage of intermediate results. The computation engine in the Indexer processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GiB)| |`druid.processing.buffer.poolCacheMaxCount`|processing buffer pool caches the buffers for later use, this is the maximum count cache will grow to. note that pool can create more buffers than it can cache if necessary.|Integer.MAX_VALUE| |`druid.processing.formatString`|Indexer processes use this format string to name their processing threads.|processing-%s| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| @@ -1540,7 +1540,7 @@ Druid uses Jetty to serve HTTP requests. |Property|Description|Default| |--------|-----------|-------| -|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB), for the storage of intermediate results. The computation engine in both the Historical and Realtime processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GB)| +|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB), for the storage of intermediate results. The computation engine in both the Historical and Realtime processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GiB)| |`druid.processing.buffer.poolCacheMaxCount`|processing buffer pool caches the buffers for later use, this is the maximum count cache will grow to. note that pool can create more buffers than it can cache if necessary.|Integer.MAX_VALUE| |`druid.processing.formatString`|Realtime and Historical processes use this format string to name their processing threads.|processing-%s| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. 
If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| @@ -1859,7 +1859,7 @@ Uses memcached as cache backend. This allows all processes to share the same cac |`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)| |`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500| |`druid.cache.hosts`|Comma separated list of Memcached hosts ``.|none| -|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)| +|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MiB)| |`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid| |`druid.cache.numConnections`|Number of memcached connections to use.|1| |`druid.cache.protocol`|Memcached communication protocol. Can be binary or text.|binary| diff --git a/docs/development/extensions-core/druid-kerberos.md b/docs/development/extensions-core/druid-kerberos.md index d2395942c08..608fa3aef9e 100644 --- a/docs/development/extensions-core/druid-kerberos.md +++ b/docs/development/extensions-core/druid-kerberos.md @@ -72,7 +72,7 @@ If required, multiple rules can be joined by newline character and specified as ### Increasing HTTP Header size for large SPNEGO negotiate header In Active Directory environment, SPNEGO token in the Authorization header includes PAC (Privilege Access Certificate) information, which includes all security groups for the user. In some cases when the user belongs to many security groups the header to grow beyond what druid can handle by default. -In such cases, max request header size that druid can handle can be increased by setting `druid.server.http.maxRequestHeaderSize` (default 8Kb) and `druid.router.http.maxRequestBufferSize` (default 8Kb). +In such cases, max request header size that druid can handle can be increased by setting `druid.server.http.maxRequestHeaderSize` (default 8KiB) and `druid.router.http.maxRequestBufferSize` (default 8KiB). ## Configuring Kerberos Escalated Client diff --git a/docs/development/extensions-core/druid-lookups.md b/docs/development/extensions-core/druid-lookups.md index b9a0adf9819..52afbd3b828 100644 --- a/docs/development/extensions-core/druid-lookups.md +++ b/docs/development/extensions-core/druid-lookups.md @@ -139,7 +139,7 @@ Off heap cache is backed by [MapDB](http://www.mapdb.org/) implementation. 
MapDB |Field|Type|Description|Required|default| |-----|----|-----------|--------|-------| -|maxStoreSize|double|maximal size of store in GB, if store is larger entries will start expiring|no |0| +|maxStoreSize|double|maximal size of store in GiB, if store is larger entries will start expiring|no |0| |maxEntriesSize|long| Specifies the maximum number of entries the cache may contain.|no |0 (infinite capacity)| |expireAfterAccess|long| Specifies the eviction time after last read in milliseconds.|no |0 (No read-time-based eviction when set to null)| |expireAfterWrite|long| Specifies the eviction time after last write in milliseconds.|no |0 (No write-time-based eviction when set to null)| diff --git a/docs/operations/basic-cluster-tuning.md b/docs/operations/basic-cluster-tuning.md index e19fa1f766a..6d8694801d2 100644 --- a/docs/operations/basic-cluster-tuning.md +++ b/docs/operations/basic-cluster-tuning.md @@ -40,11 +40,11 @@ The biggest contributions to heap usage on Historicals are: - Partial unmerged query results from segments - The stored maps for [lookups](../querying/lookups.md). -A general rule-of-thumb for sizing the Historical heap is `(0.5GB * number of CPU cores)`, with an upper limit of ~24GB. +A general rule-of-thumb for sizing the Historical heap is `(0.5GiB * number of CPU cores)`, with an upper limit of ~24GiB. This rule-of-thumb scales using the number of CPU cores as a convenient proxy for hardware size and level of concurrency (note: this formula is not a hard rule for sizing Historical heaps). -Having a heap that is too large can result in excessively long GC collection pauses, the ~24GB upper limit is imposed to avoid this. +Having a heap that is too large can result in excessively long GC collection pauses, the ~24GiB upper limit is imposed to avoid this. If caching is enabled on Historicals, the cache is stored on heap, sized by `druid.cache.sizeInBytes`. @@ -56,7 +56,7 @@ If you are using lookups, calculate the total size of the lookup maps being load Druid performs an atomic swap when updating lookup maps (both the old map and the new map will exist in heap during the swap), so the maximum potential heap usage from lookup maps will be (2 * total size of all loaded lookups). -Be sure to add `(2 * total size of all loaded lookups)` to your heap size in addition to the `(0.5GB * number of CPU cores)` guideline. +Be sure to add `(2 * total size of all loaded lookups)` to your heap size in addition to the `(0.5GiB * number of CPU cores)` guideline. #### Processing Threads and Buffers @@ -65,7 +65,7 @@ Please see the [General Guidelines for Processing Threads and Buffers](#processi On Historicals: - `druid.processing.numThreads` should generally be set to `(number of cores - 1)`: a smaller value can result in CPU underutilization, while going over the number of cores can result in unnecessary CPU contention. -- `druid.processing.buffer.sizeBytes` can be set to 500MB. +- `druid.processing.buffer.sizeBytes` can be set to 500MiB. - `druid.processing.numMergeBuffers`, a 1:4 ratio of merge buffers to processing threads is a reasonable choice for general use. 
#### Direct Memory Sizing @@ -110,7 +110,7 @@ We recommend using SSDs for storage on the Historicals, as they handle segment d To estimate total memory usage of the Historical under these guidelines: -- Heap: `(0.5GB * number of CPU cores) + (2 * total size of lookup maps) + druid.cache.sizeInBytes` +- Heap: `(0.5GiB * number of CPU cores) + (2 * total size of lookup maps) + druid.cache.sizeInBytes` - Direct Memory: `(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes` The Historical will use any available free system memory (i.e., memory not used by the Historical JVM and heap/direct memory buffers or other processes on the system) for memory-mapping of segments on disk. For better query performance, you will want to ensure a good (`free system memory` / total size of all `druid.segmentCache.locations`) ratio so that a greater proportion of segments can be kept in memory. @@ -130,7 +130,7 @@ The biggest contributions to heap usage on Brokers are: The Broker heap requirements scale based on the number of segments in the cluster, and the total data size of the segments. -The heap size will vary based on data size and usage patterns, but 4G to 8G is a good starting point for a small or medium cluster (~15 servers or less). For a rough estimate of memory requirements on the high end, very large clusters with a node count on the order of ~100 nodes may need Broker heaps of 30GB-60GB. +The heap size will vary based on data size and usage patterns, but 4GiB to 8GiB is a good starting point for a small or medium cluster (~15 servers or less). For a rough estimate of memory requirements on the high end, very large clusters with a node count on the order of ~100 nodes may need Broker heaps of 30GiB-60GiB. If caching is enabled on the Broker, the cache is stored on heap, sized by `druid.cache.sizeInBytes`. @@ -138,7 +138,7 @@ If caching is enabled on the Broker, the cache is stored on heap, sized by `drui On the Broker, the amount of direct memory needed depends on how many merge buffers (used for merging GroupBys) are configured. The Broker does not generally need processing threads or processing buffers, as query results are merged on-heap in the HTTP connection threads instead. -- `druid.processing.buffer.sizeBytes` can be set to 500MB. +- `druid.processing.buffer.sizeBytes` can be set to 500MiB. - `druid.processing.numThreads`: set this to 1 (the minimum allowed) - `druid.processing.numMergeBuffers`: set this to the same value as on Historicals or a bit higher @@ -158,9 +158,9 @@ When retrieving query results from Historical processes or Tasks, the Broker can This buffer size is controlled by the `druid.broker.http.maxQueuedBytes` setting. -The limit is divided across the number of Historicals/Tasks that a query would hit: suppose I have `druid.broker.http.maxQueuedBytes` set to 5MB, and the Broker receives a query that needs to be fanned out to 2 Historicals. Each per-historical channel would get a 2.5MB buffer in this case. +The limit is divided across the number of Historicals/Tasks that a query would hit: suppose I have `druid.broker.http.maxQueuedBytes` set to 5MiB, and the Broker receives a query that needs to be fanned out to 2 Historicals. Each per-historical channel would get a 2.5MiB buffer in this case. -You can generally set this to a value of approximately `2MB * number of Historicals`. As your cluster scales up with more Historicals and Tasks, consider increasing this buffer size and increasing the Broker heap accordingly. 
+You can generally set this to a value of approximately `2MiB * number of Historicals`. As your cluster scales up with more Historicals and Tasks, consider increasing this buffer size and increasing the Broker heap accordingly. - If the buffer is too small, this can lead to inefficient queries due to the buffer filling up rapidly and stalling the channel - If the buffer is too large, this puts more memory pressure on the Broker due to more queued result data in the HTTP channels. @@ -184,7 +184,7 @@ The MiddleManager is a lightweight task controller/manager that launches Task pr #### MiddleManager heap sizing -The MiddleManager itself does not require much resources, you can set the heap to ~128MB generally. +The MiddleManager itself does not require much resources, you can set the heap to ~128MiB generally. #### SSD storage @@ -204,7 +204,7 @@ The following section below describes configuration for Tasks launched by the Mi ##### Task heap sizing -A 1GB heap is usually enough for Tasks. +A 1GiB heap is usually enough for Tasks. ###### Lookups @@ -220,7 +220,7 @@ For Tasks, 1 or 2 processing threads are often enough, as the Tasks tend to hold - `druid.indexer.fork.property.druid.processing.numThreads`: set this to 1 or 2 - `druid.indexer.fork.property.druid.processing.numMergeBuffers`: set this to 2 -- `druid.indexer.fork.property.druid.processing.buffer.sizeBytes`: can be set to 100MB +- `druid.indexer.fork.property.druid.processing.buffer.sizeBytes`: can be set to 100MiB ##### Direct memory sizing @@ -248,7 +248,7 @@ Tuning the cluster so that each Task can accept 50 queries and 10 non-queries is To estimate total memory usage of a Task under these guidelines: -- Heap: `1GB + (2 * total size of lookup maps)` +- Heap: `1GiB + (2 * total size of lookup maps)` - Direct Memory: `(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes` The total memory usage of the MiddleManager + Tasks: @@ -309,7 +309,7 @@ The Overlord tends to require less resources than the Coordinator or Broker. You The Router has light resource requirements, as it proxies requests to Brokers without performing much computational work itself. -You can assign it 256MB heap as a starting point, growing it if needed. +You can assign it 256MiB heap as a starting point, growing it if needed. @@ -323,7 +323,7 @@ The `druid.processing.numThreads` configuration controls the size of the process `druid.processing.buffer.sizeBytes` is a closely related property that controls the size of the off-heap buffers allocated to the processing threads. -One buffer is allocated for each processing thread. A size between 500MB and 1GB is a reasonable choice for general use. +One buffer is allocated for each processing thread. A size between 500MiB and 1GiB is a reasonable choice for general use. The TopN and GroupBy queries use these buffers to store intermediate computed results. As the buffer size increases, more data can be processed in a single pass. @@ -371,9 +371,9 @@ As a starting point, allowing for 50 concurrent queries (requests that read segm ### Segment decompression -When opening a segment for reading during segment merging or query processing, Druid allocates a 64KB off-heap decompression buffer for each column being read. +When opening a segment for reading during segment merging or query processing, Druid allocates a 64KiB off-heap decompression buffer for each column being read. 
-Thus, there is additional direct memory overhead of (64KB * number of columns read per segment * number of segments read) when reading segments. +Thus, there is additional direct memory overhead of (64KiB * number of columns read per segment * number of segments read) when reading segments. ### Segment merging diff --git a/docs/operations/single-server.md b/docs/operations/single-server.md index 6ba14222803..35413952e22 100644 --- a/docs/operations/single-server.md +++ b/docs/operations/single-server.md @@ -34,7 +34,7 @@ Druid includes a set of reference configurations and launch scripts for single-m The `micro-quickstart` is sized for small machines like laptops and is intended for quick evaluation use-cases. -The `nano-quickstart` is an even smaller configuration, targeting a machine with 1 CPU and 4GB memory. It is meant for limited evaluations in resource constrained environments, such as small Docker containers. +The `nano-quickstart` is an even smaller configuration, targeting a machine with 1 CPU and 4GiB memory. It is meant for limited evaluations in resource constrained environments, such as small Docker containers. The other configurations are intended for general use single-machine deployments. They are sized for hardware roughly based on Amazon's i3 series of EC2 instances. @@ -46,32 +46,32 @@ While example configurations are provided for very large single machines, at hig ## Single server reference configurations -### Nano-Quickstart: 1 CPU, 4GB RAM +### Nano-Quickstart: 1 CPU, 4GiB RAM - Launch command: `bin/start-nano-quickstart` - Configuration directory: `conf/druid/single-server/nano-quickstart` -### Micro-Quickstart: 4 CPU, 16GB RAM +### Micro-Quickstart: 4 CPU, 16GiB RAM - Launch command: `bin/start-micro-quickstart` - Configuration directory: `conf/druid/single-server/micro-quickstart` -### Small: 8 CPU, 64GB RAM (~i3.2xlarge) +### Small: 8 CPU, 64GiB RAM (~i3.2xlarge) - Launch command: `bin/start-small` - Configuration directory: `conf/druid/single-server/small` -### Medium: 16 CPU, 128GB RAM (~i3.4xlarge) +### Medium: 16 CPU, 128GiB RAM (~i3.4xlarge) - Launch command: `bin/start-medium` - Configuration directory: `conf/druid/single-server/medium` -### Large: 32 CPU, 256GB RAM (~i3.8xlarge) +### Large: 32 CPU, 256GiB RAM (~i3.8xlarge) - Launch command: `bin/start-large` - Configuration directory: `conf/druid/single-server/large` -### X-Large: 64 CPU, 512GB RAM (~i3.16xlarge) +### X-Large: 64 CPU, 512GiB RAM (~i3.16xlarge) - Launch command: `bin/start-xlarge` - Configuration directory: `conf/druid/single-server/xlarge` diff --git a/docs/querying/querying.md b/docs/querying/querying.md index 53577fb4cbe..e4504318510 100644 --- a/docs/querying/querying.md +++ b/docs/querying/querying.md @@ -144,5 +144,5 @@ Possible Druid error codes for the `error` field include: |`Query timeout`|504|The query timed out.| |`Query interrupted`|500|The query was interrupted, possibly due to JVM shutdown.| |`Query cancelled`|500|The query was cancelled through the query cancellation API.| -|`Truncated response context`|500|An intermediate response context for the query exceeded the built-in limit of 7KB.

The response context is an internal data structure that Druid servers use to share out-of-band information when sending query results to each other. It is serialized in an HTTP header with a maximum length of 7KB. This error occurs when an intermediate response context sent from a data server (like a Historical) to the Broker exceeds this limit.

The response context is used for a variety of purposes, but the one most likely to generate a large context is sharing details about segments that move during a query. That means this error can potentially indicate that a very large number of segments moved in between the time a Broker issued a query and the time it was processed on Historicals. This should rarely, if ever, occur during normal operation.| +|`Truncated response context`|500|An intermediate response context for the query exceeded the built-in limit of 7KiB.

The response context is an internal data structure that Druid servers use to share out-of-band information when sending query results to each other. It is serialized in an HTTP header with a maximum length of 7KiB. This error occurs when an intermediate response context sent from a data server (like a Historical) to the Broker exceeds this limit.

The response context is used for a variety of purposes, but the one most likely to generate a large context is sharing details about segments that move during a query. That means this error can potentially indicate that a very large number of segments moved in between the time a Broker issued a query and the time it was processed on Historicals. This should rarely, if ever, occur during normal operation.| |`Unknown exception`|500|Some other exception occurred. Check errorMessage and errorClass for details, although keep in mind that the contents of those fields are free-form and may change from release to release.| \ No newline at end of file diff --git a/docs/tutorials/cluster.md b/docs/tutorials/cluster.md index 74a7cf769e2..7d33b0e3bee 100644 --- a/docs/tutorials/cluster.md +++ b/docs/tutorials/cluster.md @@ -51,7 +51,7 @@ In this example, we will be deploying the equivalent of one AWS [m5.2xlarge](htt This hardware offers: - 8 vCPUs -- 31 GB RAM +- 32 GiB RAM Example Master server configurations that have been sized for this hardware can be found under `conf/druid/cluster/master`. @@ -65,7 +65,7 @@ In this example, we will be deploying the equivalent of two AWS [i3.4xlarge](htt This hardware offers: - 16 vCPUs -- 122 GB RAM +- 122 GiB RAM - 2 * 1.9TB SSD storage Example Data server configurations that have been sized for this hardware can be found under `conf/druid/cluster/data`. @@ -80,7 +80,7 @@ In this example, we will be deploying the equivalent of one AWS [m5.2xlarge](htt This hardware offers: - 8 vCPUs -- 31 GB RAM +- 32 GiB RAM You can consider co-locating any open source UIs or query libraries on the same server that the Broker is running on. @@ -322,12 +322,12 @@ You can copy your existing `coordinator-overlord` configs from the single-server #### Data -Suppose we are migrating from a single-server deployment that had 32 CPU and 256GB RAM. In the old deployment, the following configurations for Historicals and MiddleManagers were applied: +Suppose we are migrating from a single-server deployment that had 32 CPU and 256GiB RAM. In the old deployment, the following configurations for Historicals and MiddleManagers were applied: Historical (Single-server) ``` -druid.processing.buffer.sizeBytes=500000000 +druid.processing.buffer.sizeBytes=500MiB druid.processing.numMergeBuffers=8 druid.processing.numThreads=31 ``` @@ -337,11 +337,11 @@ MiddleManager (Single-server) ``` druid.worker.capacity=8 druid.indexer.fork.property.druid.processing.numMergeBuffers=2 -druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100000000 +druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100MiB druid.indexer.fork.property.druid.processing.numThreads=1 ``` -In the clustered deployment, we can choose a split factor (2 in this example), and deploy 2 Data servers with 16CPU and 128GB RAM each. The areas to scale are the following: +In the clustered deployment, we can choose a split factor (2 in this example), and deploy 2 Data servers with 16CPU and 128GiB RAM each. 
The areas to scale are the following: Historical @@ -361,9 +361,9 @@ The resulting configs after the split: New Historical (on 2 Data servers) ``` - druid.processing.buffer.sizeBytes=500000000 - druid.processing.numMergeBuffers=8 - druid.processing.numThreads=31 +druid.processing.buffer.sizeBytes=500MiB +druid.processing.numMergeBuffers=8 +druid.processing.numThreads=31 ``` New MiddleManager (on 2 Data servers) @@ -371,7 +371,7 @@ New MiddleManager (on 2 Data servers) ``` druid.worker.capacity=4 druid.indexer.fork.property.druid.processing.numMergeBuffers=2 -druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100000000 +druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100MiB druid.indexer.fork.property.druid.processing.numThreads=1 ``` diff --git a/docs/tutorials/docker.md b/docs/tutorials/docker.md index b64b636b995..eecb4926f94 100644 --- a/docs/tutorials/docker.md +++ b/docs/tutorials/docker.md @@ -84,4 +84,4 @@ It takes a few seconds for all the Druid processes to fully start up. If you ope From here you can follow along with the [Quickstart](./index.md#step-4-load-data), or elaborate on your `docker-compose.yml` to add any additional external service dependencies as necessary. ## Docker Memory Requirements -If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start. +If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GiB may be a good place to start. diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index 5c8bdd422ca..749dccc418a 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -32,14 +32,14 @@ Before starting, you may want to read the [general Druid overview](../design/ind ## Requirements -You can follow these steps on a relatively small machine, such as a laptop with around 4 CPU and 16 GB of RAM. +You can follow these steps on a relatively small machine, such as a laptop with around 4 CPU and 16 GiB of RAM. Druid comes with several startup configuration profiles for a range of machine sizes. The `micro-quickstart`configuration profile shown here is suitable for evaluating Druid. If you want to try out Druid's performance or scaling capabilities, you'll need a larger machine and configuration profile. -The configuration profiles included with Druid range from the even smaller _Nano-Quickstart_ configuration (1 CPU, 4GB RAM) -to the _X-Large_ configuration (64 CPU, 512GB RAM). For more information, see +The configuration profiles included with Druid range from the even smaller _Nano-Quickstart_ configuration (1 CPU, 4GiB RAM) +to the _X-Large_ configuration (64 CPU, 512GiB RAM). For more information, see [Single server deployment](../operations/single-server.md). Alternatively, see [Clustered deployment](./cluster.md) for information on deploying Druid services across clustered machines. 
diff --git a/extensions-core/lookups-cached-single/src/main/java/org/apache/druid/server/lookup/cache/loading/OffHeapLoadingCache.java b/extensions-core/lookups-cached-single/src/main/java/org/apache/druid/server/lookup/cache/loading/OffHeapLoadingCache.java index a5f7949d1a1..2d26ed7fdf8 100644 --- a/extensions-core/lookups-cached-single/src/main/java/org/apache/druid/server/lookup/cache/loading/OffHeapLoadingCache.java +++ b/extensions-core/lookups-cached-single/src/main/java/org/apache/druid/server/lookup/cache/loading/OffHeapLoadingCache.java @@ -47,7 +47,7 @@ public class OffHeapLoadingCache implements LoadingCache /** * Sets store size limit. Disk or memory space consumed be storage should not grow over this space. - * maximal size of store in GB, if store is larger entries will start expiring + * maximal size of store in GiB, if store is larger entries will start expiring */ @JsonProperty private final double maxStoreSize; diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/JobHelper.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/JobHelper.java index 7e0ab4c189d..acff7538155 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/JobHelper.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/JobHelper.java @@ -78,7 +78,7 @@ public class JobHelper private static final Logger log = new Logger(JobHelper.class); private static final int NUM_RETRIES = 8; private static final int SECONDS_BETWEEN_RETRIES = 2; - private static final int DEFAULT_FS_BUFFER_SIZE = 1 << 18; // 256KB + private static final int DEFAULT_FS_BUFFER_SIZE = 1 << 18; // 256KiB private static final Pattern SNAPSHOT_JAR = Pattern.compile(".*-SNAPSHOT(-selfcontained)?\\.jar$"); public static Path distributedClassPath(String path) diff --git a/integration-tests/README.md b/integration-tests/README.md index 9502786dec7..e32524647bd 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -36,7 +36,7 @@ Integration Testing Using Docker Before starting, if you don't already have docker on your machine, install it as described on [Docker installation instructions](https://docs.docker.com/install/). Ensure that you -have at least 4GB of memory allocated to the docker engine. (You can verify it +have at least 4GiB of memory allocated to the docker engine. (You can verify it under Preferences > Resources > Advanced.) Also set the `DOCKER_IP` @@ -186,7 +186,7 @@ machine. > NOTE: Quickstart does not run with ssl, so to trick the integration tests we specify the `*_tls_url` in the config to be the same as the http url. -Make sure you have at least 6GB of memory available before you run the tests. +Make sure you have at least 6GiB of memory available before you run the tests. The tests rely on files in the test/resources folder to exist under the path /resources, so create a symlink to make them available: @@ -216,7 +216,7 @@ Then run the tests using a command similar to: Running Tests Using A Configuration File for Any Cluster ------------------- -Make sure that you have at least 6GB of memory available before you run the tests. +Make sure that you have at least 6GiB of memory available before you run the tests. 
To run tests on any druid cluster that is already running, create a configuration file: diff --git a/processing/src/test/java/org/apache/druid/segment/writeout/FileWriteOutBytesTest.java b/processing/src/test/java/org/apache/druid/segment/writeout/FileWriteOutBytesTest.java index 8501fa61eea..e19faa1ee5a 100644 --- a/processing/src/test/java/org/apache/druid/segment/writeout/FileWriteOutBytesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/writeout/FileWriteOutBytesTest.java @@ -42,9 +42,9 @@ public class FileWriteOutBytesTest } @Test - public void write4KBIntsShouldNotFlush() throws IOException + public void write4KiBIntsShouldNotFlush() throws IOException { - // Write 4KB of ints and expect the write operation of the file channel will be triggered only once. + // Write 4KiB of ints and expect the write operation of the file channel will be triggered only once. EasyMock.expect(mockFileChannel.write(EasyMock.anyObject(ByteBuffer.class))) .andAnswer(() -> { ByteBuffer buffer = (ByteBuffer) EasyMock.getCurrentArguments()[0]; @@ -58,8 +58,8 @@ public class FileWriteOutBytesTest for (int i = 0; i < numOfInt; i++) { fileWriteOutBytes.writeInt(i); } - // no need to flush up to 4KB - // the first byte after 4KB will cause a flush + // no need to flush up to 4KiB + // the first byte after 4KiB will cause a flush fileWriteOutBytes.write(1); EasyMock.verify(mockFileChannel); } diff --git a/server/src/test/java/org/apache/druid/client/cache/MemcachedCacheBenchmark.java b/server/src/test/java/org/apache/druid/client/cache/MemcachedCacheBenchmark.java index fda0b64c414..fcb42b101cb 100644 --- a/server/src/test/java/org/apache/druid/client/cache/MemcachedCacheBenchmark.java +++ b/server/src/test/java/org/apache/druid/client/cache/MemcachedCacheBenchmark.java @@ -63,7 +63,7 @@ public class MemcachedCacheBenchmark extends SimpleBenchmark protected void setUp() throws Exception { SerializingTranscoder transcoder = new SerializingTranscoder( - 50 * 1024 * 1024 // 50 MB + 50 * 1024 * 1024 // 50 MiB ); // disable compression transcoder.setCompressionThreshold(Integer.MAX_VALUE); diff --git a/website/.spelling b/website/.spelling index 4434cd0b1cc..dd240c5e64b 100644 --- a/website/.spelling +++ b/website/.spelling @@ -704,7 +704,7 @@ initialAdminRole adminGroupMapping groupMappingName - ../docs/development/extensions-core/druid-kerberos.md -8Kb +8KiB HttpComponents MyKerberosAuthenticator RFC-4559 @@ -1213,18 +1213,17 @@ taskId taskid un - ../docs/operations/basic-cluster-tuning.md -100MB -128MB +100MiB +128MiB 15ms -2.5MB -24GB -256MB -30GB-60GB -4G +2.5MiB +24GiB +256MiB +30GiB-60GiB +4GiB 5MB -64KB -7KB -8G +64KiB +8GiB G1GC GroupBys QoS-type @@ -1319,12 +1318,12 @@ loadForever - ../docs/operations/segment-optimization.md 700MB - ../docs/operations/single-server.md -128GB -16GB -256GB -4GB -512GB -64GB +128GiB +16GiB +256GiB +4GiB +512GiB +64GiB Nano-Quickstart i3 i3.16xlarge @@ -1454,6 +1453,7 @@ useResultLevelCache vectorSize enableJoinLeftTableScanDirect - ../docs/querying/querying.md +7KiB DatasourceMetadata TimeBoundary errorClass @@ -1609,6 +1609,8 @@ outputType WebUpd8 m5.2xlarge metadata.storage. +256GiB +128GiB - ../docs/tutorials/tutorial-batch-hadoop.md PATH_TO_DRUID namenode @@ -1655,7 +1657,7 @@ GiB 2GB 30_000 524288000L -5MB +5MiB 8u60 Autoscaler AvaticaConnectionBalancer @@ -1860,6 +1862,8 @@ isUnpatrolled metroCode regionIsoCode regionName +4GiB +512GiB - ../docs/development/extensions-core/druid-ranger-security.md json metastore
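The IEC binary prefixes adopted throughout this change denote powers of 1024 (1 KiB = 1,024 bytes, 1 MiB = 1,048,576 bytes, 1 GiB = 1,073,741,824 bytes), while the decimal prefixes KB/MB/GB that they replace are conventionally read as powers of 1,000. Below is a minimal Java sketch, illustrative only and not part of the patch (the class name is hypothetical), showing how the raw constants that appear in the touched sources (`4 * 1024`, `1 << 18`, `10 * 1024 * 1024`, `1024 * 1024 * 1024`) line up with these units, and why the human-readable value `500MiB` used in the tutorial configs corresponds to 524,288,000 bytes rather than the decimal 500,000,000 it replaces.

```java
// Illustrative sketch only; not part of this patch. The class name is hypothetical.
public final class BinaryPrefixExample
{
  private static final long KIB = 1L << 10; // 1,024 bytes
  private static final long MIB = 1L << 20; // 1,048,576 bytes
  private static final long GIB = 1L << 30; // 1,073,741,824 bytes

  public static void main(String[] args)
  {
    System.out.println("4 KiB   = " + (4 * KIB) + " bytes");   // 4096, i.e. 4 * 1024
    System.out.println("256 KiB = " + (256 * KIB) + " bytes"); // 262144, i.e. 1 << 18
    System.out.println("10 MiB  = " + (10 * MIB) + " bytes");  // 10485760, i.e. 10 * 1024 * 1024
    System.out.println("500 MiB = " + (500 * MIB) + " bytes"); // 524288000, vs. the decimal 500000000
    System.out.println("1 GiB   = " + GIB + " bytes");         // 1073741824, i.e. 1024 * 1024 * 1024
  }
}
```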