Eliminate ambiguities of KB/MB/GB in the doc (#11333)

* GB ---> GiB

* suppress spelling check

* MB --> MiB, KB --> KiB

* Use IEC binary prefix

* Add reference link

* Fix doc style
This commit is contained in:
frank chen 2021-07-01 04:42:45 +08:00 committed by GitHub
parent 8037a54525
commit 906a704c55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 86 additions and 82 deletions

View File

@ -148,7 +148,7 @@ import java.util.concurrent.TimeUnit;
public class CachingClusteredClientBenchmark
{
private static final Logger LOG = new Logger(CachingClusteredClientBenchmark.class);
private static final int PROCESSING_BUFFER_SIZE = 10 * 1024 * 1024; // ~10MB
private static final int PROCESSING_BUFFER_SIZE = 10 * 1024 * 1024; // ~10MiB
private static final String DATA_SOURCE = "ds";
public static final ObjectMapper JSON_MAPPER;

View File

@ -43,7 +43,7 @@ public interface InputEntity
{
Logger LOG = new Logger(InputEntity.class);
int DEFAULT_FETCH_BUFFER_SIZE = 4 * 1024; // 4 KB
int DEFAULT_FETCH_BUFFER_SIZE = 4 * 1024; // 4 KiB
int DEFAULT_MAX_NUM_FETCH_TRIES = 3; // 3 tries including the initial try
/**

View File

@ -53,7 +53,7 @@ public class MaxSizeSplitHintSpec implements SplitHintSpec
* - 'jute.maxbuffer' in ZooKeeper. This system property controls the max size of ZNode. As its default is 500KB,
* task allocation can fail if the serialized ingestion spec is larger than this limit.
* - 'max_allowed_packet' in MySQL. This is the max size of a communication packet sent to a MySQL server.
* The default is either 64MB or 4MB depending on MySQL version. Updating metadata store can fail if the serialized
* The default is either [64MiB](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_max_allowed_packet) or 4MiB depending on MySQL version. Updating metadata store can fail if the serialized
* ingestion spec is larger than this limit.
*
* The default is conservatively chosen as 1000.

View File

@ -27,8 +27,8 @@ import java.util.concurrent.TimeUnit;
*/
public class FetchConfig
{
private static final long DEFAULT_MAX_CACHE_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GB
private static final long DEFAULT_MAX_FETCH_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GB
private static final long DEFAULT_MAX_CACHE_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GiB
private static final long DEFAULT_MAX_FETCH_CAPACITY_BYTES = 1024 * 1024 * 1024; // 1GiB
private static final long DEFAULT_FETCH_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(60);
private static final int DEFAULT_MAX_FETCH_RETRY = 3;

View File

@ -367,7 +367,7 @@ public class CompressionUtils
{
final int otherAvailable = super.available();
// Hack. Docs say available() should return an estimate,
// so we estimate about 1KB to work around available == 0 bug in GZIPInputStream
// so we estimate about 1KiB to work around available == 0 bug in GZIPInputStream
return otherAvailable == 0 ? 1 << 10 : otherAvailable;
}
}

View File

@ -45,7 +45,7 @@ public class FileUtilsTest
long buffersMemoryBefore = BufferUtils.totalMemoryUsedByDirectAndMappedBuffers();
try (RandomAccessFile raf = new RandomAccessFile(dataFile, "rw")) {
raf.write(42);
raf.setLength(1 << 20); // 1 MB
raf.setLength(1 << 20); // 1 MiB
}
try (MappedByteBufferHandler mappedByteBufferHandler = FileUtils.map(dataFile)) {
Assert.assertEquals(42, mappedByteBufferHandler.get().get(0));

View File

@ -183,7 +183,7 @@ public class SmooshedFileMapperTest
try (FileSmoosher smoosher = new FileSmoosher(baseDir)) {
File dataFile = folder.newFile("data.bin");
try (RandomAccessFile raf = new RandomAccessFile(dataFile, "rw")) {
raf.setLength(1 << 20); // 1 MB
raf.setLength(1 << 20); // 1 MiB
}
smoosher.add(dataFile);
}

View File

@ -393,7 +393,7 @@ The Druid servers [emit various metrics](../operations/metrics.md) and alerts vi
|`druid.emitter.http.flushTimeOut`|The timeout after which an event should be sent to the endpoint, even if internal buffers are not filled, in milliseconds.|not specified = no timeout|
|`druid.emitter.http.batchingStrategy`|The strategy of how the batch is formatted. "ARRAY" means `[event1,event2]`, "NEWLINES" means `event1\nevent2`, ONLY_EVENTS means `event1event2`.|ARRAY|
|`druid.emitter.http.maxBatchSize`|The maximum batch size, in bytes.|the minimum of (10% of JVM heap size divided by 2) or (5242880 (i.e. 5 MiB))|
|`druid.emitter.http.batchQueueSizeLimit`|The maximum number of batches in emitter queue, if there are problems with emitting.|the maximum of (2) or (10% of the JVM heap size divided by 5MB)|
|`druid.emitter.http.batchQueueSizeLimit`|The maximum number of batches in emitter queue, if there are problems with emitting.|the maximum of (2) or (10% of the JVM heap size divided by 5MiB)|
|`druid.emitter.http.minHttpTimeoutMillis`|If the speed of filling batches imposes a timeout smaller than this value, the emitter does not even try to send the batch to the endpoint, because the attempt would likely fail (the data cannot be sent that fast). Configure this based on the emitter/successfulSending/minTimeMs metric. Reasonable values are 10ms..100ms.|0|
|`druid.emitter.http.recipientBaseUrl`|The base URL to emit messages to. Druid will POST JSON to be consumed at the HTTP endpoint specified by this property.|none, required config|
@ -1433,7 +1433,7 @@ Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB) for the storage of intermediate results. The computation engine in the Indexer processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GB)|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB) for the storage of intermediate results. The computation engine in the Indexer processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GiB)|
|`druid.processing.buffer.poolCacheMaxCount`|processing buffer pool caches the buffers for later use, this is the maximum count cache will grow to. note that pool can create more buffers than it can cache if necessary.|Integer.MAX_VALUE|
|`druid.processing.formatString`|Indexer processes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`|
@ -1540,7 +1540,7 @@ Druid uses Jetty to serve HTTP requests.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB), for the storage of intermediate results. The computation engine in both the Historical and Realtime processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GB)|
|`druid.processing.buffer.sizeBytes`|This specifies a buffer size (less than 2GiB), for the storage of intermediate results. The computation engine in both the Historical and Realtime processes will use a scratch buffer of this size to do all of their intermediate computations off-heap. Larger values allow for more aggregations in a single pass over the data while smaller values can require more passes depending on the query that is being executed. [Human-readable format](human-readable-byte.md) is supported.|auto (max 1GiB)|
|`druid.processing.buffer.poolCacheMaxCount`|processing buffer pool caches the buffers for later use, this is the maximum count cache will grow to. note that pool can create more buffers than it can cache if necessary.|Integer.MAX_VALUE|
|`druid.processing.formatString`|Realtime and Historical processes use this format string to name their processing threads.|processing-%s|
|`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`|
@ -1859,7 +1859,7 @@ Uses memcached as cache backend. This allows all processes to share the same cac
|`druid.cache.expiration`|Memcached [expiration time](https://code.google.com/p/memcached/wiki/NewCommands#Standard_Protocol).|2592000 (30 days)|
|`druid.cache.timeout`|Maximum time in milliseconds to wait for a response from Memcached.|500|
|`druid.cache.hosts`|Comma separated list of Memcached hosts `<host:port>`.|none|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MB)|
|`druid.cache.maxObjectSize`|Maximum object size in bytes for a Memcached object.|52428800 (50 MiB)|
|`druid.cache.memcachedPrefix`|Key prefix for all keys in Memcached.|druid|
|`druid.cache.numConnections`|Number of memcached connections to use.|1|
|`druid.cache.protocol`|Memcached communication protocol. Can be binary or text.|binary|

View File

@ -72,7 +72,7 @@ If required, multiple rules can be joined by newline character and specified as
### Increasing HTTP Header size for large SPNEGO negotiate header
In an Active Directory environment, the SPNEGO token in the Authorization header includes PAC (Privilege Attribute Certificate) information,
which includes all security groups for the user. In some cases, when the user belongs to many security groups, the header can grow beyond what druid can handle by default.
In such cases, max request header size that druid can handle can be increased by setting `druid.server.http.maxRequestHeaderSize` (default 8Kb) and `druid.router.http.maxRequestBufferSize` (default 8Kb).
In such cases, max request header size that druid can handle can be increased by setting `druid.server.http.maxRequestHeaderSize` (default 8KiB) and `druid.router.http.maxRequestBufferSize` (default 8KiB).
## Configuring Kerberos Escalated Client

View File

@ -139,7 +139,7 @@ Off heap cache is backed by [MapDB](http://www.mapdb.org/) implementation. MapDB
|Field|Type|Description|Required|default|
|-----|----|-----------|--------|-------|
|maxStoreSize|double|maximal size of store in GB, if store is larger entries will start expiring|no |0|
|maxStoreSize|double|maximal size of store in GiB, if store is larger entries will start expiring|no |0|
|maxEntriesSize|long| Specifies the maximum number of entries the cache may contain.|no |0 (infinite capacity)|
|expireAfterAccess|long| Specifies the eviction time after last read in milliseconds.|no |0 (No read-time-based eviction when set to null)|
|expireAfterWrite|long| Specifies the eviction time after last write in milliseconds.|no |0 (No write-time-based eviction when set to null)|

View File

@ -40,11 +40,11 @@ The biggest contributions to heap usage on Historicals are:
- Partial unmerged query results from segments
- The stored maps for [lookups](../querying/lookups.md).
A general rule-of-thumb for sizing the Historical heap is `(0.5GB * number of CPU cores)`, with an upper limit of ~24GB.
A general rule-of-thumb for sizing the Historical heap is `(0.5GiB * number of CPU cores)`, with an upper limit of ~24GiB.
This rule-of-thumb scales using the number of CPU cores as a convenient proxy for hardware size and level of concurrency (note: this formula is not a hard rule for sizing Historical heaps).
Having a heap that is too large can result in excessively long GC collection pauses, the ~24GB upper limit is imposed to avoid this.
Having a heap that is too large can result in excessively long GC pauses; the ~24GiB upper limit is imposed to avoid this.
If caching is enabled on Historicals, the cache is stored on heap, sized by `druid.cache.sizeInBytes`.
@ -56,7 +56,7 @@ If you are using lookups, calculate the total size of the lookup maps being load
Druid performs an atomic swap when updating lookup maps (both the old map and the new map will exist in heap during the swap), so the maximum potential heap usage from lookup maps will be (2 * total size of all loaded lookups).
Be sure to add `(2 * total size of all loaded lookups)` to your heap size in addition to the `(0.5GB * number of CPU cores)` guideline.
Be sure to add `(2 * total size of all loaded lookups)` to your heap size in addition to the `(0.5GiB * number of CPU cores)` guideline.
#### Processing Threads and Buffers
@ -65,7 +65,7 @@ Please see the [General Guidelines for Processing Threads and Buffers](#processi
On Historicals:
- `druid.processing.numThreads` should generally be set to `(number of cores - 1)`: a smaller value can result in CPU underutilization, while going over the number of cores can result in unnecessary CPU contention.
- `druid.processing.buffer.sizeBytes` can be set to 500MB.
- `druid.processing.buffer.sizeBytes` can be set to 500MiB.
- `druid.processing.numMergeBuffers`, a 1:4 ratio of merge buffers to processing threads is a reasonable choice for general use.
#### Direct Memory Sizing
@ -110,7 +110,7 @@ We recommend using SSDs for storage on the Historicals, as they handle segment d
To estimate total memory usage of the Historical under these guidelines:
- Heap: `(0.5GB * number of CPU cores) + (2 * total size of lookup maps) + druid.cache.sizeInBytes`
- Heap: `(0.5GiB * number of CPU cores) + (2 * total size of lookup maps) + druid.cache.sizeInBytes`
- Direct Memory: `(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes`
The Historical will use any available free system memory (i.e., memory not used by the Historical JVM and heap/direct memory buffers or other processes on the system) for memory-mapping of segments on disk. For better query performance, you will want to ensure a good (`free system memory` / total size of all `druid.segmentCache.locations`) ratio so that a greater proportion of segments can be kept in memory.
@ -130,7 +130,7 @@ The biggest contributions to heap usage on Brokers are:
The Broker heap requirements scale based on the number of segments in the cluster, and the total data size of the segments.
The heap size will vary based on data size and usage patterns, but 4G to 8G is a good starting point for a small or medium cluster (~15 servers or less). For a rough estimate of memory requirements on the high end, very large clusters with a node count on the order of ~100 nodes may need Broker heaps of 30GB-60GB.
The heap size will vary based on data size and usage patterns, but 4GiB to 8GiB is a good starting point for a small or medium cluster (~15 servers or less). For a rough estimate of memory requirements on the high end, very large clusters with a node count on the order of ~100 nodes may need Broker heaps of 30GiB-60GiB.
If caching is enabled on the Broker, the cache is stored on heap, sized by `druid.cache.sizeInBytes`.
@ -138,7 +138,7 @@ If caching is enabled on the Broker, the cache is stored on heap, sized by `drui
On the Broker, the amount of direct memory needed depends on how many merge buffers (used for merging GroupBys) are configured. The Broker does not generally need processing threads or processing buffers, as query results are merged on-heap in the HTTP connection threads instead.
- `druid.processing.buffer.sizeBytes` can be set to 500MB.
- `druid.processing.buffer.sizeBytes` can be set to 500MiB.
- `druid.processing.numThreads`: set this to 1 (the minimum allowed)
- `druid.processing.numMergeBuffers`: set this to the same value as on Historicals or a bit higher
@ -158,9 +158,9 @@ When retrieving query results from Historical processes or Tasks, the Broker can
This buffer size is controlled by the `druid.broker.http.maxQueuedBytes` setting.
The limit is divided across the number of Historicals/Tasks that a query would hit: suppose I have `druid.broker.http.maxQueuedBytes` set to 5MB, and the Broker receives a query that needs to be fanned out to 2 Historicals. Each per-historical channel would get a 2.5MB buffer in this case.
The limit is divided across the number of Historicals/Tasks that a query would hit: suppose I have `druid.broker.http.maxQueuedBytes` set to 5MiB, and the Broker receives a query that needs to be fanned out to 2 Historicals. Each per-historical channel would get a 2.5MiB buffer in this case.
You can generally set this to a value of approximately `2MB * number of Historicals`. As your cluster scales up with more Historicals and Tasks, consider increasing this buffer size and increasing the Broker heap accordingly.
You can generally set this to a value of approximately `2MiB * number of Historicals`. As your cluster scales up with more Historicals and Tasks, consider increasing this buffer size and increasing the Broker heap accordingly.
- If the buffer is too small, this can lead to inefficient queries due to the buffer filling up rapidly and stalling the channel
- If the buffer is too large, this puts more memory pressure on the Broker due to more queued result data in the HTTP channels.
@ -184,7 +184,7 @@ The MiddleManager is a lightweight task controller/manager that launches Task pr
#### MiddleManager heap sizing
The MiddleManager itself does not require much resources, you can set the heap to ~128MB generally.
The MiddleManager itself does not require many resources; you can generally set the heap to ~128MiB.
#### SSD storage
@ -204,7 +204,7 @@ The following section below describes configuration for Tasks launched by the Mi
##### Task heap sizing
A 1GB heap is usually enough for Tasks.
A 1GiB heap is usually enough for Tasks.
###### Lookups
@ -220,7 +220,7 @@ For Tasks, 1 or 2 processing threads are often enough, as the Tasks tend to hold
- `druid.indexer.fork.property.druid.processing.numThreads`: set this to 1 or 2
- `druid.indexer.fork.property.druid.processing.numMergeBuffers`: set this to 2
- `druid.indexer.fork.property.druid.processing.buffer.sizeBytes`: can be set to 100MB
- `druid.indexer.fork.property.druid.processing.buffer.sizeBytes`: can be set to 100MiB
##### Direct memory sizing
@ -248,7 +248,7 @@ Tuning the cluster so that each Task can accept 50 queries and 10 non-queries is
To estimate total memory usage of a Task under these guidelines:
- Heap: `1GB + (2 * total size of lookup maps)`
- Heap: `1GiB + (2 * total size of lookup maps)`
- Direct Memory: `(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes`
The total memory usage of the MiddleManager + Tasks:
@ -309,7 +309,7 @@ The Overlord tends to require less resources than the Coordinator or Broker. You
The Router has light resource requirements, as it proxies requests to Brokers without performing much computational work itself.
You can assign it 256MB heap as a starting point, growing it if needed.
You can assign it 256MiB heap as a starting point, growing it if needed.
<a name="processing-threads-buffers"></a>
@ -323,7 +323,7 @@ The `druid.processing.numThreads` configuration controls the size of the process
`druid.processing.buffer.sizeBytes` is a closely related property that controls the size of the off-heap buffers allocated to the processing threads.
One buffer is allocated for each processing thread. A size between 500MB and 1GB is a reasonable choice for general use.
One buffer is allocated for each processing thread. A size between 500MiB and 1GiB is a reasonable choice for general use.
The TopN and GroupBy queries use these buffers to store intermediate computed results. As the buffer size increases, more data can be processed in a single pass.
@ -371,9 +371,9 @@ As a starting point, allowing for 50 concurrent queries (requests that read segm
### Segment decompression
When opening a segment for reading during segment merging or query processing, Druid allocates a 64KB off-heap decompression buffer for each column being read.
When opening a segment for reading during segment merging or query processing, Druid allocates a 64KiB off-heap decompression buffer for each column being read.
Thus, there is additional direct memory overhead of (64KB * number of columns read per segment * number of segments read) when reading segments.
Thus, there is additional direct memory overhead of (64KiB * number of columns read per segment * number of segments read) when reading segments.
### Segment merging

View File

@ -34,7 +34,7 @@ Druid includes a set of reference configurations and launch scripts for single-m
The `micro-quickstart` is sized for small machines like laptops and is intended for quick evaluation use-cases.
The `nano-quickstart` is an even smaller configuration, targeting a machine with 1 CPU and 4GB memory. It is meant for limited evaluations in resource constrained environments, such as small Docker containers.
The `nano-quickstart` is an even smaller configuration, targeting a machine with 1 CPU and 4GiB memory. It is meant for limited evaluations in resource constrained environments, such as small Docker containers.
The other configurations are intended for general use single-machine deployments. They are sized for hardware roughly based on Amazon's i3 series of EC2 instances.
@ -46,32 +46,32 @@ While example configurations are provided for very large single machines, at hig
## Single server reference configurations
### Nano-Quickstart: 1 CPU, 4GB RAM
### Nano-Quickstart: 1 CPU, 4GiB RAM
- Launch command: `bin/start-nano-quickstart`
- Configuration directory: `conf/druid/single-server/nano-quickstart`
### Micro-Quickstart: 4 CPU, 16GB RAM
### Micro-Quickstart: 4 CPU, 16GiB RAM
- Launch command: `bin/start-micro-quickstart`
- Configuration directory: `conf/druid/single-server/micro-quickstart`
### Small: 8 CPU, 64GB RAM (~i3.2xlarge)
### Small: 8 CPU, 64GiB RAM (~i3.2xlarge)
- Launch command: `bin/start-small`
- Configuration directory: `conf/druid/single-server/small`
### Medium: 16 CPU, 128GB RAM (~i3.4xlarge)
### Medium: 16 CPU, 128GiB RAM (~i3.4xlarge)
- Launch command: `bin/start-medium`
- Configuration directory: `conf/druid/single-server/medium`
### Large: 32 CPU, 256GB RAM (~i3.8xlarge)
### Large: 32 CPU, 256GiB RAM (~i3.8xlarge)
- Launch command: `bin/start-large`
- Configuration directory: `conf/druid/single-server/large`
### X-Large: 64 CPU, 512GB RAM (~i3.16xlarge)
### X-Large: 64 CPU, 512GiB RAM (~i3.16xlarge)
- Launch command: `bin/start-xlarge`
- Configuration directory: `conf/druid/single-server/xlarge`

View File

@ -144,5 +144,5 @@ Possible Druid error codes for the `error` field include:
|`Query timeout`|504|The query timed out.|
|`Query interrupted`|500|The query was interrupted, possibly due to JVM shutdown.|
|`Query cancelled`|500|The query was cancelled through the query cancellation API.|
|`Truncated response context`|500|An intermediate response context for the query exceeded the built-in limit of 7KB.<br/><br/>The response context is an internal data structure that Druid servers use to share out-of-band information when sending query results to each other. It is serialized in an HTTP header with a maximum length of 7KB. This error occurs when an intermediate response context sent from a data server (like a Historical) to the Broker exceeds this limit.<br/><br/>The response context is used for a variety of purposes, but the one most likely to generate a large context is sharing details about segments that move during a query. That means this error can potentially indicate that a very large number of segments moved in between the time a Broker issued a query and the time it was processed on Historicals. This should rarely, if ever, occur during normal operation.|
|`Truncated response context`|500|An intermediate response context for the query exceeded the built-in limit of 7KiB.<br/><br/>The response context is an internal data structure that Druid servers use to share out-of-band information when sending query results to each other. It is serialized in an HTTP header with a maximum length of 7KiB. This error occurs when an intermediate response context sent from a data server (like a Historical) to the Broker exceeds this limit.<br/><br/>The response context is used for a variety of purposes, but the one most likely to generate a large context is sharing details about segments that move during a query. That means this error can potentially indicate that a very large number of segments moved in between the time a Broker issued a query and the time it was processed on Historicals. This should rarely, if ever, occur during normal operation.|
|`Unknown exception`|500|Some other exception occurred. Check errorMessage and errorClass for details, although keep in mind that the contents of those fields are free-form and may change from release to release.|

View File

@ -51,7 +51,7 @@ In this example, we will be deploying the equivalent of one AWS [m5.2xlarge](htt
This hardware offers:
- 8 vCPUs
- 31 GB RAM
- 32 GiB RAM
Example Master server configurations that have been sized for this hardware can be found under `conf/druid/cluster/master`.
@ -65,7 +65,7 @@ In this example, we will be deploying the equivalent of two AWS [i3.4xlarge](htt
This hardware offers:
- 16 vCPUs
- 122 GB RAM
- 122 GiB RAM
- 2 * 1.9TB SSD storage
Example Data server configurations that have been sized for this hardware can be found under `conf/druid/cluster/data`.
@ -80,7 +80,7 @@ In this example, we will be deploying the equivalent of one AWS [m5.2xlarge](htt
This hardware offers:
- 8 vCPUs
- 31 GB RAM
- 32 GiB RAM
You can consider co-locating any open source UIs or query libraries on the same server that the Broker is running on.
@ -322,12 +322,12 @@ You can copy your existing `coordinator-overlord` configs from the single-server
#### Data
Suppose we are migrating from a single-server deployment that had 32 CPU and 256GB RAM. In the old deployment, the following configurations for Historicals and MiddleManagers were applied:
Suppose we are migrating from a single-server deployment that had 32 CPU and 256GiB RAM. In the old deployment, the following configurations for Historicals and MiddleManagers were applied:
Historical (Single-server)
```
druid.processing.buffer.sizeBytes=500000000
druid.processing.buffer.sizeBytes=500MiB
druid.processing.numMergeBuffers=8
druid.processing.numThreads=31
```
@ -337,11 +337,11 @@ MiddleManager (Single-server)
```
druid.worker.capacity=8
druid.indexer.fork.property.druid.processing.numMergeBuffers=2
druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100000000
druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100MiB
druid.indexer.fork.property.druid.processing.numThreads=1
```
In the clustered deployment, we can choose a split factor (2 in this example), and deploy 2 Data servers with 16CPU and 128GB RAM each. The areas to scale are the following:
In the clustered deployment, we can choose a split factor (2 in this example), and deploy 2 Data servers with 16 CPU and 128GiB RAM each. The areas to scale are the following:
Historical
@ -361,7 +361,7 @@ The resulting configs after the split:
New Historical (on 2 Data servers)
```
druid.processing.buffer.sizeBytes=500000000
druid.processing.buffer.sizeBytes=500MiB
druid.processing.numMergeBuffers=8
druid.processing.numThreads=31
```
@ -371,7 +371,7 @@ New MiddleManager (on 2 Data servers)
```
druid.worker.capacity=4
druid.indexer.fork.property.druid.processing.numMergeBuffers=2
druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100000000
druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100MiB
druid.indexer.fork.property.druid.processing.numThreads=1
```

View File

@ -84,4 +84,4 @@ It takes a few seconds for all the Druid processes to fully start up. If you ope
From here you can follow along with the [Quickstart](./index.md#step-4-load-data), or elaborate on your `docker-compose.yml` to add any additional external service dependencies as necessary.
## Docker Memory Requirements
If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GB may be a good place to start.
If you experience any processes crashing with a 137 error code you likely don't have enough memory allocated to Docker. 6 GiB may be a good place to start.

View File

@ -32,14 +32,14 @@ Before starting, you may want to read the [general Druid overview](../design/ind
## Requirements
You can follow these steps on a relatively small machine, such as a laptop with around 4 CPU and 16 GB of RAM.
You can follow these steps on a relatively small machine, such as a laptop with around 4 CPU and 16 GiB of RAM.
Druid comes with several startup configuration profiles for a range of machine sizes.
The `micro-quickstart` configuration profile shown here is suitable for evaluating Druid. If you want to
try out Druid's performance or scaling capabilities, you'll need a larger machine and configuration profile.
The configuration profiles included with Druid range from the even smaller _Nano-Quickstart_ configuration (1 CPU, 4GB RAM)
to the _X-Large_ configuration (64 CPU, 512GB RAM). For more information, see
The configuration profiles included with Druid range from the even smaller _Nano-Quickstart_ configuration (1 CPU, 4GiB RAM)
to the _X-Large_ configuration (64 CPU, 512GiB RAM). For more information, see
[Single server deployment](../operations/single-server.md). Alternatively, see [Clustered deployment](./cluster.md) for
information on deploying Druid services across clustered machines.

View File

@ -47,7 +47,7 @@ public class OffHeapLoadingCache<K, V> implements LoadingCache<K, V>
/**
* Sets store size limit. Disk or memory space consumed by storage should not grow over this space.
* maximal size of store in GB, if store is larger entries will start expiring
* maximal size of store in GiB, if store is larger entries will start expiring
*/
@JsonProperty
private final double maxStoreSize;

View File

@ -78,7 +78,7 @@ public class JobHelper
private static final Logger log = new Logger(JobHelper.class);
private static final int NUM_RETRIES = 8;
private static final int SECONDS_BETWEEN_RETRIES = 2;
private static final int DEFAULT_FS_BUFFER_SIZE = 1 << 18; // 256KB
private static final int DEFAULT_FS_BUFFER_SIZE = 1 << 18; // 256KiB
private static final Pattern SNAPSHOT_JAR = Pattern.compile(".*-SNAPSHOT(-selfcontained)?\\.jar$");
public static Path distributedClassPath(String path)

View File

@ -36,7 +36,7 @@ Integration Testing Using Docker
Before starting, if you don't already have docker on your machine, install it as described on
[Docker installation instructions](https://docs.docker.com/install/). Ensure that you
have at least 4GB of memory allocated to the docker engine. (You can verify it
have at least 4GiB of memory allocated to the docker engine. (You can verify it
under Preferences > Resources > Advanced.)
Also set the `DOCKER_IP`
@ -186,7 +186,7 @@ machine.
> NOTE: Quickstart does not run with ssl, so to trick the integration tests we specify the `*_tls_url` in the config to be the same as the http url.
Make sure you have at least 6GB of memory available before you run the tests.
Make sure you have at least 6GiB of memory available before you run the tests.
The tests rely on files in the test/resources folder to exist under the path /resources,
so create a symlink to make them available:
@ -216,7 +216,7 @@ Then run the tests using a command similar to:
Running Tests Using A Configuration File for Any Cluster
-------------------
Make sure that you have at least 6GB of memory available before you run the tests.
Make sure that you have at least 6GiB of memory available before you run the tests.
To run tests on any druid cluster that is already running, create a configuration file:

View File

@ -42,9 +42,9 @@ public class FileWriteOutBytesTest
}
@Test
public void write4KBIntsShouldNotFlush() throws IOException
public void write4KiBIntsShouldNotFlush() throws IOException
{
// Write 4KB of ints and expect the write operation of the file channel will be triggered only once.
// Write 4KiB of ints and expect the write operation of the file channel will be triggered only once.
EasyMock.expect(mockFileChannel.write(EasyMock.anyObject(ByteBuffer.class)))
.andAnswer(() -> {
ByteBuffer buffer = (ByteBuffer) EasyMock.getCurrentArguments()[0];
@ -58,8 +58,8 @@ public class FileWriteOutBytesTest
for (int i = 0; i < numOfInt; i++) {
fileWriteOutBytes.writeInt(i);
}
// no need to flush up to 4KB
// the first byte after 4KB will cause a flush
// no need to flush up to 4KiB
// the first byte after 4KiB will cause a flush
fileWriteOutBytes.write(1);
EasyMock.verify(mockFileChannel);
}

View File

@ -63,7 +63,7 @@ public class MemcachedCacheBenchmark extends SimpleBenchmark
protected void setUp() throws Exception
{
SerializingTranscoder transcoder = new SerializingTranscoder(
50 * 1024 * 1024 // 50 MB
50 * 1024 * 1024 // 50 MiB
);
// disable compression
transcoder.setCompressionThreshold(Integer.MAX_VALUE);

View File

@ -704,7 +704,7 @@ initialAdminRole
adminGroupMapping
groupMappingName
- ../docs/development/extensions-core/druid-kerberos.md
8Kb
8KiB
HttpComponents
MyKerberosAuthenticator
RFC-4559
@ -1213,18 +1213,17 @@ taskId
taskid
un
- ../docs/operations/basic-cluster-tuning.md
100MB
128MB
100MiB
128MiB
15ms
2.5MB
24GB
256MB
30GB-60GB
4G
2.5MiB
24GiB
256MiB
30GiB-60GiB
4GiB
5MB
64KB
7KB
8G
64KiB
8GiB
G1GC
GroupBys
QoS-type
@ -1319,12 +1318,12 @@ loadForever
- ../docs/operations/segment-optimization.md
700MB
- ../docs/operations/single-server.md
128GB
16GB
256GB
4GB
512GB
64GB
128GiB
16GiB
256GiB
4GiB
512GiB
64GiB
Nano-Quickstart
i3
i3.16xlarge
@ -1454,6 +1453,7 @@ useResultLevelCache
vectorSize
enableJoinLeftTableScanDirect
- ../docs/querying/querying.md
7KiB
DatasourceMetadata
TimeBoundary
errorClass
@ -1609,6 +1609,8 @@ outputType
WebUpd8
m5.2xlarge
metadata.storage.
256GiB
128GiB
- ../docs/tutorials/tutorial-batch-hadoop.md
PATH_TO_DRUID
namenode
@ -1655,7 +1657,7 @@ GiB
2GB
30_000
524288000L
5MB
5MiB
8u60
Autoscaler
AvaticaConnectionBalancer
@ -1860,6 +1862,8 @@ isUnpatrolled
metroCode
regionIsoCode
regionName
4GiB
512GiB
- ../docs/development/extensions-core/druid-ranger-security.md
json
metastore