mirror of https://github.com/apache/druid.git
add support for smartSegmentLoading (#14610)
This commit is contained in:
parent
1f4ee5e21b
commit
f7348d7389
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import type { CoordinatorDynamicConfig } from './coordinator-dynamic-config';
|
||||
|
||||
export const DEFAULT_COORDINATOR_DYNAMIC_CONFIG: CoordinatorDynamicConfig = {
|
||||
millisToWaitBeforeDeleting: 900000,
|
||||
mergeBytesLimit: 524288000,
|
||||
mergeSegmentsLimit: 100,
|
||||
maxSegmentsToMove: 100,
|
||||
replicantLifetime: 15,
|
||||
replicationThrottleLimit: 500,
|
||||
balancerComputeThreads: 1,
|
||||
killDataSourceWhitelist: [],
|
||||
killPendingSegmentsSkipList: [],
|
||||
maxSegmentsInNodeLoadingQueue: 500,
|
||||
decommissioningNodes: [],
|
||||
decommissioningMaxPercentOfMaxSegmentsToMove: 70,
|
||||
pauseCoordination: false,
|
||||
replicateAfterLoadTimeout: false,
|
||||
maxNonPrimaryReplicantsToLoad: 2147483647,
|
||||
useRoundRobinSegmentAssignment: true,
|
||||
smartSegmentLoading: true,
|
||||
debugDimensions: null,
|
||||
};
|
|
@ -20,6 +20,8 @@ import { Code } from '@blueprintjs/core';
|
|||
import React from 'react';
|
||||
|
||||
import type { Field } from '../../components';
|
||||
import { ExternalLink } from '../../components';
|
||||
import { getLink } from '../../links';
|
||||
|
||||
export interface CoordinatorDynamicConfig {
|
||||
maxSegmentsToMove?: number;
|
||||
|
@ -37,23 +39,168 @@ export interface CoordinatorDynamicConfig {
|
|||
decommissioningMaxPercentOfMaxSegmentsToMove?: number;
|
||||
pauseCoordination?: boolean;
|
||||
maxNonPrimaryReplicantsToLoad?: number;
|
||||
replicateAfterLoadTimeout?: boolean;
|
||||
useRoundRobinSegmentAssignment?: boolean;
|
||||
smartSegmentLoading?: boolean;
|
||||
|
||||
// Undocumented
|
||||
debugDimensions?: any;
|
||||
}
|
||||
|
||||
export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[] = [
|
||||
{
|
||||
name: 'pauseCoordination',
|
||||
type: 'boolean',
|
||||
defaultValue: false,
|
||||
info: (
|
||||
<>
|
||||
Boolean flag for whether or not the coordinator should execute its various duties of
|
||||
coordinating the cluster. Setting this to true essentially pauses all coordination work
|
||||
while allowing the API to remain up. Duties that are paused include all classes that
|
||||
implement the <Code>CoordinatorDuty</Code> interface. Such duties include: Segment
|
||||
balancing, Segment compaction, Submitting kill tasks for unused segments (if enabled),
|
||||
Logging of used segments in the cluster, Marking of newly unused or overshadowed segments,
|
||||
Matching and execution of load/drop rules for used segments, Unloading segments that are no
|
||||
longer marked as used from Historical servers. An example of when an admin may want to pause
|
||||
coordination would be if they are doing deep storage maintenance on HDFS Name Nodes with
|
||||
downtime and don't want the coordinator to be directing Historical Nodes to hit the
|
||||
Name Node with API requests until maintenance is done and the deep store is declared healthy
|
||||
for use again.
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
||||
// Start "smart" segment loading section
|
||||
|
||||
{
|
||||
name: 'smartSegmentLoading',
|
||||
type: 'boolean',
|
||||
defaultValue: true,
|
||||
info: (
|
||||
<>
|
||||
Enables{' '}
|
||||
<ExternalLink href={`${getLink('DOCS')}/configuration#smart-segment-loading`}>
|
||||
"smart" segment loading mode
|
||||
</ExternalLink>{' '}
|
||||
which dynamically computes the optimal values of several properties that maximize
|
||||
Coordinator performance.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'maxSegmentsToMove',
|
||||
type: 'number',
|
||||
defaultValue: 100,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: <>The maximum number of segments that can be moved at any given time.</>,
|
||||
},
|
||||
{
|
||||
name: 'balancerComputeThreads',
|
||||
name: 'maxSegmentsInNodeLoadingQueue',
|
||||
type: 'number',
|
||||
defaultValue: 1,
|
||||
defaultValue: 500,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: (
|
||||
<>
|
||||
Thread pool size for computing moving cost of segments in segment balancing. Consider
|
||||
increasing this if you have a lot of segments and moving segments starts to get stuck.
|
||||
The maximum number of segments that could be queued for loading to any given server. This
|
||||
parameter could be used to speed up segments loading process, especially if there are
|
||||
"slow" nodes in the cluster (with low loading speed) or if too much segments
|
||||
scheduled to be replicated to some particular node (faster loading could be preferred to
|
||||
better segments distribution). Desired value depends on segments loading speed, acceptable
|
||||
replication time and number of nodes. Value 1000 could be a start point for a rather big
|
||||
cluster. Default value is 500.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'useRoundRobinSegmentAssignment',
|
||||
type: 'boolean',
|
||||
defaultValue: true,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: (
|
||||
<>
|
||||
Boolean flag for whether segments should be assigned to historicals in a round robin
|
||||
fashion. When disabled, segment assignment is done using the chosen balancer strategy. When
|
||||
enabled, this can speed up segment assignments leaving balancing to move the segments to
|
||||
their optimal locations (based on the balancer strategy) lazily.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'replicationThrottleLimit',
|
||||
type: 'number',
|
||||
defaultValue: 500,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: <>The maximum number of segments that can be replicated at one time.</>,
|
||||
},
|
||||
{
|
||||
name: 'replicantLifetime',
|
||||
type: 'number',
|
||||
defaultValue: 15,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: (
|
||||
<>
|
||||
The maximum number of Coordinator runs for which a segment can wait in the load queue of a
|
||||
Historical before Druid raises an alert.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'maxNonPrimaryReplicantsToLoad',
|
||||
type: 'number',
|
||||
defaultValue: 2147483647,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: (
|
||||
<>
|
||||
The maximum number of non-primary replicants to load in a single Coordinator cycle. Once
|
||||
this limit is hit, only primary replicants will be loaded for the remainder of the cycle.
|
||||
Tuning this value lower can help reduce the delay in loading primary segments when the
|
||||
cluster has a very large number of non-primary replicants to load (such as when a single
|
||||
historical drops out of the cluster leaving many under-replicated segments).
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'decommissioningMaxPercentOfMaxSegmentsToMove',
|
||||
type: 'number',
|
||||
defaultValue: 70,
|
||||
defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
|
||||
info: (
|
||||
<>
|
||||
<p>
|
||||
Upper limit of segments the Coordinator can move from decommissioning servers to active
|
||||
non-decommissioning servers during a single run. This value is relative to the total
|
||||
maximum number of segments that can be moved at any given time based upon the value of
|
||||
<Code>maxSegmentsToMove</Code>.
|
||||
</p>
|
||||
<p>
|
||||
If <Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code> is 0, the Coordinator does
|
||||
not move segments to decommissioning servers, effectively putting them in a type of
|
||||
"maintenance" mode. In this case, decommissioning servers do not participate in
|
||||
balancing or assignment by load rules. The Coordinator still considers segments on
|
||||
decommissioning servers as candidates to replicate on active servers.
|
||||
</p>
|
||||
<p>
|
||||
Decommissioning can stall if there are no available active servers to move the segments
|
||||
to. You can use the maximum percent of decommissioning segment movements to prioritize
|
||||
balancing or to decrease commissioning time to prevent active servers from being
|
||||
overloaded. The value must be between 0 and 100.
|
||||
</p>
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
||||
// End "smart" segment loading section
|
||||
|
||||
{
|
||||
name: 'decommissioningNodes',
|
||||
type: 'string-array',
|
||||
emptyValue: [],
|
||||
info: (
|
||||
<>
|
||||
List of historical services to 'decommission'. Coordinator will not assign new
|
||||
segments to 'decommissioning' services, and segments will be moved away from them
|
||||
to be placed on non-decommissioning services at the maximum rate specified by{' '}
|
||||
<Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code>.
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
@ -83,18 +230,13 @@ export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[
|
|||
),
|
||||
},
|
||||
{
|
||||
name: 'maxSegmentsInNodeLoadingQueue',
|
||||
name: 'balancerComputeThreads',
|
||||
type: 'number',
|
||||
defaultValue: 500,
|
||||
defaultValue: 1,
|
||||
info: (
|
||||
<>
|
||||
The maximum number of segments that could be queued for loading to any given server. This
|
||||
parameter could be used to speed up segments loading process, especially if there are
|
||||
"slow" nodes in the cluster (with low loading speed) or if too much segments
|
||||
scheduled to be replicated to some particular node (faster loading could be preferred to
|
||||
better segments distribution). Desired value depends on segments loading speed, acceptable
|
||||
replication time and number of nodes. Value 1000 could be a start point for a rather big
|
||||
cluster. Default value is 500.
|
||||
Thread pool size for computing moving cost of segments during segment balancing. Consider
|
||||
increasing this if you have a lot of segments and moving segments begins to stall.
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
@ -116,82 +258,8 @@ export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[
|
|||
defaultValue: 900000,
|
||||
info: (
|
||||
<>
|
||||
How long does the Coordinator need to be active before it can start removing (marking
|
||||
unused) segments in metadata storage.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'replicantLifetime',
|
||||
type: 'number',
|
||||
defaultValue: 15,
|
||||
info: (
|
||||
<>
|
||||
The maximum number of Coordinator runs for a segment to be replicated before we start
|
||||
alerting.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'replicationThrottleLimit',
|
||||
type: 'number',
|
||||
defaultValue: 500,
|
||||
info: <>The maximum number of segments that can be replicated at one time.</>,
|
||||
},
|
||||
{
|
||||
name: 'decommissioningNodes',
|
||||
type: 'string-array',
|
||||
emptyValue: [],
|
||||
info: (
|
||||
<>
|
||||
List of historical services to 'decommission'. Coordinator will not assign new
|
||||
segments to 'decommissioning' services, and segments will be moved away from them
|
||||
to be placed on non-decommissioning services at the maximum rate specified by{' '}
|
||||
<Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code>.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'decommissioningMaxPercentOfMaxSegmentsToMove',
|
||||
type: 'number',
|
||||
defaultValue: 70,
|
||||
info: (
|
||||
<>
|
||||
The maximum number of segments that may be moved away from 'decommissioning'
|
||||
services to non-decommissioning (that is, active) services during one Coordinator run. This
|
||||
value is relative to the total maximum segment movements allowed during one run which is
|
||||
determined by <Code>maxSegmentsToMove</Code>. If
|
||||
<Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code> is 0, segments will neither be
|
||||
moved from or to 'decommissioning' services, effectively putting them in a sort of
|
||||
"maintenance" mode that will not participate in balancing or assignment by load
|
||||
rules. Decommissioning can also become stalled if there are no available active services to
|
||||
place the segments. By leveraging the maximum percent of decommissioning segment movements,
|
||||
an operator can prevent active services from overload by prioritizing balancing, or decrease
|
||||
decommissioning time instead. The value should be between 0 and 100.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'useRoundRobinSegmentAssignment',
|
||||
type: 'boolean',
|
||||
defaultValue: true,
|
||||
info: (
|
||||
<>
|
||||
Boolean flag for whether segments should be assigned to historicals in a round-robin
|
||||
fashion. If enabled, this can speed up initial segment loading leaving segment balancing to
|
||||
make cost-based decisions and find the optimal location of a segment.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'pauseCoordination',
|
||||
type: 'boolean',
|
||||
defaultValue: false,
|
||||
info: (
|
||||
<>
|
||||
Boolean flag for whether or not the coordinator should execute its various duties of
|
||||
coordinating the cluster. Setting this to true essentially pauses all coordination work
|
||||
while allowing the API to remain up.
|
||||
How long does the Coordinator need to be a leader before it can start marking overshadowed
|
||||
segments as unused in metadata storage.
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
@ -202,22 +270,11 @@ export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[
|
|||
info: (
|
||||
<>
|
||||
Boolean flag for whether or not additional replication is needed for segments that have
|
||||
failed to load due to the expiry of coordinator load timeout. If this is set to true, the
|
||||
coordinator will attempt to replicate the failed segment on a different historical server.
|
||||
</>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: 'maxNonPrimaryReplicantsToLoad',
|
||||
type: 'number',
|
||||
defaultValue: 2147483647,
|
||||
info: (
|
||||
<>
|
||||
The maximum number of non-primary replicants to load in a single Coordinator cycle. Once
|
||||
this limit is hit, only primary replicants will be loaded for the remainder of the cycle.
|
||||
Tuning this value lower can help reduce the delay in loading primary segments when the
|
||||
cluster has a very large number of non-primary replicants to load (such as when a single
|
||||
historical drops out of the cluster leaving many under-replicated segments).
|
||||
failed to load due to the expiry of <Code>druid.coordinator.load.timeout</Code>. If this is
|
||||
set to true, the coordinator will attempt to replicate the failed segment on a different
|
||||
historical server. This helps improve the segment availability if there are a few slow
|
||||
historicals in the cluster. However, the slow historical may still load the segment later
|
||||
and the coordinator may issue drop requests if the segment is over-replicated.
|
||||
</>
|
||||
),
|
||||
},
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
|
||||
export * from './async-query/async-query.mock';
|
||||
export * from './coordinator-dynamic-config/coordinator-dynamic-config.mock';
|
||||
export * from './execution/execution-ingest-complete.mock';
|
||||
export * from './execution/execution-ingest-error.mock';
|
||||
export * from './stages/stages.mock';
|
||||
|
|
Loading…
Reference in New Issue