add support for smartSegmentLoading (#14610)

2023-07-19 14:21:30 -07:00 · 2023-07-19 14:21:30 -07:00 · f7348d7389
parent 1f4ee5e21b
commit f7348d7389
3 changed files with 203 additions and 105 deletions
--- a/web-console/src/druid-models/coordinator-dynamic-config/coordinator-dynamic-config.mock.ts
+++ b/web-console/src/druid-models/coordinator-dynamic-config/coordinator-dynamic-config.mock.ts
@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import type { CoordinatorDynamicConfig } from './coordinator-dynamic-config';
+
+export const DEFAULT_COORDINATOR_DYNAMIC_CONFIG: CoordinatorDynamicConfig = { // fixture: every value spelled out explicitly
+  millisToWaitBeforeDeleting: 900000, // 15 minutes expressed in milliseconds
+  mergeBytesLimit: 524288000, // 500 * 1024 * 1024
+  mergeSegmentsLimit: 100,
+  maxSegmentsToMove: 100,
+  replicantLifetime: 15,
+  replicationThrottleLimit: 500,
+  balancerComputeThreads: 1,
+  killDataSourceWhitelist: [],
+  killPendingSegmentsSkipList: [],
+  maxSegmentsInNodeLoadingQueue: 500,
+  decommissioningNodes: [],
+  decommissioningMaxPercentOfMaxSegmentsToMove: 70,
+  pauseCoordination: false,
+  replicateAfterLoadTimeout: false,
+  maxNonPrimaryReplicantsToLoad: 2147483647, // 2^31 - 1
+  useRoundRobinSegmentAssignment: true,
+  smartSegmentLoading: true,
+  debugDimensions: null, // marked "Undocumented" on the CoordinatorDynamicConfig interface
+};
--- a/web-console/src/druid-models/coordinator-dynamic-config/coordinator-dynamic-config.tsx
+++ b/web-console/src/druid-models/coordinator-dynamic-config/coordinator-dynamic-config.tsx
@ -20,6 +20,8 @@ import { Code } from '@blueprintjs/core';
 import React from 'react';

 import type { Field } from '../../components';
+import { ExternalLink } from '../../components';
+import { getLink } from '../../links';

 export interface CoordinatorDynamicConfig {
  maxSegmentsToMove?: number;
@ -37,23 +39,168 @@ export interface CoordinatorDynamicConfig {
  decommissioningMaxPercentOfMaxSegmentsToMove?: number;
  pauseCoordination?: boolean;
  maxNonPrimaryReplicantsToLoad?: number;
+  replicateAfterLoadTimeout?: boolean;
+  useRoundRobinSegmentAssignment?: boolean;
+  smartSegmentLoading?: boolean;
+
+  // Undocumented
+  debugDimensions?: any;
 }

 export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[] = [
+  {
+    name: 'pauseCoordination',
+    type: 'boolean',
+    defaultValue: false,
+    info: (
+      <>
+        Boolean flag for whether or not the coordinator should execute its various duties of
+        coordinating the cluster. Setting this to true essentially pauses all coordination work
+        while allowing the API to remain up. Duties that are paused include all classes that
+        implement the <Code>CoordinatorDuty</Code> interface. Such duties include: Segment
+        balancing, Segment compaction, Submitting kill tasks for unused segments (if enabled),
+        Logging of used segments in the cluster, Marking of newly unused or overshadowed segments,
+        Matching and execution of load/drop rules for used segments, Unloading segments that are no
+        longer marked as used from Historical servers. An example of when an admin may want to pause
+        coordination would be if they are doing deep storage maintenance on HDFS Name Nodes with
+        downtime and don&apos;t want the coordinator to be directing Historical Nodes to hit the
+        Name Node with API requests until maintenance is done and the deep store is declared healthy
+        for use again.
+      </>
+    ),
+  },
+
+  // Start "smart" segment loading section
+
+  {
+    name: 'smartSegmentLoading',
+    type: 'boolean',
+    defaultValue: true,
+    info: (
+      <>
+        Enables{' '}
+        <ExternalLink href={`${getLink('DOCS')}/configuration#smart-segment-loading`}>
+          &quot;smart&quot; segment loading mode
+        </ExternalLink>{' '}
+        which dynamically computes the optimal values of several properties that maximize
+        Coordinator performance.
+      </>
+    ),
+  },
  {
    name: 'maxSegmentsToMove',
    type: 'number',
    defaultValue: 100,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
    info: <>The maximum number of segments that can be moved at any given time.</>,
  },
  {
-    name: 'balancerComputeThreads',
+    name: 'maxSegmentsInNodeLoadingQueue',
    type: 'number',
-    defaultValue: 1,
+    defaultValue: 500,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
    info: (
      <>
-        Thread pool size for computing moving cost of segments in segment balancing. Consider
-        increasing this if you have a lot of segments and moving segments starts to get stuck.
+        The maximum number of segments that could be queued for loading to any given server. This
+        parameter could be used to speed up segments loading process, especially if there are
+        &quot;slow&quot; nodes in the cluster (with low loading speed) or if too much segments
+        scheduled to be replicated to some particular node (faster loading could be preferred to
+        better segments distribution). Desired value depends on segments loading speed, acceptable
+        replication time and number of nodes. Value 1000 could be a start point for a rather big
+        cluster. Default value is 500.
+      </>
+    ),
+  },
+  {
+    name: 'useRoundRobinSegmentAssignment',
+    type: 'boolean',
+    defaultValue: true,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
+    info: (
+      <>
+        Boolean flag for whether segments should be assigned to historicals in a round robin
+        fashion. When disabled, segment assignment is done using the chosen balancer strategy. When
+        enabled, this can speed up segment assignments leaving balancing to move the segments to
+        their optimal locations (based on the balancer strategy) lazily.
+      </>
+    ),
+  },
+  {
+    name: 'replicationThrottleLimit',
+    type: 'number',
+    defaultValue: 500,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
+    info: <>The maximum number of segments that can be replicated at one time.</>,
+  },
+  {
+    name: 'replicantLifetime',
+    type: 'number',
+    defaultValue: 15,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
+    info: (
+      <>
+        The maximum number of Coordinator runs for which a segment can wait in the load queue of a
+        Historical before Druid raises an alert.
+      </>
+    ),
+  },
+  {
+    name: 'maxNonPrimaryReplicantsToLoad',
+    type: 'number',
+    defaultValue: 2147483647,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined),
+    info: (
+      <>
+        The maximum number of non-primary replicants to load in a single Coordinator cycle. Once
+        this limit is hit, only primary replicants will be loaded for the remainder of the cycle.
+        Tuning this value lower can help reduce the delay in loading primary segments when the
+        cluster has a very large number of non-primary replicants to load (such as when a single
+        historical drops out of the cluster leaving many under-replicated segments).
+      </>
+    ),
+  },
+  {
+    name: 'decommissioningMaxPercentOfMaxSegmentsToMove',
+    type: 'number',
+    defaultValue: 70,
+    defined: cdc => (cdc.smartSegmentLoading === false ? true : undefined), // true only when smartSegmentLoading is explicitly false
+    info: (
+      <>
+        <p>
+          Upper limit of segments the Coordinator can move from decommissioning servers to active
+          non-decommissioning servers during a single run. This value is relative to the total
+          maximum number of segments that can be moved at any given time based upon the value
+          of <Code>maxSegmentsToMove</Code>.
+        </p>
+        <p>
+          If <Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code> is 0, the Coordinator does
+          not move segments to decommissioning servers, effectively putting them in a type of
+          &quot;maintenance&quot; mode. In this case, decommissioning servers do not participate in
+          balancing or assignment by load rules. The Coordinator still considers segments on
+          decommissioning servers as candidates to replicate on active servers.
+        </p>
+        <p>
+          Decommissioning can stall if there are no available active servers to move the segments
+          to. You can use the maximum percent of decommissioning segment movements to prioritize
+          balancing or to decrease decommissioning time to prevent active servers from being
+          overloaded. The value must be between 0 and 100.
+        </p>
+      </>
+    ),
+  },
+
+  // End "smart" segment loading section
+
+  {
+    name: 'decommissioningNodes',
+    type: 'string-array',
+    emptyValue: [],
+    info: (
+      <>
+        List of historical services to &apos;decommission&apos;. Coordinator will not assign new
+        segments to &apos;decommissioning&apos; services, and segments will be moved away from them
+        to be placed on non-decommissioning services at the maximum rate specified by{' '}
+        <Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code>.
      </>
    ),
  },
@ -83,18 +230,13 @@ export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[
    ),
  },
  {
-    name: 'maxSegmentsInNodeLoadingQueue',
+    name: 'balancerComputeThreads',
    type: 'number',
-    defaultValue: 500,
+    defaultValue: 1,
    info: (
      <>
-        The maximum number of segments that could be queued for loading to any given server. This
-        parameter could be used to speed up segments loading process, especially if there are
-        &quot;slow&quot; nodes in the cluster (with low loading speed) or if too much segments
-        scheduled to be replicated to some particular node (faster loading could be preferred to
-        better segments distribution). Desired value depends on segments loading speed, acceptable
-        replication time and number of nodes. Value 1000 could be a start point for a rather big
-        cluster. Default value is 500.
+        Thread pool size for computing moving cost of segments during segment balancing. Consider
+        increasing this if you have a lot of segments and moving segments begins to stall.
      </>
    ),
  },
@ -116,82 +258,8 @@ export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[
    defaultValue: 900000,
    info: (
      <>
-        How long does the Coordinator need to be active before it can start removing (marking
-        unused) segments in metadata storage.
-      </>
-    ),
-  },
-  {
-    name: 'replicantLifetime',
-    type: 'number',
-    defaultValue: 15,
-    info: (
-      <>
-        The maximum number of Coordinator runs for a segment to be replicated before we start
-        alerting.
-      </>
-    ),
-  },
-  {
-    name: 'replicationThrottleLimit',
-    type: 'number',
-    defaultValue: 500,
-    info: <>The maximum number of segments that can be replicated at one time.</>,
-  },
-  {
-    name: 'decommissioningNodes',
-    type: 'string-array',
-    emptyValue: [],
-    info: (
-      <>
-        List of historical services to &apos;decommission&apos;. Coordinator will not assign new
-        segments to &apos;decommissioning&apos; services, and segments will be moved away from them
-        to be placed on non-decommissioning services at the maximum rate specified by{' '}
-        <Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code>.
-      </>
-    ),
-  },
-  {
-    name: 'decommissioningMaxPercentOfMaxSegmentsToMove',
-    type: 'number',
-    defaultValue: 70,
-    info: (
-      <>
-        The maximum number of segments that may be moved away from &apos;decommissioning&apos;
-        services to non-decommissioning (that is, active) services during one Coordinator run. This
-        value is relative to the total maximum segment movements allowed during one run which is
-        determined by <Code>maxSegmentsToMove</Code>. If
-        <Code>decommissioningMaxPercentOfMaxSegmentsToMove</Code> is 0, segments will neither be
-        moved from or to &apos;decommissioning&apos; services, effectively putting them in a sort of
-        &quot;maintenance&quot; mode that will not participate in balancing or assignment by load
-        rules. Decommissioning can also become stalled if there are no available active services to
-        place the segments. By leveraging the maximum percent of decommissioning segment movements,
-        an operator can prevent active services from overload by prioritizing balancing, or decrease
-        decommissioning time instead. The value should be between 0 and 100.
-      </>
-    ),
-  },
-  {
-    name: 'useRoundRobinSegmentAssignment',
-    type: 'boolean',
-    defaultValue: true,
-    info: (
-      <>
-        Boolean flag for whether segments should be assigned to historicals in a round-robin
-        fashion. If enabled, this can speed up initial segment loading leaving segment balancing to
-        make cost-based decisions and find the optimal location of a segment.
-      </>
-    ),
-  },
-  {
-    name: 'pauseCoordination',
-    type: 'boolean',
-    defaultValue: false,
-    info: (
-      <>
-        Boolean flag for whether or not the coordinator should execute its various duties of
-        coordinating the cluster. Setting this to true essentially pauses all coordination work
-        while allowing the API to remain up.
+        How long does the Coordinator need to be a leader before it can start marking overshadowed
+        segments as unused in metadata storage.
      </>
    ),
  },
@ -202,22 +270,11 @@ export const COORDINATOR_DYNAMIC_CONFIG_FIELDS: Field<CoordinatorDynamicConfig>[
    info: (
      <>
        Boolean flag for whether or not additional replication is needed for segments that have
-        failed to load due to the expiry of coordinator load timeout. If this is set to true, the
-        coordinator will attempt to replicate the failed segment on a different historical server.
-      </>
-    ),
-  },
-  {
-    name: 'maxNonPrimaryReplicantsToLoad',
-    type: 'number',
-    defaultValue: 2147483647,
-    info: (
-      <>
-        The maximum number of non-primary replicants to load in a single Coordinator cycle. Once
-        this limit is hit, only primary replicants will be loaded for the remainder of the cycle.
-        Tuning this value lower can help reduce the delay in loading primary segments when the
-        cluster has a very large number of non-primary replicants to load (such as when a single
-        historical drops out of the cluster leaving many under-replicated segments).
+        failed to load due to the expiry of <Code>druid.coordinator.load.timeout</Code>. If this is
+        set to true, the coordinator will attempt to replicate the failed segment on a different
+        historical server. This helps improve the segment availability if there are a few slow
+        historicals in the cluster. However, the slow historical may still load the segment later
+        and the coordinator may issue drop requests if the segment is over-replicated.
      </>
    ),
  },
--- a/web-console/src/druid-models/mocks.ts
+++ b/web-console/src/druid-models/mocks.ts
@ -17,6 +17,7 @@
 */

 export * from './async-query/async-query.mock';
+export * from './coordinator-dynamic-config/coordinator-dynamic-config.mock';
 export * from './execution/execution-ingest-complete.mock';
 export * from './execution/execution-ingest-error.mock';
 export * from './stages/stages.mock';