From 9964dd4cb260058194772c7cee638542205b5e33 Mon Sep 17 00:00:00 2001
From: Vadim Ogievetsky
druid.coordinator.kill.on
is true. If this is set to true then{' '}
- killDataSourceWhitelist
must not be specified or be empty list.
- >
- ),
- },
- {
- name: 'killDataSourceWhitelist',
- type: 'string-array',
- emptyValue: [],
- info: (
- <>
- List of dataSources for which kill tasks are sent if property{' '}
- druid.coordinator.kill.on
is true. This can be a list of
- comma-separated dataSources or a JSON array.
- >
- ),
- },
- {
- name: 'killPendingSegmentsSkipList',
- type: 'string-array',
- emptyValue: [],
- info: (
- <>
- List of dataSources for which pendingSegments are NOT cleaned up if property{' '}
- druid.coordinator.kill.pendingSegments.on
is true. This can be a list
- of comma-separated dataSources or a JSON array.
- >
- ),
- },
- {
- name: 'maxSegmentsInNodeLoadingQueue',
- type: 'number',
- defaultValue: 0,
- info: (
- <>
- The maximum number of segments that could be queued for loading to any given server.
- This parameter could be used to speed up segments loading process, especially if
- there are "slow" nodes in the cluster (with low loading speed) or if too much
- segments scheduled to be replicated to some particular node (faster loading could be
- preferred to better segments distribution). Desired value depends on segments
- loading speed, acceptable replication time and number of nodes. Value 1000 could be
- a start point for a rather big cluster. Default value is 0 (loading queue is
- unbounded)
- >
- ),
- },
- {
- name: 'mergeBytesLimit',
- type: 'size-bytes',
- defaultValue: 524288000,
- info: <>The maximum total uncompressed size in bytes of segments to merge.>,
- },
- {
- name: 'mergeSegmentsLimit',
- type: 'number',
- defaultValue: 100,
- info: <>The maximum number of segments that can be in a single append task.>,
- },
- {
- name: 'millisToWaitBeforeDeleting',
- type: 'number',
- defaultValue: 900000,
- info: (
- <>
- How long does the Coordinator need to be active before it can start removing
- (marking unused) segments in metadata storage.
- >
- ),
- },
- {
- name: 'replicantLifetime',
- type: 'number',
- defaultValue: 15,
- info: (
- <>
- The maximum number of Coordinator runs for a segment to be replicated before we
- start alerting.
- >
- ),
- },
- {
- name: 'replicationThrottleLimit',
- type: 'number',
- defaultValue: 10,
- info: <>The maximum number of segments that can be replicated at one time.>,
- },
- {
- name: 'decommissioningNodes',
- type: 'string-array',
- emptyValue: [],
- info: (
- <>
- List of historical services to 'decommission'. Coordinator will not assign new
- segments to 'decommissioning' services, and segments will be moved away from them to
- be placed on non-decommissioning services at the maximum rate specified by{' '}
- decommissioningMaxPercentOfMaxSegmentsToMove
.
- >
- ),
- },
- {
- name: 'decommissioningMaxPercentOfMaxSegmentsToMove',
- type: 'number',
- defaultValue: 70,
- info: (
- <>
- The maximum number of segments that may be moved away from 'decommissioning'
- services to non-decommissioning (that is, active) services during one Coordinator
- run. This value is relative to the total maximum segment movements allowed during
- one run which is determined by maxSegmentsToMove
. If
- decommissioningMaxPercentOfMaxSegmentsToMove
is 0, segments will
- neither be moved from or to 'decommissioning' services, effectively putting them in
- a sort of "maintenance" mode that will not participate in balancing or assignment by
- load rules. Decommissioning can also become stalled if there are no available active
- services to place the segments. By leveraging the maximum percent of decommissioning
- segment movements, an operator can prevent active services from overload by
- prioritizing balancing, or decrease decommissioning time instead. The value should
- be between 0 and 100.
- >
- ),
- },
- {
- name: 'pauseCoordination',
- type: 'boolean',
- defaultValue: false,
- info: (
- <>
- Boolean flag for whether or not the coordinator should execute its various duties of
- coordinating the cluster. Setting this to true essentially pauses all coordination
- work while allowing the API to remain up.
- >
- ),
- },
- ]}
- model={dynamicConfig}
- onChange={m => setDynamicConfig(m)}
- />
+ druid.coordinator.kill.on
is
+ true. If this is set to true then killDataSourceWhitelist
must not be specified
+ or be empty list.
+ >
+ ),
+ },
+ {
+ name: 'killDataSourceWhitelist',
+ type: 'string-array',
+ emptyValue: [],
+ info: (
+ <>
+ List of dataSources for which kill tasks are sent if property{' '}
+ druid.coordinator.kill.on
is true. This can be a list of comma-separated
+ dataSources or a JSON array.
+ >
+ ),
+ },
+ {
+ name: 'killPendingSegmentsSkipList',
+ type: 'string-array',
+ emptyValue: [],
+ info: (
+ <>
+ List of dataSources for which pendingSegments are NOT cleaned up if property{' '}
+ druid.coordinator.kill.pendingSegments.on
is true. This can be a list of
+ comma-separated dataSources or a JSON array.
+ >
+ ),
+ },
+ {
+ name: 'maxSegmentsInNodeLoadingQueue',
+ type: 'number',
+ defaultValue: 0,
+ info: (
+ <>
+ The maximum number of segments that could be queued for loading to any given server. This
+ parameter could be used to speed up segments loading process, especially if there are "slow"
+ nodes in the cluster (with low loading speed) or if too much segments scheduled to be
+ replicated to some particular node (faster loading could be preferred to better segments
+ distribution). Desired value depends on segments loading speed, acceptable replication time
+ and number of nodes. Value 1000 could be a start point for a rather big cluster. Default
+ value is 0 (loading queue is unbounded)
+ >
+ ),
+ },
+ {
+ name: 'mergeBytesLimit',
+ type: 'size-bytes',
+ defaultValue: 524288000,
+ info: <>The maximum total uncompressed size in bytes of segments to merge.>,
+ },
+ {
+ name: 'mergeSegmentsLimit',
+ type: 'number',
+ defaultValue: 100,
+ info: <>The maximum number of segments that can be in a single append task.>,
+ },
+ {
+ name: 'millisToWaitBeforeDeleting',
+ type: 'number',
+ defaultValue: 900000,
+ info: (
+ <>
+ How long does the Coordinator need to be active before it can start removing (marking
+ unused) segments in metadata storage.
+ >
+ ),
+ },
+ {
+ name: 'replicantLifetime',
+ type: 'number',
+ defaultValue: 15,
+ info: (
+ <>
+ The maximum number of Coordinator runs for a segment to be replicated before we start
+ alerting.
+ >
+ ),
+ },
+ {
+ name: 'replicationThrottleLimit',
+ type: 'number',
+ defaultValue: 10,
+ info: <>The maximum number of segments that can be replicated at one time.>,
+ },
+ {
+ name: 'decommissioningNodes',
+ type: 'string-array',
+ emptyValue: [],
+ info: (
+ <>
+ List of historical services to 'decommission'. Coordinator will not assign new segments to
+ 'decommissioning' services, and segments will be moved away from them to be placed on
+ non-decommissioning services at the maximum rate specified by{' '}
+ decommissioningMaxPercentOfMaxSegmentsToMove
.
+ >
+ ),
+ },
+ {
+ name: 'decommissioningMaxPercentOfMaxSegmentsToMove',
+ type: 'number',
+ defaultValue: 70,
+ info: (
+ <>
+ The maximum number of segments that may be moved away from 'decommissioning' services to
+ non-decommissioning (that is, active) services during one Coordinator run. This value is
+ relative to the total maximum segment movements allowed during one run which is determined
+ by maxSegmentsToMove
. If
+ decommissioningMaxPercentOfMaxSegmentsToMove
is 0, segments will neither be
+ moved from or to 'decommissioning' services, effectively putting them in a sort of
+ "maintenance" mode that will not participate in balancing or assignment by load rules.
+ Decommissioning can also become stalled if there are no available active services to place
+ the segments. By leveraging the maximum percent of decommissioning segment movements, an
+ operator can prevent active services from overload by prioritizing balancing, or decrease
+ decommissioning time instead. The value should be between 0 and 100.
+ >
+ ),
+ },
+ {
+ name: 'pauseCoordination',
+ type: 'boolean',
+ defaultValue: false,
+ info: (
+ <>
+ Boolean flag for whether or not the coordinator should execute its various duties of
+ coordinating the cluster. Setting this to true essentially pauses all coordination work
+ while allowing the API to remain up.
+ >
+ ),
+ },
+];
diff --git a/web-console/src/druid-models/index.ts b/web-console/src/druid-models/index.ts
index 5e7debe93cf..c5186110c44 100644
--- a/web-console/src/druid-models/index.ts
+++ b/web-console/src/druid-models/index.ts
@@ -29,3 +29,5 @@ export * from './filter';
export * from './dimension-spec';
export * from './metric-spec';
export * from './ingestion-spec';
+export * from './coordinator-dynamic-config';
+export * from './overlord-dynamic-config';
diff --git a/web-console/src/druid-models/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec.spec.ts
index adccec656f8..fb6df068351 100644
--- a/web-console/src/druid-models/ingestion-spec.spec.ts
+++ b/web-console/src/druid-models/ingestion-spec.spec.ts
@@ -45,8 +45,8 @@ describe('ingestion-spec', () => {
dataSource: 'wikipedia',
granularitySpec: {
type: 'uniform',
- segmentGranularity: 'DAY',
- queryGranularity: 'HOUR',
+ segmentGranularity: 'day',
+ queryGranularity: 'hour',
rollup: true,
},
parser: {
@@ -183,8 +183,8 @@ describe('spec utils', () => {
dataSource: 'wikipedia',
granularitySpec: {
type: 'uniform',
- segmentGranularity: 'DAY',
- queryGranularity: 'HOUR',
+ segmentGranularity: 'day',
+ queryGranularity: 'hour',
},
timestampSpec: {
column: 'timestamp',
@@ -219,9 +219,9 @@ describe('spec utils', () => {
],
},
"granularitySpec": Object {
- "queryGranularity": "HOUR",
+ "queryGranularity": "hour",
"rollup": true,
- "segmentGranularity": "DAY",
+ "segmentGranularity": "day",
"type": "uniform",
},
"metricsSpec": Array [
diff --git a/web-console/src/druid-models/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec.tsx
index e58c5916504..baa8b50729e 100644
--- a/web-console/src/druid-models/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec.tsx
@@ -47,7 +47,7 @@ import {
getMetricSpecSingleFieldName,
MetricSpec,
} from './metric-spec';
-import { PLACEHOLDER_TIMESTAMP_SPEC, TimestampSpec } from './timestamp-spec';
+import { TimestampSpec } from './timestamp-spec';
import { TransformSpec } from './transform-spec';
export const MAX_INLINE_DATA_LENGTH = 65536;
@@ -475,6 +475,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
label: 'Dimensions',
type: 'string-array',
placeholder: '(optional)',
+ hideInMore: true,
info: (
The list of dimensions to select. If left empty, no dimensions are returned. If left @@ -487,6 +488,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F label: 'Metrics', type: 'string-array', placeholder: '(optional)', + hideInMore: true, info: (
The list of metrics to select. If left empty, no metrics are returned. If left null or @@ -499,6 +501,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F label: 'Filter', type: 'json', placeholder: '(optional)', + hideInMore: true, info: (
The{' '} @@ -983,45 +986,6 @@ export function getIoConfigTuningFormFields( > ), }, - { - name: 'inputSource.maxCacheCapacityBytes', - label: 'Max cache capacity bytes', - type: 'number', - defaultValue: 1073741824, - info: ( - <> -
- Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are - not removed until the ingestion task completes. -
- > - ), - }, - { - name: 'inputSource.maxFetchCapacityBytes', - label: 'Max fetch capacity bytes', - type: 'number', - defaultValue: 1073741824, - info: ( - <> -- Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched - files are removed immediately once they are read. -
- > - ), - }, - { - name: 'inputSource.prefetchTriggerBytes', - label: 'Prefetch trigger bytes', - type: 'number', - placeholder: 'maxFetchCapacityBytes / 2', - info: ( - <> -Threshold to trigger prefetching the objects.
- > - ), - }, ]; case 'index_parallel:local': @@ -1420,6 +1384,7 @@ export function invalidTuningConfig(tuningConfig: TuningConfig, intervals: any): export function getPartitionRelatedTuningSpecFormFields( specType: IngestionType, + dimensionSuggestions: string[] | undefined, ): Fielddynamic
.
),
+ adjustment: (t: TuningConfig) => {
+ if (!Array.isArray(dimensionSuggestions) || !dimensionSuggestions.length) return t;
+ return deepSet(t, 'partitionsSpec.partitionDimension', dimensionSuggestions[0]);
+ },
},
// partitionsSpec type: dynamic
{
@@ -1460,6 +1429,8 @@ export function getPartitionRelatedTuningSpecFormFields(
name: 'partitionsSpec.targetRowsPerSegment',
label: 'Target rows per segment',
type: 'number',
+ zeroMeansUndefined: true,
+ defaultValue: 5000000,
defined: (t: TuningConfig) =>
deepGet(t, 'partitionsSpec.type') === 'hashed' &&
!deepGet(t, 'partitionsSpec.numShards'),
@@ -1481,6 +1452,8 @@ export function getPartitionRelatedTuningSpecFormFields(
name: 'partitionsSpec.numShards',
label: 'Num shards',
type: 'number',
+ zeroMeansUndefined: true,
+ hideInMore: true,
defined: (t: TuningConfig) =>
deepGet(t, 'partitionsSpec.type') === 'hashed' &&
!deepGet(t, 'partitionsSpec.targetRowsPerSegment'),
@@ -1502,6 +1475,7 @@ export function getPartitionRelatedTuningSpecFormFields(
name: 'partitionsSpec.partitionDimensions',
label: 'Partition dimensions',
type: 'string-array',
+ placeholder: '(all dimensions)',
defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'hashed',
info: The dimensions to partition on. Leave blank to select all dimensions.
, }, @@ -1512,7 +1486,19 @@ export function getPartitionRelatedTuningSpecFormFields( type: 'string', defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', required: true, - info:The dimension to partition on.
, + suggestions: dimensionSuggestions, + info: ( + <> +The dimension to partition on.
+
+ This should be the first dimension in your schema which would make it first in the
+ sort order. As{' '}
+
resetOffsetAutomatically
.
+ >
+ ),
+ },
{
name: 'intermediatePersistPeriod',
type: 'duration',
@@ -1686,6 +1687,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field
+ When shards are split or merged, the supervisor will recompute shard, task group mappings,
+ and signal any running tasks created under the old mappings to stop early at{' '}
+ (current time + repartitionTransitionDuration)
. Stopping the tasks early
+ allows Druid to begin reading from the new shards more quickly.
+
+ The repartition transition wait time controlled by this property gives the stream
+ additional time to write records to the new shards after the split/merge, which helps
+ avoid the issues with empty shard handling described at
+
skipHeaderRows
rows from each file.
+ >
+ ),
+ },
+ {
+ name: 'findColumnsFromHeader',
+ type: 'boolean',
+ required: true,
+ defined: (p: InputFormat) => oneOf(p.type, 'csv', 'tsv'),
+ info: (
+ <>
+ If this is set, find the column names from the header row. Note that
+ skipHeaderRows
will be applied before finding column names from the header. For
+ example, if you set skipHeaderRows
to 2 and findColumnsFromHeader
{' '}
+ to true, the task will skip the first two lines and then extract column information from the
+ third line.
>
),
},
@@ -93,7 +101,13 @@ export const INPUT_FORMAT_FIELDS: Field
- Please specify your timestamp format by using the suggestions menu or typing in a{' '}
+ Specify your timestamp format by using the suggestions menu or typing in a{' '}
This value will be used if the specified column can not be found. Specify a static value for cases when the source time column is missing or is null.
+ Druid ingests raw data and converts it into a custom,{' '}
+ To get started, please paste some data in the box to the left. Click "Apply" to verify your data with Druid. To get started, please specify what data you want to ingest.
+ Druid requires flat data (non-nested, non-hierarchical). Each row should represent a
+ discrete event.
+
+ If you have nested data, you can{' '}
+ Ensure that your data appears correctly in a row/column orientation.
+ Druid partitions data based on the primary time column of your data. This column is stored
+ internally in Druid as Configure how to define the time column for this data.
+ If your data does not have a time column, you can select
+ Druid can perform per-row{' '}
+
+ Druid can filter out unwanted data by applying per-row{' '}
+
+ Each column in Druid must have an assigned type (string, long, float, double, complex, etc).
+
+ Default primitive types have been automatically assigned to your columns. If you want to
+ change the type, click on the column header.
+ Configure how Druid will partition data. Fine tune how Druid will ingest data. Configure behavior of indexed data once it reaches Druid.
+ Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any values
+ in this view, the values entered in previous sections will update accordingly. If you modify
+ any values in previous sections, this spec will automatically update.
+ Submit the spec to begin loading data into Druid. {
this.setState(
new QueryState
{
this.setState(
new QueryState
{
this.setState(
new QueryState
__time
.
+ None
to use a
+ placeholder value. If the time information is spread across multiple columns you can combine
+ them into one by selecting Expression
and defining a transform expression.
+