-export const INPUT_FORMAT_FIELDS: Field<InputFormat>[] = [
- {
- name: 'type',
- label: 'Input format',
- type: 'string',
- suggestions: ['json', 'csv', 'tsv', 'parquet', 'orc', 'avro_ocf', 'avro_stream', 'regex'],
- required: true,
- info: (
- <>
- <p>The parser used to parse the data.</p>
- <p>
- For more information see{' '}
- <ExternalLink href={`${getLink('DOCS')}/ingestion/data-formats.html`}>
- the documentation
- </ExternalLink>
- .
- </p>
- </>
- ),
- },
- {
- name: 'featureSpec',
- label: 'JSON parser features',
- type: 'json',
- defined: typeIs('json'),
- info: (
- <>
- <p>
- <ExternalLink href="https://github.com/FasterXML/jackson-core/wiki/JsonParser-Features">
- JSON parser features
- </ExternalLink>{' '}
- supported by Jackson library. Those features will be applied when parsing the input JSON
- data.
- </p>
- <p>
- Example:{' '}
- <Code>{`{ "ALLOW_SINGLE_QUOTES": true, "ALLOW_UNQUOTED_FIELD_NAMES": true }`}</Code>
- </p>
- </>
- ),
- },
- {
- name: 'delimiter',
- type: 'string',
- defaultValue: '\t',
- suggestions: ['\t', ';', '|', '#'],
- defined: typeIs('tsv'),
- info: <>A custom delimiter for data values.</>,
- },
- {
- name: 'pattern',
- type: 'string',
- defined: typeIs('regex'),
- required: true,
- },
- {
- name: 'function',
- type: 'string',
- defined: typeIs('javascript'),
- required: true,
- },
- {
- name: 'skipHeaderRows',
- type: 'number',
- defaultValue: 0,
- defined: typeIs('csv', 'tsv'),
- min: 0,
- info: (
- <>
- If this is set, skip the first <Code>skipHeaderRows</Code> rows from each file.
- </>
- ),
- },
- {
- name: 'findColumnsFromHeader',
- type: 'boolean',
- defined: typeIs('csv', 'tsv'),
- required: true,
- info: (
- <>
- If this is set, find the column names from the header row. Note that{' '}
- <Code>skipHeaderRows</Code> will be applied before finding column names from the header. For
- example, if you set <Code>skipHeaderRows</Code> to 2 and <Code>findColumnsFromHeader</Code>{' '}
- to true, the task will skip the first two lines and then extract column information from the
- third line.
- </>
- ),
- },
- {
- name: 'columns',
- type: 'string-array',
- required: true,
- defined: p =>
- (oneOf(p.type, 'csv', 'tsv') && p.findColumnsFromHeader === false) || p.type === 'regex',
- info: (
- <>
- Specifies the columns of the data. The columns should be in the same order with the columns
- of your data.
- </>
- ),
- },
- {
- name: 'listDelimiter',
- type: 'string',
- defaultValue: '\x01',
- suggestions: ['\x01', '\x00'],
- defined: typeIs('csv', 'tsv', 'regex'),
- info: <>A custom delimiter for multi-value dimensions.</>,
- },
- {
- name: 'binaryAsString',
- type: 'boolean',
- defaultValue: false,
- defined: typeIs('parquet', 'orc', 'avro_ocf', 'avro_stream'),
- info: (
- <>
- Specifies if the binary column which is not logically marked as a string should be treated
- as a UTF-8 encoded string.
- </>
- ),
- },
-];
+function generateInputFormatFields(streaming: boolean) {
+ return compact([
+ {
+ name: 'type',
+ label: 'Input format',
+ type: 'string',
+ suggestions: ['json', 'csv', 'tsv', 'parquet', 'orc', 'avro_ocf', 'avro_stream', 'regex'],
+ required: true,
+ info: (
+ <>
+ <p>The parser used to parse the data.</p>
+ <p>
+ For more information see{' '}
+ <ExternalLink href={`${getLink('DOCS')}/ingestion/data-formats.html`}>
+ the documentation
+ </ExternalLink>
+ .
+ </p>
+ </>
+ ),
+ },
+ {
+ name: 'featureSpec',
+ label: 'JSON parser features',
+ type: 'json',
+ defined: typeIs('json'),
+ info: (
+ <>
+ <p>
+ <ExternalLink href="https://github.com/FasterXML/jackson-core/wiki/JsonParser-Features">
+ JSON parser features
+ </ExternalLink>{' '}
+ supported by Jackson library. Those features will be applied when parsing the input JSON
+ data.
+ </p>
+ <p>
+ Example:{' '}
+ <Code>{`{ "ALLOW_SINGLE_QUOTES": true, "ALLOW_UNQUOTED_FIELD_NAMES": true }`}</Code>
+ </p>
+ </>
+ ),
+ },
+ streaming
+ ? {
+ name: 'assumeNewlineDelimited',
+ type: 'boolean',
+ defined: typeIs('json'),
+ disabled: (inputFormat: InputFormat) => inputFormat.useJsonNodeReader,
+ defaultValue: false,
+ info: (
+ <>
+ <p>
+ In streaming ingestion, multi-line JSON events can be ingested (i.e. where a single
+ JSON event spans multiple lines). However, if a parsing exception occurs, all JSON
+ events that are present in the same streaming record will be discarded.
+ </p>
+ <p>
+ <Code>assumeNewlineDelimited</Code> and <Code>useJsonNodeReader</Code> (at most one
+ can be <Code>true</Code>) affect only how parsing exceptions are handled.
+ </p>
+ <p>
+ If the input is known to be newline delimited JSON (each individual JSON event is
+ contained in a single line, separated by newlines), setting this option to true
+ allows for more flexible parsing exception handling. Only the lines with invalid
+ JSON syntax will be discarded, while lines containing valid JSON events will still
+ be ingested.
+ </p>
+ </>
+ ),
+ }
+ : undefined,
+ streaming
+ ? {
+ name: 'useJsonNodeReader',
+ type: 'boolean',
+ defined: typeIs('json'),
+ disabled: (inputFormat: InputFormat) => inputFormat.assumeNewlineDelimited,
+ defaultValue: false,
+ info: (
+ <>
+ <p>
+ In streaming ingestion, multi-line JSON events can be ingested (i.e. where a single
+ JSON event spans multiple lines). However, if a parsing exception occurs, all JSON
+ events that are present in the same streaming record will be discarded.
+ </p>
+ <p>
+ <Code>assumeNewlineDelimited</Code> and <Code>useJsonNodeReader</Code> (at most one
+ can be <Code>true</Code>) affect only how parsing exceptions are handled.
+ </p>
+ <p>
+ When ingesting multi-line JSON events, enabling this option will enable the use of a
+ JSON parser which will retain any valid JSON events encountered within a streaming
+ record prior to when a parsing exception occurred.
+ </p>
+ </>
+ ),
+ }
+ : undefined,
+ {
+ name: 'delimiter',
+ type: 'string',
+ defaultValue: '\t',
+ suggestions: ['\t', ';', '|', '#'],
+ defined: typeIs('tsv'),
+ info: <>A custom delimiter for data values.</>,
+ },
+ {
+ name: 'pattern',
+ type: 'string',
+ defined: typeIs('regex'),
+ required: true,
+ },
+ {
+ name: 'function',
+ type: 'string',
+ defined: typeIs('javascript'),
+ required: true,
+ },
+ {
+ name: 'skipHeaderRows',
+ type: 'number',
+ defaultValue: 0,
+ defined: typeIs('csv', 'tsv'),
+ min: 0,
+ info: (
+ <>
+ If this is set, skip the first <Code>skipHeaderRows</Code> rows from each file.
+ </>
+ ),
+ },
+ {
+ name: 'findColumnsFromHeader',
+ type: 'boolean',
+ defined: typeIs('csv', 'tsv'),
+ required: true,
+ info: (
+ <>
+ If this is set, find the column names from the header row. Note that{' '}
+ <Code>skipHeaderRows</Code> will be applied before finding column names from the header.
+ For example, if you set <Code>skipHeaderRows</Code> to 2 and{' '}
+ <Code>findColumnsFromHeader</Code> to true, the task will skip the first two lines and
+ then extract column information from the third line.
+ </>
+ ),
+ },
+ {
+ name: 'columns',
+ type: 'string-array',
+ required: true,
+ defined: p =>
+ (oneOf(p.type, 'csv', 'tsv') && p.findColumnsFromHeader === false) || p.type === 'regex',
+ info: (
+ <>
+ Specifies the columns of the data. The columns should be in the same order with the
+ columns of your data.
+ </>
+ ),
+ },
+ {
+ name: 'listDelimiter',
+ type: 'string',
+ defaultValue: '\x01',
+ suggestions: ['\x01', '\x00'],
+ defined: typeIs('csv', 'tsv', 'regex'),
+ info: <>A custom delimiter for multi-value dimensions.</>,
+ },
+ {
+ name: 'binaryAsString',
+ type: 'boolean',
+ defaultValue: false,
+ defined: typeIs('parquet', 'orc', 'avro_ocf', 'avro_stream'),
+ info: (
+ <>
+ Specifies if the binary column which is not logically marked as a string should be treated
+ as a UTF-8 encoded string.
+ </>
+ ),
+ },
+ ] as (Field<InputFormat> | undefined)[]);
+}
+
+export const INPUT_FORMAT_FIELDS: Field<InputFormat>[] = generateInputFormatFields(false);
+export const STREAMING_INPUT_FORMAT_FIELDS: Field<InputFormat>[] = generateInputFormatFields(true);
export function issueWithInputFormat(inputFormat: InputFormat | undefined): string | undefined {
return AutoForm.issueWithModel(inputFormat, INPUT_FORMAT_FIELDS);
}
-export const inputFormatCanFlatten: (inputFormat: InputFormat) => boolean = typeIs(
+export const inputFormatCanProduceNestedData: (inputFormat: InputFormat) => boolean = typeIs(
'json',
'parquet',
'orc',
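A note on the helpers these field definitions lean on: `typeIs` curries a list of format names into a predicate on the model's `type` property (used both for the `defined:` checks and for the exported `inputFormatCanProduceNestedData`), while `compact` drops the `undefined` placeholders left behind by `streaming ? { ... } : undefined`, which is how the two streaming-only JSON fields stay out of the batch list. A minimal sketch of what such helpers could look like, with simplified signatures assumed for illustration (the web console's actual implementations live in its utils and are not part of this diff):

// Simplified stand-ins, for illustration only.
function typeIs<T extends { type?: string }>(...types: string[]): (x: T) => boolean {
  // Build a predicate that tests whether the model's `type` is one of the given names.
  return x => types.includes(x.type || '');
}

function compact<T>(xs: (T | undefined)[]): T[] {
  // Filter out the undefined entries produced by `streaming ? {...} : undefined`.
  return xs.filter((x): x is T => x !== undefined);
}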
diff --git a/web-console/src/views/load-data-view/info-messages.tsx b/web-console/src/views/load-data-view/info-messages.tsx
index 5c24be5cd8f..733ad34dbb3 100644
--- a/web-console/src/views/load-data-view/info-messages.tsx
+++ b/web-console/src/views/load-data-view/info-messages.tsx
@@ -57,30 +57,36 @@ export const ConnectMessage = React.memo(function ConnectMessage(props: ConnectM
});
export interface ParserMessageProps {
- canFlatten: boolean;
+ canHaveNestedData: boolean;
}
export const ParserMessage = React.memo(function ParserMessage(props: ParserMessageProps) {
- const { canFlatten } = props;
+ const { canHaveNestedData } = props;
return (
<FormGroup>
<Callout>
- <p>
- You can{' '}
- <ExternalLink href={`${getLink('DOCS')}/querying/nested-columns.html`}>
- directly ingest nested data
- </ExternalLink>{' '}
- into COMPLEX{'<json>'} columns.
- </p>
+ <p>
+ Druid needs to parse data as columns. Determine the format of your data and ensure that
+ the columns are accurately parsed.
+ </p>
- {canFlatten && (
- <p>
- If you have nested data, you can{' '}
- <ExternalLink href={`${getLink('DOCS')}/ingestion/data-formats.html#flattenspec`}>
- flatten
- </ExternalLink>{' '}
- it here.
- </p>
+ {canHaveNestedData && (
+ <>
+ <p>
+ If you have nested data, you can ingest it into{' '}
+ <ExternalLink href={`${getLink('DOCS')}/querying/nested-columns.html`}>
+ COMPLEX{'<json>'}
+ </ExternalLink>{' '}
+ columns.
+ </p>
+ <p>
+ Alternatively, you can explicitly{' '}
+ <ExternalLink href={`${getLink('DOCS')}/ingestion/data-formats.html#flattenspec`}>
+ flatten
+ </ExternalLink>{' '}
+ it here.
+ </p>
+ </>
)}
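For context on how the renamed prop gets its value: the parser step computes the flag from the selected input format and hands it to ParserMessage. A rough sketch of the call site, assuming an inputFormat variable in scope (illustrative; the actual render code appears in the load-data-view diff below):

const canHaveNestedData = inputFormatCanProduceNestedData(inputFormat);
// ...later, in the parser step's render:
<ParserMessage canHaveNestedData={canHaveNestedData} />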
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index e4322e34e5b..6e5995048fb 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -85,7 +85,6 @@ import {
getRequiredModule,
getRollup,
getSecondaryPartitionRelatedFormFields,
- getSpecType,
getTimestampExpressionFields,
getTimestampSchema,
getTuningFormFields,
@@ -93,15 +92,15 @@ import {
IngestionSpec,
INPUT_FORMAT_FIELDS,
InputFormat,
- inputFormatCanFlatten,
+ inputFormatCanProduceNestedData,
invalidIoConfig,
invalidPartitionConfig,
IoConfig,
isDruidSource,
isEmptyIngestionSpec,
+ isStreamingSpec,
issueWithIoConfig,
issueWithSampleData,
- isTask,
joinFilter,
KNOWN_FILTER_TYPES,
MAX_INLINE_DATA_LENGTH,
@@ -113,6 +112,7 @@ import {
PRIMARY_PARTITION_RELATED_FORM_FIELDS,
removeTimestampTransform,
splitFilter,
+ STREAMING_INPUT_FORMAT_FIELDS,
TIME_COLUMN,
TIMESTAMP_SPEC_FIELDS,
TimestampSpec,
@@ -140,7 +140,6 @@ import {
localStorageSetJson,
moveElement,
moveToIndex,
- oneOf,
pluralIfNeeded,
QueryState,
} from '../../utils';
@@ -1205,7 +1204,6 @@ export class LoadDataView extends React.PureComponent
)}
- {oneOf(specType, 'kafka', 'kinesis') && (
+ {isStreamingSpec(spec) && (
<ClearableInput
value={columnFilter}
onChange={columnFilter => this.setState({ columnFilter })}
placeholder="Search columns"
/>
- {canFlatten && (
+ {canHaveNestedData && (
r => r.input),
'ignore-arrays',
);
}
+ const inputFormatFields = isStreamingSpec(spec)
+ ? STREAMING_INPUT_FORMAT_FIELDS
+ : INPUT_FORMAT_FIELDS;
return (
<>
{mainFill}
- <ParserMessage canFlatten={canFlatten} />
+ <ParserMessage canHaveNestedData={canHaveNestedData} />
{!selectedFlattenField && (
<>
<AutoForm
fields={inputFormatFields}
model={inputFormat}
onChange={p =>
this.updateSpecPreview(deepSet(spec, 'spec.ioConfig.inputFormat', p))
}
/>
@@ -1511,11 +1512,11 @@ export class LoadDataView extends React.PureComponent
{this.renderApplyButtonBar(
parserQueryState,
- AutoForm.issueWithModel(inputFormat, INPUT_FORMAT_FIELDS),
+ AutoForm.issueWithModel(inputFormat, inputFormatFields),
)}
</>
)}
- {canFlatten && this.renderFlattenControls()}
+ {canHaveNestedData && this.renderFlattenControls()}
{suggestedFlattenFields && suggestedFlattenFields.length ? (