diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx index 393c3e7ee1b..37ec41d50e9 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx @@ -56,7 +56,11 @@ import { summarizeIndexSpec } from '../index-spec/index-spec'; import type { InputFormat } from '../input-format/input-format'; import { issueWithInputFormat } from '../input-format/input-format'; import type { InputSource } from '../input-source/input-source'; -import { FILTER_SUGGESTIONS, issueWithInputSource } from '../input-source/input-source'; +import { + FILTER_SUGGESTIONS, + issueWithInputSource, + OBJECT_GLOB_SUGGESTIONS, +} from '../input-source/input-source'; import type { MetricSpec } from '../metric-spec/metric-spec'; import { getMetricSpecOutputType, @@ -584,21 +588,29 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F ), }; - const inputSourceFilter: Field = { - name: 'inputSource.filter', - label: 'File filter', + const inputSourceObjectGlob: Field = { + name: 'inputSource.objectGlob', + label: 'Object glob', type: 'string', - suggestions: FILTER_SUGGESTIONS, - placeholder: '*', + suggestions: OBJECT_GLOB_SUGGESTIONS, + placeholder: '(all files)', info: ( -

- A wildcard filter for files. See{' '} - - here - {' '} - for format information. Files matching the filter criteria are considered for ingestion. - Files not matching the filter criteria are ignored. -

+ <> +

A glob for the object part of the URI.

+

+ The glob must match the entire object part, not just the filename. For example, the glob + *.json does not match /bar/file.json, because the{' '} + * does not match the slash. To match all objects ending in .json + , use **.json instead. +

+

+ For more information, refer to the documentation for{' '} + + FileSystem#getPathMatcher + + . +

+ ), }; @@ -781,7 +793,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F ), }, - inputSourceFilter, + inputSourceObjectGlob, { name: 'inputSource.properties.accessKeyId.type', label: 'Access key ID type', @@ -944,7 +956,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F ), }, - inputSourceFilter, + inputSourceObjectGlob, { name: 'inputSource.properties.sharedAccessStorageToken', label: 'Shared Access Storage Token', @@ -1018,7 +1030,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F ), }, - inputSourceFilter, + inputSourceObjectGlob, ]; case 'index_parallel:delta': diff --git a/web-console/src/druid-models/input-source/input-source.tsx b/web-console/src/druid-models/input-source/input-source.tsx index 17b137412ef..174f8aba516 100644 --- a/web-console/src/druid-models/input-source/input-source.tsx +++ b/web-console/src/druid-models/input-source/input-source.tsx @@ -16,6 +16,7 @@ * limitations under the License. 
*/ +import { Code } from '@blueprintjs/core'; import React from 'react'; import type { Field } from '../../components'; @@ -36,6 +37,18 @@ export const FILTER_SUGGESTIONS: string[] = [ '*.avro', ]; +export const OBJECT_GLOB_SUGGESTIONS: string[] = [ + '**.jsonl', + '**.jsonl.gz', + '**.json', + '**.json.gz', + '**.csv', + '**.tsv', + '**.parquet', + '**.orc', + '**.avro', +]; + export interface InputSource { type: string; baseDir?: string; @@ -43,6 +56,7 @@ export interface InputSource { uris?: string[]; prefixes?: string[]; objects?: { bucket: string; path: string }[]; + objectGlob?: string; fetchTimeout?: number; systemFields?: string[]; @@ -94,10 +108,11 @@ export type InputSourceDesc = httpAuthenticationPassword?: any; } | { - type: 's3'; + type: 's3' | 'google' | 'azureStorage'; uris?: string[]; prefixes?: string[]; objects?: { bucket: string; path: string }[]; + objectGlob?: string; properties?: { accessKeyId?: any; secretAccessKey?: any; @@ -105,12 +120,6 @@ export type InputSourceDesc = assumeRoleExternalId?: any; }; } - | { - type: 'google' | 'azureStorage'; - uris?: string[]; - prefixes?: string[]; - objects?: { bucket: string; path: string }[]; - } | { type: 'hdfs'; paths?: string | string[]; @@ -483,21 +492,28 @@ export const INPUT_SOURCE_FIELDS: Field[] = [ // Cloud common { - name: 'filter', - label: 'File filter', + name: 'objectGlob', type: 'string', - suggestions: FILTER_SUGGESTIONS, - placeholder: '*', + suggestions: OBJECT_GLOB_SUGGESTIONS, + placeholder: '(all files)', defined: typeIsKnown(KNOWN_TYPES, 's3', 'azureStorage', 'google'), info: ( -

- A wildcard filter for files. See{' '} - - here - {' '} - for format information. Files matching the filter criteria are considered for ingestion. - Files not matching the filter criteria are ignored. -

+ <> +

A glob for the object part of the URI.

+

+ The glob must match the entire object part, not just the filename. For example, the glob + *.json does not match /bar/file.json, because and the{' '} + * does not match the slash. To match all objects ending in .json + , use **.json instead. +

+

+ For more information, refer to the documentation for{' '} + + FileSystem#getPathMatcher + + . +

+ ), },