diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx index 604f14afe71..eca148ada3b 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx @@ -375,6 +375,30 @@ export function isDruidSource(spec: Partial): boolean { return deepGet(spec, 'spec.ioConfig.inputSource.type') === 'druid'; } +export function getPossibleSystemFieldsForSpec(spec: Partial): string[] { + const inputSource = deepGet(spec, 'spec.ioConfig.inputSource'); + if (!inputSource) return []; + return getPossibleSystemFieldsForInputSource(inputSource); +} + +export function getPossibleSystemFieldsForInputSource(inputSource: InputSource): string[] { + switch (inputSource.type) { + case 's3': + case 'google': + case 'azureStorage': + return ['__file_uri', '__file_bucket', '__file_path']; + + case 'hdfs': + case 'local': + return ['__file_uri', '__file_path']; + + default: + return []; + } +} + +export const ALL_POSSIBLE_SYSTEM_FIELDS: string[] = ['__file_uri', '__file_bucket', '__file_path']; + // --------------------------------- // Spec cleanup and normalization diff --git a/web-console/src/druid-models/input-source/input-source.tsx b/web-console/src/druid-models/input-source/input-source.tsx index 6fb8fbd4b77..2b0b60eecfc 100644 --- a/web-console/src/druid-models/input-source/input-source.tsx +++ b/web-console/src/druid-models/input-source/input-source.tsx @@ -44,6 +44,7 @@ export interface InputSource { prefixes?: string[]; objects?: { bucket: string; path: string }[]; fetchTimeout?: number; + systemFields?: string[]; // druid dataSource?: string; diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts index 9ec31b723ff..866b195d457 100644 --- a/web-console/src/utils/sampler.ts +++ b/web-console/src/utils/sampler.ts @@ -32,6 +32,7 @@ import type { TransformSpec, } from '../druid-models'; import { + ALL_POSSIBLE_SYSTEM_FIELDS, DETECTION_TIMESTAMP_SPEC, getDimensionNamesFromTransforms, getDimensionSpecName, @@ -46,7 +47,7 @@ import { Api } from '../singletons'; import { getDruidErrorMessage, queryDruidRune } from './druid-query'; import { EMPTY_ARRAY, filterMap } from './general'; -import { deepGet, deepSet } from './object-change'; +import { allowKeys, deepGet, deepSet } from './object-change'; const BASE_SAMPLER_CONFIG: SamplerConfig = { numRows: 500, @@ -130,7 +131,10 @@ export interface SampleEntry { } export function getCacheRowsFromSampleResponse(sampleResponse: SampleResponse): CacheRows { - return filterMap(sampleResponse.data, d => d.input).slice(0, 20); + return filterMap(sampleResponse.data, d => ({ + ...d.input, + ...allowKeys(d.parsed, ALL_POSSIBLE_SYSTEM_FIELDS), + })).slice(0, 20); } export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows) { @@ -349,6 +353,7 @@ export async function sampleForParser( dataSource: 'sample', timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : DETECTION_TIMESTAMP_SPEC, dimensionsSpec: { + dimensions: deepGet(ioConfig, 'inputSource.systemFields'), useSchemaDiscovery: true, }, granularitySpec: { diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx index 661561cc092..ddc6c304711 100644 --- a/web-console/src/views/load-data-view/load-data-view.tsx +++ b/web-console/src/views/load-data-view/load-data-view.tsx @@ -96,6 +96,7 @@ import { getIoConfigTuningFormFields, getIssueWithSpec, getMetricSpecName, + getPossibleSystemFieldsForSpec, getRequiredModule, getRollup, getSchemaMode, @@ -1520,6 +1521,8 @@ export class LoadDataView extends React.PureComponent )} + {possibleSystemFields.length > 0 && ( + + )} {this.renderApplyButtonBar( parserQueryState, AutoForm.issueWithModel(inputFormat, inputFormatFields) || diff --git a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx index 4b99da236b3..0fc89573bd2 100644 --- a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx +++ b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx @@ -186,10 +186,10 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView( ) : inputFormat && inputSource ? ( { + onSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => { setContent({ queryString: ingestQueryPatternToQuery( externalConfigToIngestQueryPattern( @@ -203,7 +203,7 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView( }); }} altText="Skip the wizard and continue with custom SQL" - onAltSet={({ inputFormat, signature, timeExpression, arrayMode }) => { + onAltSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => { goToQuery({ queryString: ingestQueryPatternToQuery( externalConfigToIngestQueryPattern( diff --git a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx index 9d4974332ef..4302c3783b2 100644 --- a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx +++ b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx @@ -64,10 +64,10 @@ export const ConnectExternalDataDialog = React.memo(function ConnectExternalData
{inputFormat && inputSource ? ( { + onSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => { onSetExternalConfig( { inputSource, inputFormat, signature }, timeExpression, diff --git a/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx b/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx index e6ea2d9081b..9729b224831 100644 --- a/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx +++ b/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx @@ -28,6 +28,7 @@ import { BATCH_INPUT_FORMAT_FIELDS, chooseByBestTimestamp, DETECTION_TIMESTAMP_SPEC, + getPossibleSystemFieldsForInputSource, guessColumnTypeFromSampleResponse, inputFormatOutputsNumericStrings, possibleDruidFormatForValues, @@ -48,13 +49,19 @@ import { ParseDataTable } from '../../load-data-view/parse-data-table/parse-data import './input-format-step.scss'; -export interface InputFormatAndMore { +export interface InputSourceFormatAndMore { + inputSource: InputSource; inputFormat: InputFormat; signature: SqlColumnDeclaration[]; timeExpression: SqlExpression | undefined; arrayMode: ArrayMode; } +interface InputSourceAndFormat { + inputSource: InputSource; + inputFormat: Partial; +} + interface PossibleTimeExpression { column: string; format: string; @@ -62,28 +69,37 @@ interface PossibleTimeExpression { } export interface InputFormatStepProps { - inputSource: InputSource; + initInputSource: InputSource; initInputFormat: Partial; doneButton: boolean; - onSet(inputFormatAndMore: InputFormatAndMore): void; + onSet(inputSourceFormatAndMore: InputSourceFormatAndMore): void; onBack(): void; - onAltSet?(inputFormatAndMore: InputFormatAndMore): void; + onAltSet?(inputSourceFormatAndMore: InputSourceFormatAndMore): void; altText?: string; } -export const InputFormatStep = React.memo(function InputFormatStep(props: InputFormatStepProps) { - const { inputSource, initInputFormat, doneButton, onSet, onBack, onAltSet, altText } = props; +function isValidInputFormat(inputFormat: Partial): inputFormat is InputFormat { + return AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS); +} - const [inputFormat, setInputFormat] = useState>(initInputFormat); - const [inputFormatToSample, setInputFormatToSample] = useState( - AutoForm.isValidModel(initInputFormat, BATCH_INPUT_FORMAT_FIELDS) ? initInputFormat : undefined, - ); +export const InputFormatStep = React.memo(function InputFormatStep(props: InputFormatStepProps) { + const { initInputSource, initInputFormat, doneButton, onSet, onBack, onAltSet, altText } = props; + + const [inputSourceAndFormat, setInputSourceAndFormat] = useState({ + inputSource: initInputSource, + inputFormat: initInputFormat, + }); + const [inputSourceAndFormatToSample, setInputSourceAndFormatToSample] = useState< + InputSourceAndFormat | undefined + >(isValidInputFormat(initInputFormat) ? inputSourceAndFormat : undefined); const [selectTimestamp, setSelectTimestamp] = useState(true); const [arrayMode, setArrayMode] = useState('multi-values'); - const [previewState] = useQueryManager({ - query: inputFormatToSample, - processQuery: async (inputFormat: InputFormat) => { + const [previewState] = useQueryManager({ + query: inputSourceAndFormatToSample, + processQuery: async ({ inputSource, inputFormat }) => { + if (!isValidInputFormat(inputFormat)) throw new Error('invalid input format'); + const sampleSpec: SampleSpec = { type: 'index_parallel', spec: { @@ -96,6 +112,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF dataSource: 'sample', timestampSpec: DETECTION_TIMESTAMP_SPEC, dimensionsSpec: { + dimensions: inputSource.systemFields, useSchemaDiscovery: true, }, granularitySpec: { @@ -148,12 +165,12 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF ? getHeaderNamesFromSampleResponse(previewSampleResponse, 'ignoreIfZero') : undefined; - const inputFormatAndMore = - previewSampleResponse && - headerNames && - AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS) + const currentInputFormat = inputSourceAndFormat.inputFormat; + const inputSourceFormatAndMore: InputSourceFormatAndMore | undefined = + previewSampleResponse && headerNames && isValidInputFormat(currentInputFormat) ? { - inputFormat, + inputSource: inputSourceAndFormat.inputSource, + inputFormat: currentInputFormat, signature: headerNames.map(name => SqlColumnDeclaration.create( name, @@ -161,7 +178,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF guessColumnTypeFromSampleResponse( previewSampleResponse, name, - inputFormatOutputsNumericStrings(inputFormat), + inputFormatOutputsNumericStrings(currentInputFormat), ), ), ), @@ -171,7 +188,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF } : undefined; - const hasArrays = inputFormatAndMore?.signature.some(d => d.columnType.isArray()); + const hasArrays = inputSourceFormatAndMore?.signature.some(d => d.columnType.isArray()); + const possibleSystemFields = getPossibleSystemFieldsForInputSource( + inputSourceAndFormat.inputSource, + ); return (
@@ -206,18 +226,35 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF + setInputSourceAndFormat({ ...inputSourceAndFormat, inputFormat }) + } /> - {inputFormatToSample !== inputFormat && ( + {possibleSystemFields.length > 0 && ( + + )} + {inputSourceAndFormatToSample !== inputSourceAndFormat && (