Web console: add system fields UI (#15858)

This PR adds console support for configuring system fields in the batch data loader.
This commit is contained in:
Vadim Ogievetsky 2024-02-07 21:38:55 -08:00 committed by GitHub
parent 21a97f4c61
commit 26815d425b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 124 additions and 39 deletions

View File

@ -375,6 +375,30 @@ export function isDruidSource(spec: Partial<IngestionSpec>): boolean {
return deepGet(spec, 'spec.ioConfig.inputSource.type') === 'druid'; return deepGet(spec, 'spec.ioConfig.inputSource.type') === 'druid';
} }
/**
 * Lists the system fields that may be requested for the input source found at
 * `spec.ioConfig.inputSource` of the given ingestion spec.
 *
 * @param spec - the (possibly partial) ingestion spec to inspect
 * @returns the candidate system field names, or an empty array when the spec has no input source
 */
export function getPossibleSystemFieldsForSpec(spec: Partial<IngestionSpec>): string[] {
  const source = deepGet(spec, 'spec.ioConfig.inputSource');
  return source ? getPossibleSystemFieldsForInputSource(source) : [];
}
/**
 * Lists the system fields that may be requested for the given input source, based on its `type`.
 *
 * - `s3`, `google`, `azureStorage`: `__file_uri`, `__file_bucket`, `__file_path`
 * - `hdfs`, `local`: `__file_uri`, `__file_path`
 * - any other type: no system fields
 *
 * @param inputSource - the input source whose `type` is examined
 * @returns a newly created array of system field names (empty when none apply)
 */
export function getPossibleSystemFieldsForInputSource(inputSource: InputSource): string[] {
  const sourceType = inputSource.type;

  if (sourceType === 's3' || sourceType === 'google' || sourceType === 'azureStorage') {
    return ['__file_uri', '__file_bucket', '__file_path'];
  }

  if (sourceType === 'hdfs' || sourceType === 'local') {
    return ['__file_uri', '__file_path'];
  }

  return [];
}
/** The system field names recognized across input source types: file URI, bucket, and path. */
export const ALL_POSSIBLE_SYSTEM_FIELDS: string[] = [
  '__file_uri',
  '__file_bucket',
  '__file_path',
];
// --------------------------------- // ---------------------------------
// Spec cleanup and normalization // Spec cleanup and normalization

View File

@ -44,6 +44,7 @@ export interface InputSource {
prefixes?: string[]; prefixes?: string[];
objects?: { bucket: string; path: string }[]; objects?: { bucket: string; path: string }[];
fetchTimeout?: number; fetchTimeout?: number;
systemFields?: string[];
// druid // druid
dataSource?: string; dataSource?: string;

View File

@ -32,6 +32,7 @@ import type {
TransformSpec, TransformSpec,
} from '../druid-models'; } from '../druid-models';
import { import {
ALL_POSSIBLE_SYSTEM_FIELDS,
DETECTION_TIMESTAMP_SPEC, DETECTION_TIMESTAMP_SPEC,
getDimensionNamesFromTransforms, getDimensionNamesFromTransforms,
getDimensionSpecName, getDimensionSpecName,
@ -46,7 +47,7 @@ import { Api } from '../singletons';
import { getDruidErrorMessage, queryDruidRune } from './druid-query'; import { getDruidErrorMessage, queryDruidRune } from './druid-query';
import { EMPTY_ARRAY, filterMap } from './general'; import { EMPTY_ARRAY, filterMap } from './general';
import { deepGet, deepSet } from './object-change'; import { allowKeys, deepGet, deepSet } from './object-change';
const BASE_SAMPLER_CONFIG: SamplerConfig = { const BASE_SAMPLER_CONFIG: SamplerConfig = {
numRows: 500, numRows: 500,
@ -130,7 +131,10 @@ export interface SampleEntry {
} }
export function getCacheRowsFromSampleResponse(sampleResponse: SampleResponse): CacheRows { export function getCacheRowsFromSampleResponse(sampleResponse: SampleResponse): CacheRows {
return filterMap(sampleResponse.data, d => d.input).slice(0, 20); return filterMap(sampleResponse.data, d => ({
...d.input,
...allowKeys<any>(d.parsed, ALL_POSSIBLE_SYSTEM_FIELDS),
})).slice(0, 20);
} }
export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows) { export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows) {
@ -349,6 +353,7 @@ export async function sampleForParser(
dataSource: 'sample', dataSource: 'sample',
timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : DETECTION_TIMESTAMP_SPEC, timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : DETECTION_TIMESTAMP_SPEC,
dimensionsSpec: { dimensionsSpec: {
dimensions: deepGet(ioConfig, 'inputSource.systemFields'),
useSchemaDiscovery: true, useSchemaDiscovery: true,
}, },
granularitySpec: { granularitySpec: {

View File

@ -96,6 +96,7 @@ import {
getIoConfigTuningFormFields, getIoConfigTuningFormFields,
getIssueWithSpec, getIssueWithSpec,
getMetricSpecName, getMetricSpecName,
getPossibleSystemFieldsForSpec,
getRequiredModule, getRequiredModule,
getRollup, getRollup,
getSchemaMode, getSchemaMode,
@ -1520,6 +1521,8 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
? STREAMING_INPUT_FORMAT_FIELDS ? STREAMING_INPUT_FORMAT_FIELDS
: BATCH_INPUT_FORMAT_FIELDS; : BATCH_INPUT_FORMAT_FIELDS;
const possibleSystemFields = getPossibleSystemFieldsForSpec(spec);
const normalInputAutoForm = ( const normalInputAutoForm = (
<AutoForm <AutoForm
fields={inputFormatFields} fields={inputFormatFields}
@ -1583,6 +1586,21 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
)} )}
</> </>
)} )}
{possibleSystemFields.length > 0 && (
<AutoForm
fields={[
{
name: 'spec.ioConfig.inputSource.systemFields',
label: 'System fields',
type: 'string-array',
suggestions: possibleSystemFields,
info: 'JSON array of system fields to return as part of input rows.',
},
]}
model={spec}
onChange={this.updateSpecPreview}
/>
)}
{this.renderApplyButtonBar( {this.renderApplyButtonBar(
parserQueryState, parserQueryState,
AutoForm.issueWithModel(inputFormat, inputFormatFields) || AutoForm.issueWithModel(inputFormat, inputFormatFields) ||

View File

@ -186,10 +186,10 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView(
) : inputFormat && inputSource ? ( ) : inputFormat && inputSource ? (
<TitleFrame title="Load data" subtitle="Parse"> <TitleFrame title="Load data" subtitle="Parse">
<InputFormatStep <InputFormatStep
inputSource={inputSource} initInputSource={inputSource}
initInputFormat={inputFormat} initInputFormat={inputFormat}
doneButton={false} doneButton={false}
onSet={({ inputFormat, signature, timeExpression, arrayMode }) => { onSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => {
setContent({ setContent({
queryString: ingestQueryPatternToQuery( queryString: ingestQueryPatternToQuery(
externalConfigToIngestQueryPattern( externalConfigToIngestQueryPattern(
@ -203,7 +203,7 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView(
}); });
}} }}
altText="Skip the wizard and continue with custom SQL" altText="Skip the wizard and continue with custom SQL"
onAltSet={({ inputFormat, signature, timeExpression, arrayMode }) => { onAltSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => {
goToQuery({ goToQuery({
queryString: ingestQueryPatternToQuery( queryString: ingestQueryPatternToQuery(
externalConfigToIngestQueryPattern( externalConfigToIngestQueryPattern(

View File

@ -64,10 +64,10 @@ export const ConnectExternalDataDialog = React.memo(function ConnectExternalData
<div className={Classes.DIALOG_BODY}> <div className={Classes.DIALOG_BODY}>
{inputFormat && inputSource ? ( {inputFormat && inputSource ? (
<InputFormatStep <InputFormatStep
inputSource={inputSource} initInputSource={inputSource}
initInputFormat={inputFormat} initInputFormat={inputFormat}
doneButton doneButton
onSet={({ inputFormat, signature, timeExpression, arrayMode }) => { onSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => {
onSetExternalConfig( onSetExternalConfig(
{ inputSource, inputFormat, signature }, { inputSource, inputFormat, signature },
timeExpression, timeExpression,

View File

@ -28,6 +28,7 @@ import {
BATCH_INPUT_FORMAT_FIELDS, BATCH_INPUT_FORMAT_FIELDS,
chooseByBestTimestamp, chooseByBestTimestamp,
DETECTION_TIMESTAMP_SPEC, DETECTION_TIMESTAMP_SPEC,
getPossibleSystemFieldsForInputSource,
guessColumnTypeFromSampleResponse, guessColumnTypeFromSampleResponse,
inputFormatOutputsNumericStrings, inputFormatOutputsNumericStrings,
possibleDruidFormatForValues, possibleDruidFormatForValues,
@ -48,13 +49,19 @@ import { ParseDataTable } from '../../load-data-view/parse-data-table/parse-data
import './input-format-step.scss'; import './input-format-step.scss';
export interface InputFormatAndMore { export interface InputSourceFormatAndMore {
inputSource: InputSource;
inputFormat: InputFormat; inputFormat: InputFormat;
signature: SqlColumnDeclaration[]; signature: SqlColumnDeclaration[];
timeExpression: SqlExpression | undefined; timeExpression: SqlExpression | undefined;
arrayMode: ArrayMode; arrayMode: ArrayMode;
} }
/**
 * An input source bundled together with an input format that may still be incomplete,
 * so that both can be held and updated as a single value.
 */
interface InputSourceAndFormat {
// The input source to read from.
inputSource: InputSource;
// The input format; Partial because not all of its fields need to be set yet.
inputFormat: Partial<InputFormat>;
}
interface PossibleTimeExpression { interface PossibleTimeExpression {
column: string; column: string;
format: string; format: string;
@ -62,28 +69,37 @@ interface PossibleTimeExpression {
} }
export interface InputFormatStepProps { export interface InputFormatStepProps {
inputSource: InputSource; initInputSource: InputSource;
initInputFormat: Partial<InputFormat>; initInputFormat: Partial<InputFormat>;
doneButton: boolean; doneButton: boolean;
onSet(inputFormatAndMore: InputFormatAndMore): void; onSet(inputSourceFormatAndMore: InputSourceFormatAndMore): void;
onBack(): void; onBack(): void;
onAltSet?(inputFormatAndMore: InputFormatAndMore): void; onAltSet?(inputSourceFormatAndMore: InputSourceFormatAndMore): void;
altText?: string; altText?: string;
} }
export const InputFormatStep = React.memo(function InputFormatStep(props: InputFormatStepProps) { function isValidInputFormat(inputFormat: Partial<InputFormat>): inputFormat is InputFormat {
const { inputSource, initInputFormat, doneButton, onSet, onBack, onAltSet, altText } = props; return AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS);
}
const [inputFormat, setInputFormat] = useState<Partial<InputFormat>>(initInputFormat); export const InputFormatStep = React.memo(function InputFormatStep(props: InputFormatStepProps) {
const [inputFormatToSample, setInputFormatToSample] = useState<InputFormat | undefined>( const { initInputSource, initInputFormat, doneButton, onSet, onBack, onAltSet, altText } = props;
AutoForm.isValidModel(initInputFormat, BATCH_INPUT_FORMAT_FIELDS) ? initInputFormat : undefined,
); const [inputSourceAndFormat, setInputSourceAndFormat] = useState<InputSourceAndFormat>({
inputSource: initInputSource,
inputFormat: initInputFormat,
});
const [inputSourceAndFormatToSample, setInputSourceAndFormatToSample] = useState<
InputSourceAndFormat | undefined
>(isValidInputFormat(initInputFormat) ? inputSourceAndFormat : undefined);
const [selectTimestamp, setSelectTimestamp] = useState(true); const [selectTimestamp, setSelectTimestamp] = useState(true);
const [arrayMode, setArrayMode] = useState<ArrayMode>('multi-values'); const [arrayMode, setArrayMode] = useState<ArrayMode>('multi-values');
const [previewState] = useQueryManager<InputFormat, SampleResponse>({ const [previewState] = useQueryManager<InputSourceAndFormat, SampleResponse>({
query: inputFormatToSample, query: inputSourceAndFormatToSample,
processQuery: async (inputFormat: InputFormat) => { processQuery: async ({ inputSource, inputFormat }) => {
if (!isValidInputFormat(inputFormat)) throw new Error('invalid input format');
const sampleSpec: SampleSpec = { const sampleSpec: SampleSpec = {
type: 'index_parallel', type: 'index_parallel',
spec: { spec: {
@ -96,6 +112,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
dataSource: 'sample', dataSource: 'sample',
timestampSpec: DETECTION_TIMESTAMP_SPEC, timestampSpec: DETECTION_TIMESTAMP_SPEC,
dimensionsSpec: { dimensionsSpec: {
dimensions: inputSource.systemFields,
useSchemaDiscovery: true, useSchemaDiscovery: true,
}, },
granularitySpec: { granularitySpec: {
@ -148,12 +165,12 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
? getHeaderNamesFromSampleResponse(previewSampleResponse, 'ignoreIfZero') ? getHeaderNamesFromSampleResponse(previewSampleResponse, 'ignoreIfZero')
: undefined; : undefined;
const inputFormatAndMore = const currentInputFormat = inputSourceAndFormat.inputFormat;
previewSampleResponse && const inputSourceFormatAndMore: InputSourceFormatAndMore | undefined =
headerNames && previewSampleResponse && headerNames && isValidInputFormat(currentInputFormat)
AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS)
? { ? {
inputFormat, inputSource: inputSourceAndFormat.inputSource,
inputFormat: currentInputFormat,
signature: headerNames.map(name => signature: headerNames.map(name =>
SqlColumnDeclaration.create( SqlColumnDeclaration.create(
name, name,
@ -161,7 +178,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
guessColumnTypeFromSampleResponse( guessColumnTypeFromSampleResponse(
previewSampleResponse, previewSampleResponse,
name, name,
inputFormatOutputsNumericStrings(inputFormat), inputFormatOutputsNumericStrings(currentInputFormat),
), ),
), ),
), ),
@ -171,7 +188,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
} }
: undefined; : undefined;
const hasArrays = inputFormatAndMore?.signature.some(d => d.columnType.isArray()); const hasArrays = inputSourceFormatAndMore?.signature.some(d => d.columnType.isArray());
const possibleSystemFields = getPossibleSystemFieldsForInputSource(
inputSourceAndFormat.inputSource,
);
return ( return (
<div className="input-format-step"> <div className="input-format-step">
@ -206,18 +226,35 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
</FormGroup> </FormGroup>
<AutoForm <AutoForm
fields={BATCH_INPUT_FORMAT_FIELDS} fields={BATCH_INPUT_FORMAT_FIELDS}
model={inputFormat} model={inputSourceAndFormat.inputFormat}
onChange={setInputFormat} onChange={inputFormat =>
setInputSourceAndFormat({ ...inputSourceAndFormat, inputFormat })
}
/> />
{inputFormatToSample !== inputFormat && ( {possibleSystemFields.length > 0 && (
<AutoForm
fields={[
{
name: 'inputSource.systemFields',
label: 'System fields',
type: 'string-array',
suggestions: possibleSystemFields,
info: 'JSON array of system fields to return as part of input rows.',
},
]}
model={inputSourceAndFormat}
onChange={setInputSourceAndFormat as any}
/>
)}
{inputSourceAndFormatToSample !== inputSourceAndFormat && (
<FormGroup className="control-buttons"> <FormGroup className="control-buttons">
<Button <Button
text="Preview changes" text="Preview changes"
intent={Intent.PRIMARY} intent={Intent.PRIMARY}
disabled={!AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS)} disabled={!isValidInputFormat(inputSourceAndFormat.inputFormat)}
onClick={() => { onClick={() => {
if (!AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS)) return; if (!isValidInputFormat(inputSourceAndFormat.inputFormat)) return;
setInputFormatToSample(inputFormat); setInputSourceAndFormatToSample(inputSourceAndFormat);
}} }}
/> />
</FormGroup> </FormGroup>
@ -246,10 +283,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
text={altText} text={altText}
rightIcon={IconNames.ARROW_TOP_RIGHT} rightIcon={IconNames.ARROW_TOP_RIGHT}
minimal minimal
disabled={!inputFormatAndMore} disabled={!inputSourceFormatAndMore}
onClick={() => { onClick={() => {
if (!inputFormatAndMore) return; if (!inputSourceFormatAndMore) return;
onAltSet(inputFormatAndMore); onAltSet(inputSourceFormatAndMore);
}} }}
/> />
</Callout> </Callout>
@ -262,10 +299,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
text={doneButton ? 'Done' : 'Next'} text={doneButton ? 'Done' : 'Next'}
rightIcon={doneButton ? IconNames.TICK : IconNames.ARROW_RIGHT} rightIcon={doneButton ? IconNames.TICK : IconNames.ARROW_RIGHT}
intent={Intent.PRIMARY} intent={Intent.PRIMARY}
disabled={!inputFormatAndMore} disabled={!inputSourceFormatAndMore}
onClick={() => { onClick={() => {
if (!inputFormatAndMore) return; if (!inputSourceFormatAndMore) return;
onSet(inputFormatAndMore); onSet(inputSourceFormatAndMore);
}} }}
/> />
</div> </div>