Web console: add system fields UI (#15858)

This PR adds console support for configuring system fields in the batch data loader.
This commit is contained in:
Vadim Ogievetsky 2024-02-07 21:38:55 -08:00 committed by GitHub
parent 21a97f4c61
commit 26815d425b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 124 additions and 39 deletions

View File

@ -375,6 +375,30 @@ export function isDruidSource(spec: Partial<IngestionSpec>): boolean {
return deepGet(spec, 'spec.ioConfig.inputSource.type') === 'druid';
}
/**
 * Lists the system fields that can be requested for the input source configured
 * in an ingestion spec.
 *
 * @param spec - The (possibly partial) ingestion spec; its input source is read
 *   from `spec.ioConfig.inputSource`.
 * @returns The system field names available for that input source, or an empty
 *   array when the spec has no input source yet.
 */
export function getPossibleSystemFieldsForSpec(spec: Partial<IngestionSpec>): string[] {
  const configuredSource = deepGet(spec, 'spec.ioConfig.inputSource');
  // Without an input source there is nothing to derive system fields from.
  return configuredSource ? getPossibleSystemFieldsForInputSource(configuredSource) : [];
}
/**
 * Lists the system fields that can be requested for a given input source,
 * decided solely by the input source's `type`.
 *
 * - `s3`, `google`, `azureStorage`: file URI, bucket and path.
 * - `hdfs`, `local`: file URI and path (no bucket field).
 * - Any other type: no system fields.
 *
 * @param inputSource - The input source whose `type` is inspected.
 * @returns A newly created array of system field names (empty when the type has none).
 */
export function getPossibleSystemFieldsForInputSource(inputSource: InputSource): string[] {
  const sourceType = inputSource.type;

  if (sourceType === 's3' || sourceType === 'google' || sourceType === 'azureStorage') {
    return ['__file_uri', '__file_bucket', '__file_path'];
  }

  if (sourceType === 'hdfs' || sourceType === 'local') {
    return ['__file_uri', '__file_path'];
  }

  return [];
}
/**
 * Every system field name that `getPossibleSystemFieldsForInputSource` can
 * return, regardless of input source type.
 */
export const ALL_POSSIBLE_SYSTEM_FIELDS: string[] = ['__file_uri', '__file_bucket', '__file_path'];
// ---------------------------------
// Spec cleanup and normalization

View File

@ -44,6 +44,7 @@ export interface InputSource {
prefixes?: string[];
objects?: { bucket: string; path: string }[];
fetchTimeout?: number;
systemFields?: string[];
// druid
dataSource?: string;

View File

@ -32,6 +32,7 @@ import type {
TransformSpec,
} from '../druid-models';
import {
ALL_POSSIBLE_SYSTEM_FIELDS,
DETECTION_TIMESTAMP_SPEC,
getDimensionNamesFromTransforms,
getDimensionSpecName,
@ -46,7 +47,7 @@ import { Api } from '../singletons';
import { getDruidErrorMessage, queryDruidRune } from './druid-query';
import { EMPTY_ARRAY, filterMap } from './general';
import { deepGet, deepSet } from './object-change';
import { allowKeys, deepGet, deepSet } from './object-change';
const BASE_SAMPLER_CONFIG: SamplerConfig = {
numRows: 500,
@ -130,7 +131,10 @@ export interface SampleEntry {
}
export function getCacheRowsFromSampleResponse(sampleResponse: SampleResponse): CacheRows {
return filterMap(sampleResponse.data, d => d.input).slice(0, 20);
return filterMap(sampleResponse.data, d => ({
...d.input,
...allowKeys<any>(d.parsed, ALL_POSSIBLE_SYSTEM_FIELDS),
})).slice(0, 20);
}
export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows) {
@ -349,6 +353,7 @@ export async function sampleForParser(
dataSource: 'sample',
timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : DETECTION_TIMESTAMP_SPEC,
dimensionsSpec: {
dimensions: deepGet(ioConfig, 'inputSource.systemFields'),
useSchemaDiscovery: true,
},
granularitySpec: {

View File

@ -96,6 +96,7 @@ import {
getIoConfigTuningFormFields,
getIssueWithSpec,
getMetricSpecName,
getPossibleSystemFieldsForSpec,
getRequiredModule,
getRollup,
getSchemaMode,
@ -1520,6 +1521,8 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
? STREAMING_INPUT_FORMAT_FIELDS
: BATCH_INPUT_FORMAT_FIELDS;
const possibleSystemFields = getPossibleSystemFieldsForSpec(spec);
const normalInputAutoForm = (
<AutoForm
fields={inputFormatFields}
@ -1583,6 +1586,21 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
)}
</>
)}
{possibleSystemFields.length > 0 && (
<AutoForm
fields={[
{
name: 'spec.ioConfig.inputSource.systemFields',
label: 'System fields',
type: 'string-array',
suggestions: possibleSystemFields,
info: 'JSON array of system fields to return as part of input rows.',
},
]}
model={spec}
onChange={this.updateSpecPreview}
/>
)}
{this.renderApplyButtonBar(
parserQueryState,
AutoForm.issueWithModel(inputFormat, inputFormatFields) ||

View File

@ -186,10 +186,10 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView(
) : inputFormat && inputSource ? (
<TitleFrame title="Load data" subtitle="Parse">
<InputFormatStep
inputSource={inputSource}
initInputSource={inputSource}
initInputFormat={inputFormat}
doneButton={false}
onSet={({ inputFormat, signature, timeExpression, arrayMode }) => {
onSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => {
setContent({
queryString: ingestQueryPatternToQuery(
externalConfigToIngestQueryPattern(
@ -203,7 +203,7 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView(
});
}}
altText="Skip the wizard and continue with custom SQL"
onAltSet={({ inputFormat, signature, timeExpression, arrayMode }) => {
onAltSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => {
goToQuery({
queryString: ingestQueryPatternToQuery(
externalConfigToIngestQueryPattern(

View File

@ -64,10 +64,10 @@ export const ConnectExternalDataDialog = React.memo(function ConnectExternalData
<div className={Classes.DIALOG_BODY}>
{inputFormat && inputSource ? (
<InputFormatStep
inputSource={inputSource}
initInputSource={inputSource}
initInputFormat={inputFormat}
doneButton
onSet={({ inputFormat, signature, timeExpression, arrayMode }) => {
onSet={({ inputSource, inputFormat, signature, timeExpression, arrayMode }) => {
onSetExternalConfig(
{ inputSource, inputFormat, signature },
timeExpression,

View File

@ -28,6 +28,7 @@ import {
BATCH_INPUT_FORMAT_FIELDS,
chooseByBestTimestamp,
DETECTION_TIMESTAMP_SPEC,
getPossibleSystemFieldsForInputSource,
guessColumnTypeFromSampleResponse,
inputFormatOutputsNumericStrings,
possibleDruidFormatForValues,
@ -48,13 +49,19 @@ import { ParseDataTable } from '../../load-data-view/parse-data-table/parse-data
import './input-format-step.scss';
export interface InputFormatAndMore {
export interface InputSourceFormatAndMore {
inputSource: InputSource;
inputFormat: InputFormat;
signature: SqlColumnDeclaration[];
timeExpression: SqlExpression | undefined;
arrayMode: ArrayMode;
}
/**
 * The input source paired with the input format being edited in this step.
 * Both are held in one object so they can be stored and sampled together.
 */
interface InputSourceAndFormat {
  inputSource: InputSource;
  // Partial because the format may not yet pass validation; narrowed with isValidInputFormat before use.
  inputFormat: Partial<InputFormat>;
}
interface PossibleTimeExpression {
column: string;
format: string;
@ -62,28 +69,37 @@ interface PossibleTimeExpression {
}
export interface InputFormatStepProps {
inputSource: InputSource;
initInputSource: InputSource;
initInputFormat: Partial<InputFormat>;
doneButton: boolean;
onSet(inputFormatAndMore: InputFormatAndMore): void;
onSet(inputSourceFormatAndMore: InputSourceFormatAndMore): void;
onBack(): void;
onAltSet?(inputFormatAndMore: InputFormatAndMore): void;
onAltSet?(inputSourceFormatAndMore: InputSourceFormatAndMore): void;
altText?: string;
}
export const InputFormatStep = React.memo(function InputFormatStep(props: InputFormatStepProps) {
const { inputSource, initInputFormat, doneButton, onSet, onBack, onAltSet, altText } = props;
/**
 * Type guard for a partially filled input format.
 *
 * @param inputFormat - The input format as currently entered in the form.
 * @returns Whether `AutoForm.isValidModel` accepts it against
 *   `BATCH_INPUT_FORMAT_FIELDS`; when true the value is narrowed to `InputFormat`.
 */
function isValidInputFormat(inputFormat: Partial<InputFormat>): inputFormat is InputFormat {
  const passesBatchFieldValidation = AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS);
  return passesBatchFieldValidation;
}
const [inputFormat, setInputFormat] = useState<Partial<InputFormat>>(initInputFormat);
const [inputFormatToSample, setInputFormatToSample] = useState<InputFormat | undefined>(
AutoForm.isValidModel(initInputFormat, BATCH_INPUT_FORMAT_FIELDS) ? initInputFormat : undefined,
);
export const InputFormatStep = React.memo(function InputFormatStep(props: InputFormatStepProps) {
const { initInputSource, initInputFormat, doneButton, onSet, onBack, onAltSet, altText } = props;
const [inputSourceAndFormat, setInputSourceAndFormat] = useState<InputSourceAndFormat>({
inputSource: initInputSource,
inputFormat: initInputFormat,
});
const [inputSourceAndFormatToSample, setInputSourceAndFormatToSample] = useState<
InputSourceAndFormat | undefined
>(isValidInputFormat(initInputFormat) ? inputSourceAndFormat : undefined);
const [selectTimestamp, setSelectTimestamp] = useState(true);
const [arrayMode, setArrayMode] = useState<ArrayMode>('multi-values');
const [previewState] = useQueryManager<InputFormat, SampleResponse>({
query: inputFormatToSample,
processQuery: async (inputFormat: InputFormat) => {
const [previewState] = useQueryManager<InputSourceAndFormat, SampleResponse>({
query: inputSourceAndFormatToSample,
processQuery: async ({ inputSource, inputFormat }) => {
if (!isValidInputFormat(inputFormat)) throw new Error('invalid input format');
const sampleSpec: SampleSpec = {
type: 'index_parallel',
spec: {
@ -96,6 +112,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
dataSource: 'sample',
timestampSpec: DETECTION_TIMESTAMP_SPEC,
dimensionsSpec: {
dimensions: inputSource.systemFields,
useSchemaDiscovery: true,
},
granularitySpec: {
@ -148,12 +165,12 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
? getHeaderNamesFromSampleResponse(previewSampleResponse, 'ignoreIfZero')
: undefined;
const inputFormatAndMore =
previewSampleResponse &&
headerNames &&
AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS)
const currentInputFormat = inputSourceAndFormat.inputFormat;
const inputSourceFormatAndMore: InputSourceFormatAndMore | undefined =
previewSampleResponse && headerNames && isValidInputFormat(currentInputFormat)
? {
inputFormat,
inputSource: inputSourceAndFormat.inputSource,
inputFormat: currentInputFormat,
signature: headerNames.map(name =>
SqlColumnDeclaration.create(
name,
@ -161,7 +178,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
guessColumnTypeFromSampleResponse(
previewSampleResponse,
name,
inputFormatOutputsNumericStrings(inputFormat),
inputFormatOutputsNumericStrings(currentInputFormat),
),
),
),
@ -171,7 +188,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
}
: undefined;
const hasArrays = inputFormatAndMore?.signature.some(d => d.columnType.isArray());
const hasArrays = inputSourceFormatAndMore?.signature.some(d => d.columnType.isArray());
const possibleSystemFields = getPossibleSystemFieldsForInputSource(
inputSourceAndFormat.inputSource,
);
return (
<div className="input-format-step">
@ -206,18 +226,35 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
</FormGroup>
<AutoForm
fields={BATCH_INPUT_FORMAT_FIELDS}
model={inputFormat}
onChange={setInputFormat}
model={inputSourceAndFormat.inputFormat}
onChange={inputFormat =>
setInputSourceAndFormat({ ...inputSourceAndFormat, inputFormat })
}
/>
{inputFormatToSample !== inputFormat && (
{possibleSystemFields.length > 0 && (
<AutoForm
fields={[
{
name: 'inputSource.systemFields',
label: 'System fields',
type: 'string-array',
suggestions: possibleSystemFields,
info: 'JSON array of system fields to return as part of input rows.',
},
]}
model={inputSourceAndFormat}
onChange={setInputSourceAndFormat as any}
/>
)}
{inputSourceAndFormatToSample !== inputSourceAndFormat && (
<FormGroup className="control-buttons">
<Button
text="Preview changes"
intent={Intent.PRIMARY}
disabled={!AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS)}
disabled={!isValidInputFormat(inputSourceAndFormat.inputFormat)}
onClick={() => {
if (!AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS)) return;
setInputFormatToSample(inputFormat);
if (!isValidInputFormat(inputSourceAndFormat.inputFormat)) return;
setInputSourceAndFormatToSample(inputSourceAndFormat);
}}
/>
</FormGroup>
@ -246,10 +283,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
text={altText}
rightIcon={IconNames.ARROW_TOP_RIGHT}
minimal
disabled={!inputFormatAndMore}
disabled={!inputSourceFormatAndMore}
onClick={() => {
if (!inputFormatAndMore) return;
onAltSet(inputFormatAndMore);
if (!inputSourceFormatAndMore) return;
onAltSet(inputSourceFormatAndMore);
}}
/>
</Callout>
@ -262,10 +299,10 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
text={doneButton ? 'Done' : 'Next'}
rightIcon={doneButton ? IconNames.TICK : IconNames.ARROW_RIGHT}
intent={Intent.PRIMARY}
disabled={!inputFormatAndMore}
disabled={!inputSourceFormatAndMore}
onClick={() => {
if (!inputFormatAndMore) return;
onSet(inputFormatAndMore);
if (!inputSourceFormatAndMore) return;
onSet(inputSourceFormatAndMore);
}}
/>
</div>