mirror of https://github.com/apache/druid.git
This commit is contained in:
parent
7638d29c40
commit
c6f41dcd22
|
@ -56,7 +56,7 @@ function _build_distribution() {
|
|||
(
|
||||
# Add HEAD as an allowed HTTP method since this is how we check when the Druid service is ready.
|
||||
cd "$(_get_code_root)" \
|
||||
&& mvn -Pdist,skip-static-checks,skip-tests -Dmaven.javadoc.skip=true -q -T1C install \
|
||||
&& mvn -Pdist,bundle-contrib-exts,skip-static-checks,skip-tests -Dforbiddenapis.skip=true -Dcheckstyle.skip=true -Dpmd.skip=true -Dmaven.javadoc.skip=true -Danimal.sniffer.skip=true -Denforcer.skip=true -Dcyclonedx.skip=true -q -T1C install \
|
||||
&& cd distribution/target \
|
||||
&& tar xzf "apache-druid-$(_get_druid_version)-bin.tar.gz" \
|
||||
&& cd apache-druid-$(_get_druid_version) \
|
||||
|
@ -64,7 +64,7 @@ function _build_distribution() {
|
|||
&& cp "$(_get_code_root)/extensions-core/testing-tools/target/druid-testing-tools-$(_get_druid_version).jar" extensions/druid-testing-tools/ \
|
||||
&& mkdir -p extensions/druid-compressed-bigdecimal \
|
||||
&& cp "$(_get_code_root)/extensions-contrib/compressed-bigdecimal/target/druid-compressed-bigdecimal-$(_get_druid_version).jar" extensions/druid-compressed-bigdecimal/ \
|
||||
&& echo -e "\n\ndruid.extensions.loadList=[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-multi-stage-query\", \"druid-testing-tools\", \"druid-bloom-filter\", \"druid-datasketches\", \"druid-histogram\", \"druid-stats\", \"druid-compressed-bigdecimal\"]" >> conf/druid/auto/_common/common.runtime.properties \
|
||||
&& echo -e "\n\ndruid.extensions.loadList=[\"druid-hdfs-storage\", \"druid-kafka-indexing-service\", \"druid-multi-stage-query\", \"druid-testing-tools\", \"druid-bloom-filter\", \"druid-datasketches\", \"druid-histogram\", \"druid-stats\", \"druid-compressed-bigdecimal\", \"druid-parquet-extensions\", \"druid-deltalake-extensions\"]" >> conf/druid/auto/_common/common.runtime.properties \
|
||||
&& echo -e "\n\ndruid.server.http.allowedHttpMethods=[\"HEAD\"]" >> conf/druid/auto/_common/common.runtime.properties \
|
||||
&& echo -e "\n\ndruid.export.storage.baseDir=/" >> conf/druid/auto/_common/common.runtime.properties \
|
||||
)
|
||||
|
|
|
@ -25,9 +25,8 @@ import AceEditor from 'react-ace';
|
|||
|
||||
import './json-input.scss';
|
||||
|
||||
function parseHjson(str: string) {
|
||||
// Throwing on empty input is more consistent with how JSON.parse works
|
||||
if (str.trim() === '') throw new Error('empty hjson');
|
||||
function parseHjson(str: string): any {
|
||||
if (str.trim() === '') return;
|
||||
return Hjson.parse(str);
|
||||
}
|
||||
|
||||
|
|
|
@ -395,6 +395,10 @@ export function isDruidSource(spec: Partial<IngestionSpec>): boolean {
|
|||
return deepGet(spec, 'spec.ioConfig.inputSource.type') === 'druid';
|
||||
}
|
||||
|
||||
export function isFixedFormatSource(spec: Partial<IngestionSpec>): boolean {
|
||||
return oneOf(deepGet(spec, 'spec.ioConfig.inputSource.type'), 'druid', 'delta');
|
||||
}
|
||||
|
||||
export function getPossibleSystemFieldsForSpec(spec: Partial<IngestionSpec>): string[] {
|
||||
const inputSource = deepGet(spec, 'spec.ioConfig.inputSource');
|
||||
if (!inputSource) return [];
|
||||
|
@ -1061,7 +1065,6 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
|
|||
label: 'Delta filter',
|
||||
type: 'json',
|
||||
placeholder: '{"type": "=", "column": "name", "value": "foo"}',
|
||||
defaultValue: {},
|
||||
info: (
|
||||
<>
|
||||
<ExternalLink
|
||||
|
@ -1078,7 +1081,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
|
|||
label: 'Delta snapshot version',
|
||||
type: 'number',
|
||||
placeholder: '(latest)',
|
||||
defaultValue: {},
|
||||
zeroMeansUndefined: true,
|
||||
info: (
|
||||
<>
|
||||
The snapshot version to read from the Delta table. By default, the latest snapshot is
|
||||
|
@ -1613,6 +1616,9 @@ export function guessDataSourceNameFromInputSource(inputSource: InputSource): st
|
|||
return actualPath ? actualPath.path : uriPath ? filenameFromPath(uriPath) : undefined;
|
||||
}
|
||||
|
||||
case 'delta':
|
||||
return inputSource.tablePath ? filenameFromPath(inputSource.tablePath) : undefined;
|
||||
|
||||
case 'http':
|
||||
return Array.isArray(inputSource.uris) ? filenameFromPath(inputSource.uris[0]) : undefined;
|
||||
|
||||
|
|
|
@ -653,7 +653,6 @@ export const INPUT_SOURCE_FIELDS: Field<InputSource>[] = [
|
|||
type: 'json',
|
||||
placeholder: '{"type": "=", "column": "name", "value": "foo"}',
|
||||
defined: typeIsKnown(KNOWN_TYPES, 'delta'),
|
||||
required: false,
|
||||
info: (
|
||||
<>
|
||||
<ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources/#delta-filter-object`}>
|
||||
|
@ -668,8 +667,8 @@ export const INPUT_SOURCE_FIELDS: Field<InputSource>[] = [
|
|||
label: 'Delta snapshot version',
|
||||
type: 'number',
|
||||
placeholder: '(latest)',
|
||||
zeroMeansUndefined: true,
|
||||
defined: typeIsKnown(KNOWN_TYPES, 'delta'),
|
||||
required: false,
|
||||
info: (
|
||||
<>
|
||||
The snapshot version to read from the Delta table. By default, the latest snapshot is read.
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
|
||||
import { dedupe, F, SqlExpression, SqlFunction } from '@druid-toolkit/query';
|
||||
import type { CancelToken } from 'axios';
|
||||
import * as JSONBig from 'json-bigint-native';
|
||||
|
||||
import type {
|
||||
|
@ -40,6 +41,7 @@ import {
|
|||
getSpecType,
|
||||
getTimestampSchema,
|
||||
isDruidSource,
|
||||
isFixedFormatSource,
|
||||
PLACEHOLDER_TIMESTAMP_SPEC,
|
||||
REINDEX_TIMESTAMP_SPEC,
|
||||
TIME_COLUMN,
|
||||
|
@ -187,12 +189,15 @@ export async function getProxyOverlordModules(): Promise<string[]> {
|
|||
export async function postToSampler(
|
||||
sampleSpec: SampleSpec,
|
||||
forStr: string,
|
||||
cancelToken?: CancelToken,
|
||||
): Promise<SampleResponse> {
|
||||
sampleSpec = fixSamplerLookups(fixSamplerTypes(sampleSpec));
|
||||
|
||||
let sampleResp: any;
|
||||
try {
|
||||
sampleResp = await Api.instance.post(`/druid/indexer/v1/sampler?for=${forStr}`, sampleSpec);
|
||||
sampleResp = await Api.instance.post(`/druid/indexer/v1/sampler?for=${forStr}`, sampleSpec, {
|
||||
cancelToken,
|
||||
});
|
||||
} catch (e) {
|
||||
throw new Error(getDruidErrorMessage(e));
|
||||
}
|
||||
|
@ -269,8 +274,7 @@ export async function sampleForConnect(
|
|||
sampleStrategy,
|
||||
);
|
||||
|
||||
const reingestMode = isDruidSource(spec);
|
||||
if (!reingestMode) {
|
||||
if (!isFixedFormatSource(spec)) {
|
||||
ioConfig = deepSet(
|
||||
ioConfig,
|
||||
'inputFormat',
|
||||
|
@ -282,6 +286,7 @@ export async function sampleForConnect(
|
|||
);
|
||||
}
|
||||
|
||||
const reingestMode = isDruidSource(spec);
|
||||
const sampleSpec: SampleSpec = {
|
||||
type: samplerType,
|
||||
spec: {
|
||||
|
@ -290,7 +295,7 @@ export async function sampleForConnect(
|
|||
dataSchema: {
|
||||
dataSource: 'sample',
|
||||
timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : PLACEHOLDER_TIMESTAMP_SPEC,
|
||||
dimensionsSpec: {},
|
||||
dimensionsSpec: { useSchemaDiscovery: true },
|
||||
granularitySpec: {
|
||||
rollup: false,
|
||||
},
|
||||
|
|
|
@ -115,6 +115,7 @@ import {
|
|||
invalidPartitionConfig,
|
||||
isDruidSource,
|
||||
isEmptyIngestionSpec,
|
||||
isFixedFormatSource,
|
||||
isKafkaOrKinesis,
|
||||
isStreamingSpec,
|
||||
issueWithIoConfig,
|
||||
|
@ -265,26 +266,27 @@ function showBlankLine(line: SampleEntry): string {
|
|||
|
||||
function formatSampleEntries(
|
||||
sampleEntries: SampleEntry[],
|
||||
specialSource: undefined | 'druid' | 'kafka' | 'kinesis',
|
||||
): string {
|
||||
if (!sampleEntries.length) return 'No data returned from sampler';
|
||||
specialSource: undefined | 'fixedFormat' | 'druid' | 'kafka' | 'kinesis',
|
||||
): string[] {
|
||||
if (!sampleEntries.length) return ['No data returned from sampler'];
|
||||
|
||||
switch (specialSource) {
|
||||
case 'fixedFormat':
|
||||
return sampleEntries.map(l => JSONBig.stringify(l.parsed));
|
||||
|
||||
case 'druid':
|
||||
return sampleEntries.map(showDruidLine).join('\n');
|
||||
return sampleEntries.map(showDruidLine);
|
||||
|
||||
case 'kafka':
|
||||
return sampleEntries.map(showKafkaLine).join('\n');
|
||||
return sampleEntries.map(showKafkaLine);
|
||||
|
||||
case 'kinesis':
|
||||
return sampleEntries.map(showKinesisLine).join('\n');
|
||||
return sampleEntries.map(showKinesisLine);
|
||||
|
||||
default:
|
||||
return (
|
||||
sampleEntries.every(l => !l.parsed)
|
||||
? sampleEntries.map(showBlankLine)
|
||||
: sampleEntries.map(showRawLine)
|
||||
).join('\n');
|
||||
return sampleEntries.every(l => !l.parsed)
|
||||
? sampleEntries.map(showBlankLine)
|
||||
: sampleEntries.map(showRawLine);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -551,7 +553,6 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
|
||||
isStepEnabled(step: Step): boolean {
|
||||
const { spec, cacheRows } = this.state;
|
||||
const druidSource = isDruidSource(spec);
|
||||
const ioConfig: IoConfig = deepGet(spec, 'spec.ioConfig') || EMPTY_OBJECT;
|
||||
|
||||
switch (step) {
|
||||
|
@ -559,10 +560,12 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
return Boolean(spec.type);
|
||||
|
||||
case 'parser':
|
||||
return Boolean(!druidSource && spec.type && !issueWithIoConfig(ioConfig));
|
||||
return Boolean(!isFixedFormatSource(spec) && spec.type && !issueWithIoConfig(ioConfig));
|
||||
|
||||
case 'timestamp':
|
||||
return Boolean(!druidSource && cacheRows && deepGet(spec, 'spec.dataSchema.timestampSpec'));
|
||||
return Boolean(
|
||||
!isDruidSource(spec) && cacheRows && deepGet(spec, 'spec.dataSchema.timestampSpec'),
|
||||
);
|
||||
|
||||
case 'transform':
|
||||
case 'filter':
|
||||
|
@ -1256,7 +1259,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
const deltaState: Partial<LoadDataViewState> = {
|
||||
inputQueryState: new QueryState({ data: sampleResponse }),
|
||||
};
|
||||
if (isDruidSource(spec)) {
|
||||
if (isFixedFormatSource(spec)) {
|
||||
deltaState.cacheRows = getCacheRowsFromSampleResponse(sampleResponse);
|
||||
}
|
||||
this.setState(deltaState as LoadDataViewState);
|
||||
|
@ -1268,8 +1271,15 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
const specType = getSpecType(spec);
|
||||
const ioConfig: IoConfig = deepGet(spec, 'spec.ioConfig') || EMPTY_OBJECT;
|
||||
const inlineMode = deepGet(spec, 'spec.ioConfig.inputSource.type') === 'inline';
|
||||
const fixedFormatSource = isFixedFormatSource(spec);
|
||||
const druidSource = isDruidSource(spec);
|
||||
const specialSource = druidSource ? 'druid' : isKafkaOrKinesis(specType) ? specType : undefined;
|
||||
const specialSource = druidSource
|
||||
? 'druid'
|
||||
: fixedFormatSource
|
||||
? 'fixedFormat'
|
||||
: isKafkaOrKinesis(specType)
|
||||
? specType
|
||||
: undefined;
|
||||
|
||||
let mainFill: JSX.Element | string;
|
||||
if (inlineMode) {
|
||||
|
@ -1301,7 +1311,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
<TextArea
|
||||
className="raw-lines"
|
||||
readOnly
|
||||
value={formatSampleEntries(inputData, specialSource)}
|
||||
value={formatSampleEntries(inputData, specialSource).join('\n')}
|
||||
/>
|
||||
)}
|
||||
{inputQueryState.isLoading() && <Loader />}
|
||||
|
@ -1373,7 +1383,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
</div>
|
||||
{this.renderNextBar({
|
||||
disabled: !inputQueryState.data,
|
||||
nextStep: druidSource ? 'transform' : 'parser',
|
||||
nextStep: druidSource ? 'transform' : fixedFormatSource ? 'timestamp' : 'parser',
|
||||
onNextStep: () => {
|
||||
if (!inputQueryState.data) return false;
|
||||
const inputData = inputQueryState.data;
|
||||
|
@ -1421,6 +1431,15 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
}
|
||||
}
|
||||
|
||||
this.updateSpec(fillDataSourceNameIfNeeded(newSpec));
|
||||
}
|
||||
if (fixedFormatSource) {
|
||||
const newSpec = deepSet(
|
||||
spec,
|
||||
'spec.dataSchema.timestampSpec',
|
||||
getTimestampSpec(inputQueryState.data),
|
||||
);
|
||||
|
||||
this.updateSpec(fillDataSourceNameIfNeeded(newSpec));
|
||||
} else {
|
||||
const issue = issueWithSampleData(
|
||||
|
@ -1673,21 +1692,15 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
|
|||
disabled: !parserQueryState.data,
|
||||
onNextStep: () => {
|
||||
if (!parserQueryState.data) return false;
|
||||
let possibleTimestampSpec: TimestampSpec;
|
||||
if (isDruidSource(spec)) {
|
||||
possibleTimestampSpec = {
|
||||
column: TIME_COLUMN,
|
||||
format: 'auto',
|
||||
};
|
||||
} else {
|
||||
possibleTimestampSpec = getTimestampSpec(parserQueryState.data);
|
||||
}
|
||||
|
||||
if (possibleTimestampSpec) {
|
||||
const newSpec = deepSet(spec, 'spec.dataSchema.timestampSpec', possibleTimestampSpec);
|
||||
this.updateSpec(newSpec);
|
||||
}
|
||||
const possibleTimestampSpec = isDruidSource(spec)
|
||||
? {
|
||||
column: TIME_COLUMN,
|
||||
format: 'auto',
|
||||
}
|
||||
: getTimestampSpec(parserQueryState.data);
|
||||
|
||||
const newSpec = deepSet(spec, 'spec.dataSchema.timestampSpec', possibleTimestampSpec);
|
||||
this.updateSpec(newSpec);
|
||||
return true;
|
||||
},
|
||||
})}
|
||||
|
|
|
@ -256,7 +256,6 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView(
|
|||
<TitleFrame title="Load data" subtitle="Select input type">
|
||||
<InputSourceStep
|
||||
initInputSource={inputSource}
|
||||
mode="sampler"
|
||||
onSet={(inputSource, inputFormat) => {
|
||||
setExternalConfigStep({ inputSource, inputFormat });
|
||||
}}
|
||||
|
|
|
@ -83,7 +83,6 @@ export const ConnectExternalDataDialog = React.memo(function ConnectExternalData
|
|||
) : (
|
||||
<InputSourceStep
|
||||
initInputSource={inputSource}
|
||||
mode="sampler"
|
||||
onSet={(inputSource, inputFormat, partitionedByHint) => {
|
||||
setExternalConfigStep({ inputSource, inputFormat, partitionedByHint });
|
||||
}}
|
||||
|
|
|
@ -98,7 +98,9 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
|
|||
const [previewState] = useQueryManager<InputSourceAndFormat, SampleResponse>({
|
||||
query: inputSourceAndFormatToSample,
|
||||
processQuery: async ({ inputSource, inputFormat }) => {
|
||||
if (!isValidInputFormat(inputFormat)) throw new Error('invalid input format');
|
||||
const fixedFormatSource = inputSource.type === 'delta';
|
||||
if (!fixedFormatSource && !isValidInputFormat(inputFormat))
|
||||
throw new Error('invalid input format');
|
||||
|
||||
const sampleSpec: SampleSpec = {
|
||||
type: 'index_parallel',
|
||||
|
@ -106,7 +108,9 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
|
|||
ioConfig: {
|
||||
type: 'index_parallel',
|
||||
inputSource,
|
||||
inputFormat: deepSet(inputFormat, 'keepNullColumns', true),
|
||||
inputFormat: fixedFormatSource
|
||||
? undefined
|
||||
: (deepSet(inputFormat, 'keepNullColumns', true) as InputFormat),
|
||||
},
|
||||
dataSchema: {
|
||||
dataSource: 'sample',
|
||||
|
@ -196,6 +200,8 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
|
|||
const needsResample = inputSourceAndFormatToSample !== inputSourceAndFormat;
|
||||
const nextDisabled = !inputSourceFormatAndMore || needsResample;
|
||||
|
||||
const fixedFormatSource = inputSourceFormatAndMore?.inputSource.type === 'delta';
|
||||
|
||||
return (
|
||||
<div className="input-format-step">
|
||||
<div className="preview">
|
||||
|
@ -227,27 +233,38 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF
|
|||
<LearnMore href={`${getLink('DOCS')}/ingestion/data-formats`} />
|
||||
</Callout>
|
||||
</FormGroup>
|
||||
<AutoForm
|
||||
fields={BATCH_INPUT_FORMAT_FIELDS}
|
||||
model={inputSourceAndFormat.inputFormat}
|
||||
onChange={inputFormat =>
|
||||
setInputSourceAndFormat({ ...inputSourceAndFormat, inputFormat })
|
||||
}
|
||||
/>
|
||||
{possibleSystemFields.length > 0 && (
|
||||
<AutoForm
|
||||
fields={[
|
||||
{
|
||||
name: 'inputSource.systemFields',
|
||||
label: 'System fields',
|
||||
type: 'string-array',
|
||||
suggestions: possibleSystemFields,
|
||||
info: 'JSON array of system fields to return as part of input rows.',
|
||||
},
|
||||
]}
|
||||
model={inputSourceAndFormat}
|
||||
onChange={setInputSourceAndFormat as any}
|
||||
/>
|
||||
{fixedFormatSource ? (
|
||||
<FormGroup>
|
||||
<Callout>
|
||||
The <Tag minimal>{inputSourceFormatAndMore?.inputSource.type}</Tag> input source has
|
||||
a fixed format that can not be configured.
|
||||
</Callout>
|
||||
</FormGroup>
|
||||
) : (
|
||||
<>
|
||||
<AutoForm
|
||||
fields={BATCH_INPUT_FORMAT_FIELDS}
|
||||
model={inputSourceAndFormat.inputFormat}
|
||||
onChange={inputFormat =>
|
||||
setInputSourceAndFormat({ ...inputSourceAndFormat, inputFormat })
|
||||
}
|
||||
/>
|
||||
{possibleSystemFields.length > 0 && (
|
||||
<AutoForm
|
||||
fields={[
|
||||
{
|
||||
name: 'inputSource.systemFields',
|
||||
label: 'System fields',
|
||||
type: 'string-array',
|
||||
suggestions: possibleSystemFields,
|
||||
info: 'JSON array of system fields to return as part of input rows.',
|
||||
},
|
||||
]}
|
||||
model={inputSourceAndFormat}
|
||||
onChange={setInputSourceAndFormat as any}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{needsResample && (
|
||||
<FormGroup className="control-buttons">
|
||||
|
|
|
@ -28,7 +28,6 @@ import {
|
|||
} from '@blueprintjs/core';
|
||||
import { IconNames } from '@blueprintjs/icons';
|
||||
import type { QueryResult } from '@druid-toolkit/query';
|
||||
import { SqlColumnDeclaration } from '@druid-toolkit/query';
|
||||
import classNames from 'classnames';
|
||||
import type { JSX } from 'react';
|
||||
import React, { useEffect, useState } from 'react';
|
||||
|
@ -37,7 +36,6 @@ import { AutoForm, ExternalLink } from '../../../components';
|
|||
import { ShowValueDialog } from '../../../dialogs/show-value-dialog/show-value-dialog';
|
||||
import type { Execution, ExecutionError, InputFormat, InputSource } from '../../../druid-models';
|
||||
import {
|
||||
externalConfigToTableExpression,
|
||||
getIngestionImage,
|
||||
getIngestionTitle,
|
||||
guessSimpleInputFormat,
|
||||
|
@ -45,11 +43,7 @@ import {
|
|||
issueWithSampleData,
|
||||
PLACEHOLDER_TIMESTAMP_SPEC,
|
||||
} from '../../../druid-models';
|
||||
import {
|
||||
executionBackgroundResultStatusCheck,
|
||||
extractResult,
|
||||
submitTaskQuery,
|
||||
} from '../../../helpers';
|
||||
import { executionBackgroundResultStatusCheck } from '../../../helpers';
|
||||
import { useQueryManager } from '../../../hooks';
|
||||
import { AppToaster, UrlBaser } from '../../../singletons';
|
||||
import { filterMap, IntermediateQueryState } from '../../../utils';
|
||||
|
@ -61,17 +55,20 @@ import { InputSourceInfo } from './input-source-info';
|
|||
|
||||
import './input-source-step.scss';
|
||||
|
||||
const BOGUS_LIST_DELIMITER = '56616469-6de2-9da4-efb8-8f416e6e6965'; // Just a UUID to disable the list delimiter, let's hope we do not see this UUID in the data
|
||||
const ROWS_TO_SAMPLE = 50;
|
||||
|
||||
const FIXED_FORMAT_FOR_SOURCE: Record<string, InputFormat> = {
|
||||
delta: { type: 'parquet' },
|
||||
};
|
||||
|
||||
function resultToInputFormat(result: QueryResult): InputFormat {
|
||||
if (!result.rows.length) throw new Error('No data returned from sample query');
|
||||
return guessSimpleInputFormat(result.rows.map((r: any) => r[0]));
|
||||
}
|
||||
|
||||
const BOGUS_LIST_DELIMITER = '56616469-6de2-9da4-efb8-8f416e6e6965'; // Just a UUID to disable the list delimiter, let's hope we do not see this UUID in the data
|
||||
const ROWS_TO_SAMPLE = 50;
|
||||
|
||||
export interface InputSourceStepProps {
|
||||
initInputSource: Partial<InputSource> | undefined;
|
||||
mode: 'sampler' | 'msq';
|
||||
onSet(
|
||||
inputSource: InputSource,
|
||||
inputFormat: InputFormat,
|
||||
|
@ -80,7 +77,7 @@ export interface InputSourceStepProps {
|
|||
}
|
||||
|
||||
export const InputSourceStep = React.memo(function InputSourceStep(props: InputSourceStepProps) {
|
||||
const { initInputSource, mode, onSet } = props;
|
||||
const { initInputSource, onSet } = props;
|
||||
|
||||
const [stackToShow, setStackToShow] = useState<string | undefined>();
|
||||
const [inputSource, setInputSource] = useState<Partial<InputSource> | string | undefined>(
|
||||
|
@ -94,66 +91,46 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
|
|||
Execution
|
||||
>({
|
||||
processQuery: async ({ inputSource, suggestedInputFormat }, cancelToken) => {
|
||||
let sampleLines: string[];
|
||||
if (mode === 'sampler') {
|
||||
const sampleSpec: SampleSpec = {
|
||||
type: 'index_parallel',
|
||||
spec: {
|
||||
ioConfig: {
|
||||
type: 'index_parallel',
|
||||
inputSource,
|
||||
inputFormat: {
|
||||
type: 'regex',
|
||||
pattern: '([\\s\\S]*)', // Match the entire line, every single character
|
||||
listDelimiter: BOGUS_LIST_DELIMITER,
|
||||
columns: ['raw'],
|
||||
},
|
||||
},
|
||||
dataSchema: {
|
||||
dataSource: 'sample',
|
||||
timestampSpec: PLACEHOLDER_TIMESTAMP_SPEC,
|
||||
dimensionsSpec: {},
|
||||
granularitySpec: {
|
||||
rollup: false,
|
||||
},
|
||||
const fixedFormat = FIXED_FORMAT_FOR_SOURCE['delta'];
|
||||
|
||||
const sampleSpec: SampleSpec = {
|
||||
type: 'index_parallel',
|
||||
spec: {
|
||||
ioConfig: {
|
||||
type: 'index_parallel',
|
||||
inputSource,
|
||||
inputFormat: fixedFormat
|
||||
? undefined
|
||||
: {
|
||||
type: 'regex',
|
||||
pattern: '([\\s\\S]*)', // Match the entire line, every single character
|
||||
listDelimiter: BOGUS_LIST_DELIMITER,
|
||||
columns: ['raw'],
|
||||
},
|
||||
},
|
||||
dataSchema: {
|
||||
dataSource: 'sample',
|
||||
timestampSpec: PLACEHOLDER_TIMESTAMP_SPEC,
|
||||
dimensionsSpec: {},
|
||||
granularitySpec: {
|
||||
rollup: false,
|
||||
},
|
||||
},
|
||||
samplerConfig: {
|
||||
numRows: ROWS_TO_SAMPLE,
|
||||
timeoutMs: 15000,
|
||||
},
|
||||
};
|
||||
},
|
||||
samplerConfig: {
|
||||
numRows: ROWS_TO_SAMPLE,
|
||||
timeoutMs: 15000,
|
||||
},
|
||||
};
|
||||
|
||||
const sampleResponse = await postToSampler(sampleSpec, 'input-source-step');
|
||||
|
||||
sampleLines = filterMap(sampleResponse.data, l => (l.input ? l.input.raw : undefined));
|
||||
} else {
|
||||
const tableExpression = externalConfigToTableExpression({
|
||||
inputSource,
|
||||
inputFormat: {
|
||||
type: 'regex',
|
||||
pattern: '([\\s\\S]*)',
|
||||
listDelimiter: BOGUS_LIST_DELIMITER,
|
||||
columns: ['raw'],
|
||||
},
|
||||
signature: [SqlColumnDeclaration.create('raw', 'VARCHAR')],
|
||||
});
|
||||
|
||||
const result = extractResult(
|
||||
await submitTaskQuery({
|
||||
query: `SELECT REPLACE(raw, U&'\\0000', '') AS "raw" FROM ${tableExpression}`, // Make sure to remove possible \u0000 chars as they are not allowed and will produce an InvalidNullByte error message
|
||||
context: {
|
||||
sqlOuterLimit: ROWS_TO_SAMPLE,
|
||||
},
|
||||
cancelToken,
|
||||
}),
|
||||
);
|
||||
|
||||
if (result instanceof IntermediateQueryState) return result;
|
||||
sampleLines = result.rows.map((r: string[]) => r[0]);
|
||||
}
|
||||
const sampleResponse = await postToSampler(sampleSpec, 'input-source-step', cancelToken);
|
||||
const sampleLines = filterMap(
|
||||
sampleResponse.data,
|
||||
fixedFormat ? l => l.input : l => (l.input ? l.input.raw : undefined),
|
||||
);
|
||||
|
||||
if (!sampleLines.length) throw new Error('No data returned from sampler');
|
||||
if (fixedFormat) return fixedFormat;
|
||||
|
||||
const issue = issueWithSampleData(sampleLines, false);
|
||||
if (issue) {
|
||||
|
@ -226,6 +203,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
|
|||
{renderIngestionCard('s3')}
|
||||
{renderIngestionCard('azureStorage')}
|
||||
{renderIngestionCard('google')}
|
||||
{renderIngestionCard('delta')}
|
||||
{renderIngestionCard('hdfs')}
|
||||
{renderIngestionCard('http')}
|
||||
{renderIngestionCard('local')}
|
||||
|
|
Loading…
Reference in New Issue