From a6dcc9996284b8d8acb2a3681f7a65b0193ac660 Mon Sep 17 00:00:00 2001
From: Vadim Ogievetsky
Date: Mon, 9 Dec 2019 22:31:28 -0800
Subject: [PATCH] better input format detection (#9007)

---
 web-console/src/utils/ingestion-spec.tsx    | 55 +++++++++----------
 .../views/load-data-view/load-data-view.tsx |  4 +-
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx
index 069b07dd2a0..9ce733d45e5 100644
--- a/web-console/src/utils/ingestion-spec.tsx
+++ b/web-console/src/utils/ingestion-spec.tsx
@@ -334,13 +334,13 @@ const INPUT_FORMAT_FORM_FIELDS: Field[] = [
     defined: (p: InputFormat) =>
       ((p.type === 'csv' || p.type === 'tsv') && !p.findColumnsFromHeader) || p.type === 'regex',
   },
-  // {
-  //   name: 'delimiter',
-  //   type: 'string',
-  //   defaultValue: '\t',
-  //   defined: (p: InputFormat) => p.type === 'tsv',
-  //   info: <>A custom delimiter for data values.</>,
-  // },
+  {
+    name: 'delimiter',
+    type: 'string',
+    defaultValue: '\t',
+    defined: (p: InputFormat) => p.type === 'tsv',
+    info: <>A custom delimiter for data values.</>,
+  },
   {
     name: 'listDelimiter',
     type: 'string',
@@ -2455,34 +2455,33 @@ export function updateIngestionType(
 }
 
 export function fillInputFormat(spec: IngestionSpec, sampleData: string[]): IngestionSpec {
-  const inputFormat = guessInputFormat(sampleData);
-  if (!inputFormat) return spec;
-
-  return deepSet(spec, 'ioConfig.inputFormat', inputFormat);
+  return deepSet(spec, 'ioConfig.inputFormat', guessInputFormat(sampleData));
 }
 
-function guessInputFormat(sampleData: string[]): InputFormat | undefined {
-  const sampleDatum = sampleData[0];
-  if (!sampleDatum) return;
+function guessInputFormat(sampleData: string[]): InputFormat {
+  let sampleDatum = sampleData[0];
+  if (sampleDatum) {
+    sampleDatum = String(sampleDatum); // Really ensure it is a string
 
-  if (sampleDatum.startsWith('{') && sampleDatum.endsWith('}')) {
-    return inputFormatFromType('json');
-  }
+    if (sampleDatum.startsWith('{') && sampleDatum.endsWith('}')) {
+      return inputFormatFromType('json');
+    }
 
-  if (sampleDatum.split('\t').length > 3) {
-    return inputFormatFromType('tsv', !/\t\d+\t/.test(sampleDatum));
-  }
+    if (sampleDatum.split('\t').length > 3) {
+      return inputFormatFromType('tsv', !/\t\d+\t/.test(sampleDatum));
+    }
 
-  if (sampleDatum.split(',').length > 3) {
-    return inputFormatFromType('csv', !/,\d+,/.test(sampleDatum));
-  }
+    if (sampleDatum.split(',').length > 3) {
+      return inputFormatFromType('csv', !/,\d+,/.test(sampleDatum));
+    }
 
-  if (sampleDatum.startsWith('PAR1')) {
-    return inputFormatFromType('parquet');
-  }
+    if (sampleDatum.startsWith('PAR1')) {
+      return inputFormatFromType('parquet');
+    }
 
-  if (sampleDatum.startsWith('ORC')) {
-    return inputFormatFromType('orc');
+    if (sampleDatum.startsWith('ORC')) {
+      return inputFormatFromType('orc');
+    }
   }
 
   return inputFormatFromType('regex');
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 4e733459fe8..79b502f2bbc 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -1112,9 +1112,7 @@ export class LoadDataView extends React.PureComponent