better input format detection (#9007)

This commit is contained in:
Vadim Ogievetsky 2019-12-09 22:31:28 -08:00 committed by Clint Wylie
parent 4327892b84
commit a6dcc99962
2 changed files with 28 additions and 31 deletions

View File

@ -334,13 +334,13 @@ const INPUT_FORMAT_FORM_FIELDS: Field<InputFormat>[] = [
defined: (p: InputFormat) =>
((p.type === 'csv' || p.type === 'tsv') && !p.findColumnsFromHeader) || p.type === 'regex',
},
// {
// name: 'delimiter',
// type: 'string',
// defaultValue: '\t',
// defined: (p: InputFormat) => p.type === 'tsv',
// info: <>A custom delimiter for data values.</>,
// },
{
name: 'delimiter',
type: 'string',
defaultValue: '\t',
defined: (p: InputFormat) => p.type === 'tsv',
info: <>A custom delimiter for data values.</>,
},
{
name: 'listDelimiter',
type: 'string',
@ -2455,34 +2455,33 @@ export function updateIngestionType(
}
export function fillInputFormat(spec: IngestionSpec, sampleData: string[]): IngestionSpec {
const inputFormat = guessInputFormat(sampleData);
if (!inputFormat) return spec;
return deepSet(spec, 'ioConfig.inputFormat', inputFormat);
return deepSet(spec, 'ioConfig.inputFormat', guessInputFormat(sampleData));
}
function guessInputFormat(sampleData: string[]): InputFormat | undefined {
const sampleDatum = sampleData[0];
if (!sampleDatum) return;
function guessInputFormat(sampleData: string[]): InputFormat {
let sampleDatum = sampleData[0];
if (sampleDatum) {
sampleDatum = String(sampleDatum); // Really ensure it is a string
if (sampleDatum.startsWith('{') && sampleDatum.endsWith('}')) {
return inputFormatFromType('json');
}
if (sampleDatum.startsWith('{') && sampleDatum.endsWith('}')) {
return inputFormatFromType('json');
}
if (sampleDatum.split('\t').length > 3) {
return inputFormatFromType('tsv', !/\t\d+\t/.test(sampleDatum));
}
if (sampleDatum.split('\t').length > 3) {
return inputFormatFromType('tsv', !/\t\d+\t/.test(sampleDatum));
}
if (sampleDatum.split(',').length > 3) {
return inputFormatFromType('csv', !/,\d+,/.test(sampleDatum));
}
if (sampleDatum.split(',').length > 3) {
return inputFormatFromType('csv', !/,\d+,/.test(sampleDatum));
}
if (sampleDatum.startsWith('PAR1')) {
return inputFormatFromType('parquet');
}
if (sampleDatum.startsWith('PAR1')) {
return inputFormatFromType('parquet');
}
if (sampleDatum.startsWith('ORC')) {
return inputFormatFromType('orc');
if (sampleDatum.startsWith('ORC')) {
return inputFormatFromType('orc');
}
}
return inputFormatFromType('regex');

View File

@ -1112,9 +1112,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
const inputData = inputQueryState.data;
if (druidSource) {
let newSpec = fillInputFormat(spec, []);
newSpec = deepSet(newSpec, 'dataSchema.timestampSpec', {
let newSpec = deepSet(spec, 'dataSchema.timestampSpec', {
column: '__time',
format: 'iso',
});