Support more JODA time formats (#7857)

This commit is contained in:
Vadim Ogievetsky 2019-06-09 20:08:29 -07:00 committed by Fangjin Yang
parent c3d230b354
commit c612ddc0f4
7 changed files with 171 additions and 123 deletions

View File

@ -30,13 +30,11 @@ import {
Position
} from '@blueprintjs/core';
import { IconNames } from '@blueprintjs/icons';
import classNames from 'classnames';
import React from 'react';
import { AboutDialog } from '../../dialogs/about-dialog/about-dialog';
import { CoordinatorDynamicConfigDialog } from '../../dialogs/coordinator-dynamic-config/coordinator-dynamic-config';
import { OverlordDynamicConfigDialog } from '../../dialogs/overlord-dynamic-config/overlord-dynamic-config';
import { getWikipediaSpec } from '../../utils/example-ingestion-spec';
import {
DRUID_DOCS,
DRUID_GITHUB,

View File

@ -0,0 +1,15 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`jodaFormatToRegExp works for common formats 1`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])\\\\/(?:1[0-2]|[1-9])\\\\/[0-9]{4}$/i"`;
exports[`jodaFormatToRegExp works for common formats 2`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4}$/i"`;
exports[`jodaFormatToRegExp works for common formats 3`] = `"/^(?:1[0-2]|[1-9])\\\\/(?:3[0-1]|[12][0-9]|[1-9])\\\\/[0-9]{2}$/i"`;
exports[`jodaFormatToRegExp works for common formats 4`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])-(?:1[0-2]|[1-9])-[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
exports[`jodaFormatToRegExp works for common formats 5`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
exports[`jodaFormatToRegExp works for common formats 6`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9]$/i"`;
exports[`jodaFormatToRegExp works for common formats 7`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9].[0-9]{1,3}$/i"`;

View File

@ -16,14 +16,33 @@
* limitations under the License.
*/
export type DruidTimestampFormat = 'iso' | 'millis' | 'posix' | 'auto' | 'd/M/yyyy' | 'dd-M-yyyy hh:mm:ss a' |
'MM/dd/YYYY' | 'M/d/YY' | 'MM/dd/YYYY hh:mm:ss a' | 'YYYY-MM-dd HH:mm:ss' | 'YYYY-MM-dd HH:mm:ss.S';
import { jodaFormatToRegExp } from './joda-to-regexp';
export const TIMESTAMP_FORMAT_VALUES: DruidTimestampFormat[] = [
'iso', 'millis', 'posix', 'MM/dd/YYYY hh:mm:ss a', 'MM/dd/YYYY', 'M/d/YY', 'd/M/yyyy',
'YYYY-MM-dd HH:mm:ss', 'YYYY-MM-dd HH:mm:ss.S'
// Named timestamp formats that are not Joda pattern strings.
export const BASIC_FORMAT_VALUES: string[] = [
'iso',
'millis',
'posix'
];
// Joda-style pattern strings for values that carry only a calendar date.
export const DATE_FORMAT_VALUES: string[] = [
'dd/MM/yyyy',
'MM/dd/yyyy',
'd/M/yy',
'M/d/yy',
'd/M/yyyy',
'M/d/yyyy'
];
// Joda-style pattern strings for values that carry both a date and a time of day.
export const DATE_TIME_FORMAT_VALUES: string[] = [
'd/M/yyyy H:mm:ss',
'M/d/yyyy H:mm:ss',
'MM/dd/yyyy hh:mm:ss a',
'yyyy-MM-dd HH:mm:ss',
'yyyy-MM-dd HH:mm:ss.S'
];
// Every candidate format, in the order possibleDruidFormatForValues tries them. The first format
// that matches all sample values is returned, so earlier entries take precedence when the values
// are ambiguous (e.g. day-first patterns are listed before month-first ones).
const ALL_FORMAT_VALUES: string[] = BASIC_FORMAT_VALUES.concat(DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES);
// Reference instant, kept both as an ISO string and as its epoch-millisecond value.
const EXAMPLE_DATE_ISO = '2015-10-29T23:00:00.000Z';
const EXAMPLE_DATE_VALUE = Date.parse(EXAMPLE_DATE_ISO);
const MIN_MILLIS = 3.15576e11; // 10 years (of 365.25 days) in millis, so Tue Jan 01 1980
@ -33,18 +52,9 @@ const MAX_POSIX = MAX_MILLIS / 1000;
// copied from http://goo.gl/0ejHHW with small tweak to make dddd not pass on its own
// tslint:disable-next-line:max-line-length
export const ISO_MATCHER = new RegExp(/^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/);
export const JODA_TO_REGEXP_LOOKUP: Record<string, RegExp> = {
'd/M/yyyy': /^[12]?\d\/1?\d\/\d\d\d\d$/,
'MM/dd/YYYY': /^\d\d\/\d\d\/\d\d\d\d$/,
'M/d/YY': /^1?\d\/[12]?\d\/\d\d$/,
'd-M-yyyy hh:mm:ss a': /^[12]?\d-1?\d-\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
'MM/dd/YYYY hh:mm:ss a' : /^\d\d\/\d\d\/\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
'YYYY-MM-dd HH:mm:ss' : /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d$/,
'YYYY-MM-dd HH:mm:ss.S': /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d\d\d$/
};
export const ISO_MATCHER = /^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/;
export function timeFormatMatches(format: DruidTimestampFormat, value: string | number): boolean {
export function timeFormatMatches(format: string, value: string | number): boolean {
if (format === 'iso') {
return ISO_MATCHER.test(String(value));
}
@ -59,14 +69,11 @@ export function timeFormatMatches(format: DruidTimestampFormat, value: string |
return MIN_POSIX < absValue && absValue < MAX_POSIX;
}
const formatRegexp = JODA_TO_REGEXP_LOOKUP[format];
if (!formatRegexp) throw new Error(`unknown Druid format ${format}`);
return formatRegexp.test(String(value));
return jodaFormatToRegExp(format).test(String(value));
}
export function possibleDruidFormatForValues(values: any[]): DruidTimestampFormat | null {
return TIMESTAMP_FORMAT_VALUES.filter(format => {
/**
 * Picks the first known timestamp format that every supplied value conforms to.
 *
 * Candidates are tried in ALL_FORMAT_VALUES order, so when several formats would accept the
 * values the earliest one in that list is returned.
 *
 * @param values sample values to test against each candidate format
 * @returns the first format that matches all values, or null when no candidate does
 */
export function possibleDruidFormatForValues(values: any[]): string | null {
  // Return on the first hit rather than testing every remaining format against every value.
  for (const format of ALL_FORMAT_VALUES) {
    if (values.every(value => timeFormatMatches(format, value))) return format;
  }
  return null;
}

View File

@ -1,97 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { IngestionSpec } from './ingestion-spec';
/**
 * Builds the example ingestion spec for the Wikipedia sample data.
 *
 * @param dataSourceSuffix text appended to 'wikipedia-' to form the datasource name
 * @returns an 'index' type ingestion spec that reads the sample file through an HTTP firehose
 */
export function getWikipediaSpec(dataSourceSuffix: string): IngestionSpec {
  const wikipediaSpec: IngestionSpec = {
    type: 'index',
    dataSchema: {
      dataSource: `wikipedia-${dataSourceSuffix}`,
      parser: {
        type: 'string',
        parseSpec: {
          format: 'json',
          dimensionsSpec: {
            // Plain strings are listed by name only; the numeric columns are declared as 'long'.
            dimensions: [
              'isRobot',
              'channel',
              'flags',
              'isUnpatrolled',
              'page',
              'diffUrl',
              { name: 'added', type: 'long' },
              'comment',
              { name: 'commentLength', type: 'long' },
              'isNew',
              'isMinor',
              { name: 'delta', type: 'long' },
              'isAnonymous',
              'user',
              { name: 'deltaBucket', type: 'long' },
              { name: 'deleted', type: 'long' },
              'namespace'
            ]
          },
          timestampSpec: {
            column: 'timestamp',
            format: 'iso'
          }
        }
      },
      granularitySpec: {
        type: 'uniform',
        segmentGranularity: 'DAY',
        rollup: false,
        queryGranularity: 'none'
      },
      metricsSpec: []
    },
    ioConfig: {
      type: 'index',
      firehose: {
        fetchTimeout: 300000,
        type: 'http',
        uris: ['https://static.imply.io/data/wikipedia.json.gz']
      }
    },
    tuningConfig: {
      type: 'index',
      forceExtendableShardSpecs: true,
      maxParseExceptions: 100,
      maxSavedParseExceptions: 10
    }
  };

  return wikipediaSpec;
}

View File

@ -23,7 +23,7 @@ import React from 'react';
import { Field } from '../components/auto-form/auto-form';
import { ExternalLink } from '../components/external-link/external-link';
import { TIMESTAMP_FORMAT_VALUES } from './druid-time';
import { BASIC_FORMAT_VALUES, DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES } from './druid-time';
import { deepGet, deepSet } from './object-change';
// These constants are used to make sure that they are not constantly recreated thrashing the pure components
@ -278,7 +278,18 @@ const TIMESTAMP_SPEC_FORM_FIELDS: Field<TimestampSpec>[] = [
name: 'format',
type: 'string',
defaultValue: 'auto',
suggestions: ['auto'].concat(TIMESTAMP_FORMAT_VALUES),
suggestions: [
'auto',
...BASIC_FORMAT_VALUES,
{
group: 'Date and time formats',
suggestions: DATE_TIME_FORMAT_VALUES
},
{
group: 'Date only formats',
suggestions: DATE_FORMAT_VALUES
}
],
isDefined: (timestampSpec: TimestampSpec) => isColumnTimestampSpec(timestampSpec),
info: <p>
Please specify your timestamp format by using the suggestions menu or typing in a <ExternalLink href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html">format string</ExternalLink>.

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { jodaFormatToRegExp } from './joda-to-regexp';
describe('jodaFormatToRegExp', () => {
  it('works for common formats', () => {
    // Snapshots are keyed by call order within this test, so keep the list order stable.
    const commonFormats = [
      'd/M/yyyy',
      'MM/dd/YYYY',
      'M/d/YY',
      'd-M-yyyy hh:mm:ss a',
      'MM/dd/YYYY hh:mm:ss a',
      'YYYY-MM-dd HH:mm:ss',
      'YYYY-MM-dd HH:mm:ss.S'
    ];

    for (const commonFormat of commonFormats) {
      expect(jodaFormatToRegExp(commonFormat).toString()).toMatchSnapshot();
    }
  });

  it('matches dates when needed', () => {
    // One sample value checked against a format it follows and a format it does not.
    const sample = '26-4-1986 01:23:40 am';

    expect(jodaFormatToRegExp('d-M-yyyy hh:mm:ss a').test(sample)).toEqual(true);
    expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss.S').test(sample)).toEqual(false);
  });
});

View File

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Refer to https://www.joda.org/joda-time/key_format.html

const TEXT = '\\w+';
const NUMBER_2_DIGIT = '[0-9]{2}';
const NUMBER_4_DIGIT = '[0-9]{4}';

/**
 * Regular-expression source for each supported run of identical Joda pattern letters.
 * A run that is not listed here makes jodaFormatToRegExp throw.
 */
const JODA_FRAGMENT_TO_REG_EXP_STRING: Record<string, string> = {
  C: '[0-9]{1,2}',
  CC: NUMBER_2_DIGIT,
  YY: NUMBER_2_DIGIT,
  YYYY: NUMBER_4_DIGIT,
  xx: NUMBER_2_DIGIT,
  xxxx: NUMBER_4_DIGIT,
  w: '[0-9]{1,2}',
  ww: NUMBER_2_DIGIT,
  e: '[0-7]',
  // Text fields: fewer than 4 letters is the abbreviated form, 4 is the full form.
  E: TEXT,
  EE: TEXT,
  EEE: TEXT,
  EEEE: TEXT,
  yy: NUMBER_2_DIGIT,
  yyyy: NUMBER_4_DIGIT,
  D: '[0-9]{1,3}',
  DD: '[0-9]{2,3}',
  DDD: '[0-9]{3}',
  M: '(?:1[0-2]|[1-9])',
  MM: '(?:1[0-2]|0[1-9])',
  MMM: TEXT,
  MMMM: TEXT,
  d: '(?:3[0-1]|[12][0-9]|[1-9])',
  dd: '(?:3[0-1]|[12][0-9]|0[1-9])',
  a: '[ap]m',
  K: '(?:1[01]|[0-9])',
  KK: '(?:1[01]|0[0-9])',
  h: '(?:1[0-2]|[1-9])',
  hh: '(?:1[0-2]|0[1-9])',
  H: '(?:2[0-3]|1[0-9]|[0-9])',
  HH: '(?:2[0-3]|1[0-9]|0[0-9])',
  k: '(?:2[0-4]|1[0-9]|[1-9])',
  kk: '(?:2[0-4]|1[0-9]|0[1-9])',
  m: '(?:[1-5][0-9]|[0-9])',
  mm: '[0-5][0-9]',
  s: '(?:[1-5][0-9]|[0-9])',
  ss: '[0-5][0-9]',
  S: '[0-9]{1,3}',
  SS: '[0-9]{2,3}',
  SSS: '[0-9]{3}',
  z: TEXT,
  Z: TEXT
};

// One token of a Joda pattern, tried in this order:
//   1. a single-quoted literal ('' inside it, or on its own, stands for one quote character)
//   2. a run of 1 to 4 identical pattern letters
//   3. any other single character, which is literal text
const JODA_TOKEN = /'((?:[^']|'')*)'|([a-zA-Z])\2{0,3}|[^a-zA-Z]/g;

/** Escapes regular-expression metacharacters so that `text` is matched literally. */
function escapeRegExpLiteral(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Converts a Joda-Time format pattern into a RegExp that accepts strings shaped like that format.
 *
 * Literal text in the pattern (separators such as '.', as well as single-quoted sections such as
 * 'T') is matched literally. The returned RegExp is anchored at both ends and case-insensitive,
 * so the am/pm marker matches in either case.
 *
 * @param jodaFormat the Joda pattern, e.g. "yyyy-MM-dd HH:mm:ss.S" or "yyyy-MM-dd'T'HH:mm:ss"
 * @returns a RegExp matching whole strings that follow the pattern
 * @throws Error when the pattern contains a run of letters that has no known translation
 */
export function jodaFormatToRegExp(jodaFormat: string): RegExp {
  const regExpStr = jodaFormat.replace(
    JODA_TOKEN,
    (token: string, quoted: string | undefined, letter: string | undefined) => {
      if (letter) {
        const re = JODA_FRAGMENT_TO_REG_EXP_STRING[token];
        if (!re) throw new Error(`could not convert ${token} to RegExp`);
        return re;
      }

      if (quoted !== undefined) {
        // An empty quoted section is the '' escape, i.e. one literal single quote.
        const literal = quoted === '' ? `'` : quoted.replace(/''/g, `'`);
        return escapeRegExpLiteral(literal);
      }

      // Anything else is literal text and must not be read as RegExp syntax.
      return escapeRegExpLiteral(token);
    }
  );

  return new RegExp(`^${regExpStr}$`, 'i');
}