mirror of https://github.com/apache/druid.git
Support more JODA time formats (#7857)
This commit is contained in:
parent
c3d230b354
commit
c612ddc0f4
|
@ -30,13 +30,11 @@ import {
|
|||
Position
|
||||
} from '@blueprintjs/core';
|
||||
import { IconNames } from '@blueprintjs/icons';
|
||||
import classNames from 'classnames';
|
||||
import React from 'react';
|
||||
|
||||
import { AboutDialog } from '../../dialogs/about-dialog/about-dialog';
|
||||
import { CoordinatorDynamicConfigDialog } from '../../dialogs/coordinator-dynamic-config/coordinator-dynamic-config';
|
||||
import { OverlordDynamicConfigDialog } from '../../dialogs/overlord-dynamic-config/overlord-dynamic-config';
|
||||
import { getWikipediaSpec } from '../../utils/example-ingestion-spec';
|
||||
import {
|
||||
DRUID_DOCS,
|
||||
DRUID_GITHUB,
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 1`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])\\\\/(?:1[0-2]|[1-9])\\\\/[0-9]{4}$/i"`;
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 2`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4}$/i"`;
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 3`] = `"/^(?:1[0-2]|[1-9])\\\\/(?:3[0-1]|[12][0-9]|[1-9])\\\\/[0-9]{2}$/i"`;
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 4`] = `"/^(?:3[0-1]|[12][0-9]|[1-9])-(?:1[0-2]|[1-9])-[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 5`] = `"/^(?:1[0-2]|0[1-9])\\\\/(?:3[0-1]|[12][0-9]|0[1-9])\\\\/[0-9]{4} (?:1[0-2]|0[1-9]):[0-5][0-9]:[0-5][0-9] [ap]m$/i"`;
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 6`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9]$/i"`;
|
||||
|
||||
exports[`jodaFormatToRegExp works for common formats 7`] = `"/^[0-9]{4}-(?:1[0-2]|0[1-9])-(?:3[0-1]|[12][0-9]|0[1-9]) (?:2[0-3]|1[0-9]|0[0-9]):[0-5][0-9]:[0-5][0-9].[0-9]{1,3}$/i"`;
|
|
@ -16,14 +16,33 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
export type DruidTimestampFormat = 'iso' | 'millis' | 'posix' | 'auto' | 'd/M/yyyy' | 'dd-M-yyyy hh:mm:ss a' |
|
||||
'MM/dd/YYYY' | 'M/d/YY' | 'MM/dd/YYYY hh:mm:ss a' | 'YYYY-MM-dd HH:mm:ss' | 'YYYY-MM-dd HH:mm:ss.S';
|
||||
import { jodaFormatToRegExp } from './joda-to-regexp';
|
||||
|
||||
export const TIMESTAMP_FORMAT_VALUES: DruidTimestampFormat[] = [
|
||||
'iso', 'millis', 'posix', 'MM/dd/YYYY hh:mm:ss a', 'MM/dd/YYYY', 'M/d/YY', 'd/M/yyyy',
|
||||
'YYYY-MM-dd HH:mm:ss', 'YYYY-MM-dd HH:mm:ss.S'
|
||||
/** Keyword (non-pattern) timestamp formats, listed in the order they are tried and suggested. */
export const BASIC_FORMAT_VALUES: string[] = ['iso', 'millis', 'posix'];
|
||||
|
||||
/** Date-only pattern strings, listed in the order they are tried and suggested. */
export const DATE_FORMAT_VALUES: string[] = [
  'dd/MM/yyyy', 'MM/dd/yyyy',
  'd/M/yy', 'M/d/yy',
  'd/M/yyyy', 'M/d/yyyy'
];
|
||||
|
||||
/** Date-and-time pattern strings, listed in the order they are tried and suggested. */
export const DATE_TIME_FORMAT_VALUES: string[] = [
  'd/M/yyyy H:mm:ss', 'M/d/yyyy H:mm:ss',
  'MM/dd/yyyy hh:mm:ss a',
  'yyyy-MM-dd HH:mm:ss', 'yyyy-MM-dd HH:mm:ss.S'
];
|
||||
|
||||
// Every known format in detection priority order: keyword formats, then date-only, then date-and-time patterns.
const ALL_FORMAT_VALUES: string[] = [
  ...BASIC_FORMAT_VALUES,
  ...DATE_FORMAT_VALUES,
  ...DATE_TIME_FORMAT_VALUES
];
|
||||
|
||||
const EXAMPLE_DATE_ISO = '2015-10-29T23:00:00.000Z';
|
||||
const EXAMPLE_DATE_VALUE = Date.parse(EXAMPLE_DATE_ISO);
|
||||
const MIN_MILLIS = 3.15576e11; // 3 years in millis, so Tue Jan 01 1980
|
||||
|
@ -33,18 +52,9 @@ const MAX_POSIX = MAX_MILLIS / 1000;
|
|||
|
||||
// copied from http://goo.gl/0ejHHW with small tweak to make dddd not pass on its own
|
||||
// tslint:disable-next-line:max-line-length
|
||||
export const ISO_MATCHER = new RegExp(/^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/);
|
||||
export const JODA_TO_REGEXP_LOOKUP: Record<string, RegExp> = {
|
||||
'd/M/yyyy': /^[12]?\d\/1?\d\/\d\d\d\d$/,
|
||||
'MM/dd/YYYY': /^\d\d\/\d\d\/\d\d\d\d$/,
|
||||
'M/d/YY': /^1?\d\/[12]?\d\/\d\d$/,
|
||||
'd-M-yyyy hh:mm:ss a': /^[12]?\d-1?\d-\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
|
||||
'MM/dd/YYYY hh:mm:ss a' : /^\d\d\/\d\d\/\d\d\d\d \d\d:\d\d:\d\d [ap]m$/i,
|
||||
'YYYY-MM-dd HH:mm:ss' : /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d$/,
|
||||
'YYYY-MM-dd HH:mm:ss.S': /^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d\d\d$/
|
||||
};
|
||||
export const ISO_MATCHER = /^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))(T((([01]\d|2[0-3])((:?)[0-5]\d)?|24:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$/;
|
||||
|
||||
export function timeFormatMatches(format: DruidTimestampFormat, value: string | number): boolean {
|
||||
export function timeFormatMatches(format: string, value: string | number): boolean {
|
||||
if (format === 'iso') {
|
||||
return ISO_MATCHER.test(String(value));
|
||||
}
|
||||
|
@ -59,14 +69,11 @@ export function timeFormatMatches(format: DruidTimestampFormat, value: string |
|
|||
return MIN_POSIX < absValue && absValue < MAX_POSIX;
|
||||
}
|
||||
|
||||
const formatRegexp = JODA_TO_REGEXP_LOOKUP[format];
|
||||
if (!formatRegexp) throw new Error(`unknown Druid format ${format}`);
|
||||
|
||||
return formatRegexp.test(String(value));
|
||||
return jodaFormatToRegExp(format).test(String(value));
|
||||
}
|
||||
|
||||
export function possibleDruidFormatForValues(values: any[]): DruidTimestampFormat | null {
|
||||
return TIMESTAMP_FORMAT_VALUES.filter(format => {
|
||||
/**
 * Picks the first entry of ALL_FORMAT_VALUES that every one of the given values matches.
 *
 * @param values sample values to test against each known format
 * @returns the first format for which timeFormatMatches holds for all values, or null if there is none
 */
export function possibleDruidFormatForValues(values: any[]): string | null {
  const fitsAllValues = (format: string): boolean => {
    return values.every(value => timeFormatMatches(format, value));
  };

  const candidates = ALL_FORMAT_VALUES.filter(fitsAllValues);
  if (candidates.length === 0) return null;
  return candidates[0] || null;
}
|
||||
|
|
|
@ -1,97 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { IngestionSpec } from './ingestion-spec';
|
||||
|
||||
/**
 * Builds the example ingestion spec that loads the Wikipedia sample data over HTTP.
 *
 * @param dataSourceSuffix text appended after 'wikipedia-' to form the data source name
 * @returns an 'index' type ingestion spec with a JSON parser and an 'iso' timestamp column
 */
export function getWikipediaSpec(dataSourceSuffix: string): IngestionSpec {
  // Kept as a single literal so every nested value is checked directly against IngestionSpec.
  return {
    type: 'index',
    dataSchema: {
      dataSource: `wikipedia-${dataSourceSuffix}`,
      parser: {
        type: 'string',
        parseSpec: {
          format: 'json',
          dimensionsSpec: {
            dimensions: [
              'isRobot',
              'channel',
              'flags',
              'isUnpatrolled',
              'page',
              'diffUrl',
              { name: 'added', type: 'long' },
              'comment',
              { name: 'commentLength', type: 'long' },
              'isNew',
              'isMinor',
              { name: 'delta', type: 'long' },
              'isAnonymous',
              'user',
              { name: 'deltaBucket', type: 'long' },
              { name: 'deleted', type: 'long' },
              'namespace'
            ]
          },
          timestampSpec: {
            column: 'timestamp',
            format: 'iso'
          }
        }
      },
      granularitySpec: {
        type: 'uniform',
        segmentGranularity: 'DAY',
        rollup: false,
        queryGranularity: 'none'
      },
      metricsSpec: []
    },
    ioConfig: {
      type: 'index',
      firehose: {
        fetchTimeout: 300000,
        type: 'http',
        uris: ['https://static.imply.io/data/wikipedia.json.gz']
      }
    },
    tuningConfig: {
      type: 'index',
      forceExtendableShardSpecs: true,
      maxParseExceptions: 100,
      maxSavedParseExceptions: 10
    }
  };
}
|
|
@ -23,7 +23,7 @@ import React from 'react';
|
|||
import { Field } from '../components/auto-form/auto-form';
|
||||
import { ExternalLink } from '../components/external-link/external-link';
|
||||
|
||||
import { TIMESTAMP_FORMAT_VALUES } from './druid-time';
|
||||
import { BASIC_FORMAT_VALUES, DATE_FORMAT_VALUES, DATE_TIME_FORMAT_VALUES } from './druid-time';
|
||||
import { deepGet, deepSet } from './object-change';
|
||||
|
||||
// These constants are used to make sure that they are not constantly recreated thrashing the pure components
|
||||
|
@ -278,7 +278,18 @@ const TIMESTAMP_SPEC_FORM_FIELDS: Field<TimestampSpec>[] = [
|
|||
name: 'format',
|
||||
type: 'string',
|
||||
defaultValue: 'auto',
|
||||
suggestions: ['auto'].concat(TIMESTAMP_FORMAT_VALUES),
|
||||
suggestions: [
|
||||
'auto',
|
||||
...BASIC_FORMAT_VALUES,
|
||||
{
|
||||
group: 'Date and time formats',
|
||||
suggestions: DATE_TIME_FORMAT_VALUES
|
||||
},
|
||||
{
|
||||
group: 'Date only formats',
|
||||
suggestions: DATE_FORMAT_VALUES
|
||||
}
|
||||
],
|
||||
isDefined: (timestampSpec: TimestampSpec) => isColumnTimestampSpec(timestampSpec),
|
||||
info: <p>
|
||||
Please specify your timestamp format by using the suggestions menu or typing in a <ExternalLink href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html">format string</ExternalLink>.
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { jodaFormatToRegExp } from './joda-to-regexp';
|
||||
|
||||
describe('jodaFormatToRegExp', () => {
  it('works for common formats', () => {
    // Order matters: snapshots are keyed by call order within this test.
    const commonFormats: string[] = [
      'd/M/yyyy',
      'MM/dd/YYYY',
      'M/d/YY',
      'd-M-yyyy hh:mm:ss a',
      'MM/dd/YYYY hh:mm:ss a',
      'YYYY-MM-dd HH:mm:ss',
      'YYYY-MM-dd HH:mm:ss.S'
    ];

    for (const commonFormat of commonFormats) {
      expect(jodaFormatToRegExp(commonFormat).toString()).toMatchSnapshot();
    }
  });

  it('matches dates when needed', () => {
    const sampleDate = '26-4-1986 01:23:40 am';

    // The sample is written in the first format, so only that one may accept it.
    expect(jodaFormatToRegExp('d-M-yyyy hh:mm:ss a').test(sampleDate)).toEqual(true);
    expect(jodaFormatToRegExp('YYYY-MM-dd HH:mm:ss.S').test(sampleDate)).toEqual(false);
  });
});
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Refer to https://www.joda.org/joda-time/key_format.html
|
||||
const TEXT = '\\w+';
const NUMBER_2_DIGIT = '[0-9]{2}';
const NUMBER_4_DIGIT = '[0-9]{4}';

// Maps each supported Joda pattern fragment (a letter repeated 1-4 times) to the source of a
// regular expression that matches the text that fragment stands for.
const JODA_FRAGMENT_TO_REG_EXP_STRING: Record<string, string> = {
  C: '[0-9]{1,2}',
  CC: NUMBER_2_DIGIT,
  YY: NUMBER_2_DIGIT,
  YYYY: NUMBER_4_DIGIT,

  xx: NUMBER_2_DIGIT,
  xxxx: NUMBER_4_DIGIT,
  w: '[0-9]{1,2}',
  ww: NUMBER_2_DIGIT,
  e: '[0-7]',
  E: TEXT,
  EEEE: TEXT,

  yy: NUMBER_2_DIGIT,
  yyyy: NUMBER_4_DIGIT,
  D: '[0-9]{1,3}',
  DD: '[0-9]{2,3}',
  DDD: '[0-9]{3}',
  M: '(?:1[0-2]|[1-9])',
  MM: '(?:1[0-2]|0[1-9])',
  MMM: TEXT,
  MMMM: TEXT,
  d: '(?:3[0-1]|[12][0-9]|[1-9])',
  dd: '(?:3[0-1]|[12][0-9]|0[1-9])',

  a: '[ap]m',
  K: '(?:1[01]|[0-9])',
  KK: '(?:1[01]|0[0-9])',
  h: '(?:1[0-2]|[1-9])',
  hh: '(?:1[0-2]|0[1-9])',

  H: '(?:2[0-3]|1[0-9]|[0-9])',
  HH: '(?:2[0-3]|1[0-9]|0[0-9])',
  k: '(?:2[0-4]|1[0-9]|[1-9])',
  kk: '(?:2[0-4]|1[0-9]|0[1-9])',
  m: '(?:[1-5][0-9]|[0-9])',
  mm: '[0-5][0-9]',
  s: '(?:[1-5][0-9]|[0-9])',
  ss: '[0-5][0-9]',
  S: '[0-9]{1,3}',
  SS: '[0-9]{2,3}',
  SSS: '[0-9]{3}',
  z: TEXT,
  // A bare \w+ can never match a signed offset such as -0800 or +08:00, so accept those too
  // while still accepting everything that was matched before.
  Z: '(?:[+-][0-9]{2}:?[0-9]{2}|\\w+)'
};

// Characters that carry special meaning in a RegExp source and must be escaped to match literally.
const REG_EXP_SPECIAL_CHARS = /[\\^$.*+?()[\]{}|]/g;

// One token of a Joda format, tried in this order:
//   1. a single-quoted literal, where '' inside (or on its own) stands for one quote character
//   2. a pattern fragment: one letter repeated up to four times
//   3. any other single character, which is literal text
const JODA_TOKEN = /'((?:[^']|'')*)'|([a-zA-Z])\2{0,3}|[^a-zA-Z]/g;

// Escapes literal text so that it matches itself when embedded in a RegExp source.
function escapeForRegExp(literal: string): string {
  return literal.replace(REG_EXP_SPECIAL_CHARS, '\\$&');
}

/**
 * Converts a Joda-style format string into a case-insensitive RegExp that is anchored to the
 * whole input and accepts strings shaped like that format.
 *
 * Pattern letters are translated through JODA_FRAGMENT_TO_REG_EXP_STRING. Everything else
 * (separators and single-quoted text such as 'T') is matched literally, so a '.' in the
 * format only matches a '.' in the value.
 *
 * @param jodaFormat the format string, e.g. "yyyy-MM-dd'T'HH:mm:ss.S"
 * @returns a RegExp of the form /^...$/i
 * @throws Error if the format contains a pattern fragment that has no known translation
 */
export function jodaFormatToRegExp(jodaFormat: string): RegExp {
  const regExpStr = jodaFormat.replace(
    JODA_TOKEN,
    (token: string, quoted: string | undefined, letter: string | undefined): string => {
      if (quoted !== undefined) {
        // '' on its own is an escaped quote; inside a quoted run each '' collapses to one quote.
        const literal = quoted === '' ? `'` : quoted.replace(/''/g, `'`);
        return escapeForRegExp(literal);
      }

      // Not a letter run: plain literal text (including an unmatched quote).
      if (letter === undefined) return escapeForRegExp(token);

      const re = JODA_FRAGMENT_TO_REG_EXP_STRING[token];
      if (!re) throw new Error(`could not convert ${token} to RegExp`);
      return re;
    }
  );

  return new RegExp(`^${regExpStr}$`, 'i');
}
|
Loading…
Reference in New Issue