add ability to make inputFormat part of the example datasets (#13402)

This commit is contained in:
Vadim Ogievetsky 2022-11-21 12:50:44 -08:00 committed by GitHub
parent 68018a808f
commit fe34ecc5e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 86 additions and 17 deletions

View File

@ -16,15 +16,74 @@
* limitations under the License. * limitations under the License.
*/ */
import { InputSource } from '../../../druid-models'; import { InputFormat, InputSource } from '../../../druid-models';
export interface ExampleInputSource { export interface ExampleInput {
name: string; name: string;
description: string; description: string;
inputSource: InputSource; inputSource: InputSource;
inputFormat?: InputFormat;
} }
export const EXAMPLE_INPUT_SOURCES: ExampleInputSource[] = [ const TRIPS_INPUT_FORMAT: InputFormat = {
type: 'csv',
findColumnsFromHeader: false,
columns: [
'trip_id',
'vendor_id',
'pickup_datetime',
'dropoff_datetime',
'store_and_fwd_flag',
'rate_code_id',
'pickup_longitude',
'pickup_latitude',
'dropoff_longitude',
'dropoff_latitude',
'passenger_count',
'trip_distance',
'fare_amount',
'extra',
'mta_tax',
'tip_amount',
'tolls_amount',
'ehail_fee',
'improvement_surcharge',
'total_amount',
'payment_type',
'trip_type',
'pickup',
'dropoff',
'cab_type',
'precipitation',
'snow_depth',
'snowfall',
'max_temperature',
'min_temperature',
'average_wind_speed',
'pickup_nyct2010_gid',
'pickup_ctlabel',
'pickup_borocode',
'pickup_boroname',
'pickup_ct2010',
'pickup_boroct2010',
'pickup_cdeligibil',
'pickup_ntacode',
'pickup_ntaname',
'pickup_puma',
'dropoff_nyct2010_gid',
'dropoff_ctlabel',
'dropoff_borocode',
'dropoff_boroname',
'dropoff_ct2010',
'dropoff_boroct2010',
'dropoff_cdeligibil',
'dropoff_ntacode',
'dropoff_ntaname',
'dropoff_puma',
],
};
export const EXAMPLE_INPUTS: ExampleInput[] = [
{ {
name: 'Wikipedia', name: 'Wikipedia',
description: 'One day of wikipedia edits (JSON)', description: 'One day of wikipedia edits (JSON)',
@ -62,6 +121,7 @@ export const EXAMPLE_INPUT_SOURCES: ExampleInputSource[] = [
'https://static.imply.io/example-data/trips/trips_xac.csv.gz', 'https://static.imply.io/example-data/trips/trips_xac.csv.gz',
], ],
}, },
inputFormat: TRIPS_INPUT_FORMAT,
}, },
{ {
name: 'NYC Taxi cabs (all files)', name: 'NYC Taxi cabs (all files)',
@ -145,6 +205,7 @@ export const EXAMPLE_INPUT_SOURCES: ExampleInputSource[] = [
'https://static.imply.io/example-data/trips/trips_xcv.csv.gz', 'https://static.imply.io/example-data/trips/trips_xcv.csv.gz',
], ],
}, },
inputFormat: TRIPS_INPUT_FORMAT,
}, },
{ {
name: 'FlightCarrierOnTime (1 month)', name: 'FlightCarrierOnTime (1 month)',

View File

@ -55,7 +55,7 @@ import { UrlBaser } from '../../../singletons';
import { filterMap, IntermediateQueryState } from '../../../utils'; import { filterMap, IntermediateQueryState } from '../../../utils';
import { postToSampler, SampleSpec } from '../../../utils/sampler'; import { postToSampler, SampleSpec } from '../../../utils/sampler';
import { EXAMPLE_INPUT_SOURCES } from './example-inputs'; import { EXAMPLE_INPUTS } from './example-inputs';
import { InputSourceInfo } from './input-source-info'; import { InputSourceInfo } from './input-source-info';
import './input-source-step.scss'; import './input-source-step.scss';
@ -81,16 +81,15 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
const [inputSource, setInputSource] = useState<Partial<InputSource> | string | undefined>( const [inputSource, setInputSource] = useState<Partial<InputSource> | string | undefined>(
initInputSource, initInputSource,
); );
const exampleInputSource = EXAMPLE_INPUT_SOURCES.find( const exampleInput = EXAMPLE_INPUTS.find(({ name }) => name === inputSource);
({ name }) => name === inputSource,
)?.inputSource;
const [guessedInputFormatState, connectQueryManager] = useQueryManager< const [guessedInputFormatState, connectQueryManager] = useQueryManager<
InputSource, { inputSource: InputSource; suggestedInputFormat?: InputFormat },
InputFormat, InputFormat,
Execution Execution
>({ >({
processQuery: async (inputSource: InputSource, cancelToken) => { processQuery: async ({ inputSource, suggestedInputFormat }, cancelToken) => {
let guessedInputFormat: InputFormat | undefined;
if (mode === 'sampler') { if (mode === 'sampler') {
const sampleSpec: SampleSpec = { const sampleSpec: SampleSpec = {
type: 'index_parallel', type: 'index_parallel',
@ -127,7 +126,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
); );
if (!sampleLines.length) throw new Error('No data returned from sampler'); if (!sampleLines.length) throw new Error('No data returned from sampler');
return guessInputFormat(sampleLines); guessedInputFormat = guessInputFormat(sampleLines);
} else { } else {
const tableExpression = externalConfigToTableExpression({ const tableExpression = externalConfigToTableExpression({
inputSource, inputSource,
@ -151,8 +150,14 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
); );
if (result instanceof IntermediateQueryState) return result; if (result instanceof IntermediateQueryState) return result;
return resultToInputFormat(result); guessedInputFormat = resultToInputFormat(result);
} }
if (suggestedInputFormat?.type === guessedInputFormat.type) {
return suggestedInputFormat;
}
return guessedInputFormat;
}, },
backgroundStatusCheck: async (execution, query, cancelToken) => { backgroundStatusCheck: async (execution, query, cancelToken) => {
const result = await executionBackgroundResultStatusCheck(execution, query, cancelToken); const result = await executionBackgroundResultStatusCheck(execution, query, cancelToken);
@ -164,7 +169,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
useEffect(() => { useEffect(() => {
const guessedInputFormat = guessedInputFormatState.data; const guessedInputFormat = guessedInputFormatState.data;
if (!guessedInputFormat) return; if (!guessedInputFormat) return;
onSet(exampleInputSource || (inputSource as any), guessedInputFormat); onSet(exampleInput?.inputSource || (inputSource as any), guessedInputFormat);
// eslint-disable-next-line react-hooks/exhaustive-deps // eslint-disable-next-line react-hooks/exhaustive-deps
}, [guessedInputFormatState]); }, [guessedInputFormatState]);
@ -217,7 +222,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
selectedValue={inputSource} selectedValue={inputSource}
onChange={e => setInputSource(e.currentTarget.value)} onChange={e => setInputSource(e.currentTarget.value)}
> >
{EXAMPLE_INPUT_SOURCES.map((e, i) => ( {EXAMPLE_INPUTS.map((e, i) => (
<Radio <Radio
key={i} key={i}
labelElement={ labelElement={
@ -306,10 +311,13 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
text={guessedInputFormatState.isLoading() ? 'Loading...' : 'Use example'} text={guessedInputFormatState.isLoading() ? 'Loading...' : 'Use example'}
rightIcon={IconNames.ARROW_RIGHT} rightIcon={IconNames.ARROW_RIGHT}
intent={Intent.PRIMARY} intent={Intent.PRIMARY}
disabled={!exampleInputSource || guessedInputFormatState.isLoading()} disabled={!exampleInput || guessedInputFormatState.isLoading()}
onClick={() => { onClick={() => {
if (!exampleInputSource) return; if (!exampleInput) return;
connectQueryManager.runQuery(exampleInputSource); connectQueryManager.runQuery({
inputSource: exampleInput.inputSource,
suggestedInputFormat: exampleInput.inputFormat,
});
}} }}
/> />
) : inputSource ? ( ) : inputSource ? (
@ -324,7 +332,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
} }
onClick={() => { onClick={() => {
if (!AutoForm.isValidModel(inputSource, INPUT_SOURCE_FIELDS)) return; if (!AutoForm.isValidModel(inputSource, INPUT_SOURCE_FIELDS)) return;
connectQueryManager.runQuery(inputSource); connectQueryManager.runQuery({ inputSource });
}} }}
/> />
) : undefined} ) : undefined}