add ability to make inputFormat part of the example datasets (#13402)

This commit is contained in:
Vadim Ogievetsky 2022-11-21 12:50:44 -08:00 committed by GitHub
parent 68018a808f
commit fe34ecc5e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 86 additions and 17 deletions

View File

@ -16,15 +16,74 @@
* limitations under the License.
*/
import { InputSource } from '../../../druid-models';
import { InputFormat, InputSource } from '../../../druid-models';
export interface ExampleInputSource {
/**
 * One built-in example dataset that can be offered to the user as a
 * ready-made input.
 */
export interface ExampleInput {
/** Name of the example; the input source step looks an example up by comparing this to the selected value. */
name: string;
/** Short human-readable summary of the dataset (e.g. its time span and file format). */
description: string;
/** The input source that is sampled/queried and handed on when this example is chosen. */
inputSource: InputSource;
/**
 * Optional pre-made input format for this example. The input source step
 * passes it along as a suggestion and uses it in place of the auto-detected
 * format only when both have the same `type`.
 */
inputFormat?: InputFormat;
}
export const EXAMPLE_INPUT_SOURCES: ExampleInputSource[] = [
/**
 * Input format shared by the example entries that read the
 * `trips_*.csv.gz` files (the NYC taxi cab examples).
 *
 * It is a CSV format with an explicit column list instead of header detection.
 */
const TRIPS_INPUT_FORMAT: InputFormat = {
type: 'csv',
// Column names are supplied explicitly below rather than taken from the data.
// NOTE(review): presumably the trips files have no header row - confirm against the data.
findColumnsFromHeader: false,
// NOTE(review): with header detection off the names presumably map to fields
// by position, so the order here must match the files - do not reorder.
columns: [
'trip_id',
'vendor_id',
'pickup_datetime',
'dropoff_datetime',
'store_and_fwd_flag',
'rate_code_id',
'pickup_longitude',
'pickup_latitude',
'dropoff_longitude',
'dropoff_latitude',
'passenger_count',
'trip_distance',
'fare_amount',
'extra',
'mta_tax',
'tip_amount',
'tolls_amount',
'ehail_fee',
'improvement_surcharge',
'total_amount',
'payment_type',
'trip_type',
'pickup',
'dropoff',
'cab_type',
// Weather-related columns.
'precipitation',
'snow_depth',
'snowfall',
'max_temperature',
'min_temperature',
'average_wind_speed',
// Columns prefixed `pickup_` describing the pickup area.
'pickup_nyct2010_gid',
'pickup_ctlabel',
'pickup_borocode',
'pickup_boroname',
'pickup_ct2010',
'pickup_boroct2010',
'pickup_cdeligibil',
'pickup_ntacode',
'pickup_ntaname',
'pickup_puma',
// The same set of area columns, prefixed `dropoff_`.
'dropoff_nyct2010_gid',
'dropoff_ctlabel',
'dropoff_borocode',
'dropoff_boroname',
'dropoff_ct2010',
'dropoff_boroct2010',
'dropoff_cdeligibil',
'dropoff_ntacode',
'dropoff_ntaname',
'dropoff_puma',
],
};
export const EXAMPLE_INPUTS: ExampleInput[] = [
{
name: 'Wikipedia',
description: 'One day of wikipedia edits (JSON)',
@ -62,6 +121,7 @@ export const EXAMPLE_INPUT_SOURCES: ExampleInputSource[] = [
'https://static.imply.io/example-data/trips/trips_xac.csv.gz',
],
},
inputFormat: TRIPS_INPUT_FORMAT,
},
{
name: 'NYC Taxi cabs (all files)',
@ -145,6 +205,7 @@ export const EXAMPLE_INPUT_SOURCES: ExampleInputSource[] = [
'https://static.imply.io/example-data/trips/trips_xcv.csv.gz',
],
},
inputFormat: TRIPS_INPUT_FORMAT,
},
{
name: 'FlightCarrierOnTime (1 month)',

View File

@ -55,7 +55,7 @@ import { UrlBaser } from '../../../singletons';
import { filterMap, IntermediateQueryState } from '../../../utils';
import { postToSampler, SampleSpec } from '../../../utils/sampler';
import { EXAMPLE_INPUT_SOURCES } from './example-inputs';
import { EXAMPLE_INPUTS } from './example-inputs';
import { InputSourceInfo } from './input-source-info';
import './input-source-step.scss';
@ -81,16 +81,15 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
const [inputSource, setInputSource] = useState<Partial<InputSource> | string | undefined>(
initInputSource,
);
const exampleInputSource = EXAMPLE_INPUT_SOURCES.find(
({ name }) => name === inputSource,
)?.inputSource;
const exampleInput = EXAMPLE_INPUTS.find(({ name }) => name === inputSource);
const [guessedInputFormatState, connectQueryManager] = useQueryManager<
InputSource,
{ inputSource: InputSource; suggestedInputFormat?: InputFormat },
InputFormat,
Execution
>({
processQuery: async (inputSource: InputSource, cancelToken) => {
processQuery: async ({ inputSource, suggestedInputFormat }, cancelToken) => {
let guessedInputFormat: InputFormat | undefined;
if (mode === 'sampler') {
const sampleSpec: SampleSpec = {
type: 'index_parallel',
@ -127,7 +126,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
);
if (!sampleLines.length) throw new Error('No data returned from sampler');
return guessInputFormat(sampleLines);
guessedInputFormat = guessInputFormat(sampleLines);
} else {
const tableExpression = externalConfigToTableExpression({
inputSource,
@ -151,8 +150,14 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
);
if (result instanceof IntermediateQueryState) return result;
return resultToInputFormat(result);
guessedInputFormat = resultToInputFormat(result);
}
if (suggestedInputFormat?.type === guessedInputFormat.type) {
return suggestedInputFormat;
}
return guessedInputFormat;
},
backgroundStatusCheck: async (execution, query, cancelToken) => {
const result = await executionBackgroundResultStatusCheck(execution, query, cancelToken);
@ -164,7 +169,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
useEffect(() => {
const guessedInputFormat = guessedInputFormatState.data;
if (!guessedInputFormat) return;
onSet(exampleInputSource || (inputSource as any), guessedInputFormat);
onSet(exampleInput?.inputSource || (inputSource as any), guessedInputFormat);
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [guessedInputFormatState]);
@ -217,7 +222,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
selectedValue={inputSource}
onChange={e => setInputSource(e.currentTarget.value)}
>
{EXAMPLE_INPUT_SOURCES.map((e, i) => (
{EXAMPLE_INPUTS.map((e, i) => (
<Radio
key={i}
labelElement={
@ -306,10 +311,13 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
text={guessedInputFormatState.isLoading() ? 'Loading...' : 'Use example'}
rightIcon={IconNames.ARROW_RIGHT}
intent={Intent.PRIMARY}
disabled={!exampleInputSource || guessedInputFormatState.isLoading()}
disabled={!exampleInput || guessedInputFormatState.isLoading()}
onClick={() => {
if (!exampleInputSource) return;
connectQueryManager.runQuery(exampleInputSource);
if (!exampleInput) return;
connectQueryManager.runQuery({
inputSource: exampleInput.inputSource,
suggestedInputFormat: exampleInput.inputFormat,
});
}}
/>
) : inputSource ? (
@ -324,7 +332,7 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS
}
onClick={() => {
if (!AutoForm.isValidModel(inputSource, INPUT_SOURCE_FIELDS)) return;
connectQueryManager.runQuery(inputSource);
connectQueryManager.runQuery({ inputSource });
}}
/>
) : undefined}