diff --git a/docs/development/extensions-core/datasketches-tuple.md b/docs/development/extensions-core/datasketches-tuple.md index fc4f74d5c81..c9a05b5ab19 100644 --- a/docs/development/extensions-core/datasketches-tuple.md +++ b/docs/development/extensions-core/datasketches-tuple.md @@ -39,19 +39,52 @@ druid.extensions.loadList=["druid-datasketches"] "name" : , "fieldName" : , "nominalEntries": , - "numberOfValues" : , - "metricColumns" : + "metricColumns" : , + "numberOfValues" : } ``` |property|description|required?| |--------|-----------|---------| |type|This String should always be "arrayOfDoublesSketch"|yes| -|name|A String for the output (result) name of the calculation.|yes| +|name|String representing the output column to store sketch values.|yes| |fieldName|A String for the name of the input field.|yes| |nominalEntries|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be a power of 2. See the [Theta sketch accuracy](https://datasketches.apache.org/docs/Theta/ThetaErrorTable) for details. |no, defaults to 16384| -|numberOfValues|Number of values associated with each distinct key. |no, defaults to 1| -|metricColumns|If building sketches from raw data, an array of names of the input columns containing numeric values to be associated with each distinct key.|no, defaults to empty array| +|metricColumns|When building sketches from raw data, an array of input columns that contain numeric values to associate with each distinct key. If not provided, assumes `fieldName` is an `arrayOfDoublesSketch`.|no; if not provided, `fieldName` is assumed to be an `arrayOfDoublesSketch`| +|numberOfValues|Number of values associated with each distinct key. |no, defaults to the length of `metricColumns` if provided and 1 otherwise| + +You can use the `arrayOfDoublesSketch` aggregator to: + +- Build a sketch from raw data. In this case, set `metricColumns` to an array of input column names. 
+- Build a sketch from an existing `ArrayOfDoubles` sketch. In this case, leave `metricColumns` unset and set the `fieldName` to an `ArrayOfDoubles` sketch with `numberOfValues` doubles. You must base64 encode `ArrayOfDoubles` sketches at ingestion time. + +#### Example on top of raw data + +Compute a theta sketch of unique users. For each user, store the `added` and `deleted` scores. The new sketch column will be called `users_theta`. + +```json +{ + "type": "arrayOfDoublesSketch", + "name": "users_theta", + "fieldName": "user", + "nominalEntries": 16384, + "metricColumns": ["added", "deleted"] +} +``` + +#### Example ingesting a precomputed sketch column + +Ingest a sketch column called `user_sketches` that has a base64 encoded value of two doubles in its array and store it in a column called `users_theta`. + +```json +{ + "type": "arrayOfDoublesSketch", + "name": "users_theta", + "fieldName": "user_sketches", + "nominalEntries": 16384, + "numberOfValues": 2 +} +``` ### Post Aggregators diff --git a/web-console/lib/keywords.js b/web-console/lib/keywords.js index e34b2daf45b..bc81153dd77 100644 --- a/web-console/lib/keywords.js +++ b/web-console/lib/keywords.js @@ -61,6 +61,9 @@ exports.SQL_KEYWORDS = [ 'REPLACE INTO', 'OVERWRITE', 'RETURNING', + 'OVER', + 'PARTITION BY', + 'WINDOW', ]; exports.SQL_EXPRESSION_PARTS = [ diff --git a/web-console/script/create-sql-docs.js b/web-console/script/create-sql-docs.js index 6af65006f8e..13ed438915b 100755 --- a/web-console/script/create-sql-docs.js +++ b/web-console/script/create-sql-docs.js @@ -52,9 +52,7 @@ function convertMarkdownToHtml(markdown) { // Concert to markdown markdown = snarkdown(markdown); - return markdown - .replace(/
/g, '

') // Double up the
s - .replace(/]*>(.*?)<\/a>/g, '$1'); // Remove links + return markdown.replace(/]*>(.*?)<\/a>/g, '$1'); // Remove links } const readDoc = async () => { diff --git a/web-console/src/bootstrap/react-table-defaults.tsx b/web-console/src/bootstrap/react-table-defaults.tsx index 4c31928064c..139a13bcd5a 100644 --- a/web-console/src/bootstrap/react-table-defaults.tsx +++ b/web-console/src/bootstrap/react-table-defaults.tsx @@ -53,12 +53,12 @@ export function bootstrapReactTable() { .map((row: any) => row[column.id]); const previewCount = countBy(previewValues); return ( - +
{Object.keys(previewCount) .sort() .map(v => `${v} (${previewCount[v]})`) .join(', ')} - +
); }, defaultPageSize: 20, diff --git a/web-console/src/components/segment-timeline/segment-timeline.tsx b/web-console/src/components/segment-timeline/segment-timeline.tsx index c138e82dff2..f8cef06189b 100644 --- a/web-console/src/components/segment-timeline/segment-timeline.tsx +++ b/web-console/src/components/segment-timeline/segment-timeline.tsx @@ -278,7 +278,7 @@ ORDER BY "start" DESC`; intervals = await queryDruidSql({ query: SegmentTimeline.getSqlQuery(startDate, endDate), }); - datasources = uniq(intervals.map(r => r.datasource)); + datasources = uniq(intervals.map(r => r.datasource).sort()); } else if (capabilities.hasCoordinatorAccess()) { const startIso = startDate.toISOString(); diff --git a/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts b/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts index f4dee926b60..7bdcaae50ab 100644 --- a/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts +++ b/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts @@ -63,6 +63,7 @@ export function externalConfigToIngestQueryPattern( config: ExternalConfig, isArrays: boolean[], timeExpression: SqlExpression | undefined, + partitionedByHint: string | undefined, ): IngestQueryPattern { return { destinationTableName: guessDataSourceNameFromInputSource(config.inputSource) || 'data', @@ -71,7 +72,7 @@ export function externalConfigToIngestQueryPattern( mainExternalConfig: config, filters: [], dimensions: externalConfigToInitDimensions(config, isArrays, timeExpression), - partitionedBy: timeExpression ? 'day' : 'all', + partitionedBy: partitionedByHint || (timeExpression ? 
'day' : 'all'), clusteredBy: [], }; } diff --git a/web-console/src/druid-models/metric-spec/metric-spec.tsx b/web-console/src/druid-models/metric-spec/metric-spec.tsx index 6b3290272ee..4295310486c 100644 --- a/web-console/src/druid-models/metric-spec/metric-spec.tsx +++ b/web-console/src/druid-models/metric-spec/metric-spec.tsx @@ -78,6 +78,7 @@ export const METRIC_SPEC_FIELDS: Field[] = [ // Should the first / last aggregators become usable at ingestion time, reverse the changes made in: // https://github.com/apache/druid/pull/10794 'thetaSketch', + 'arrayOfDoublesSketch', { group: 'HLLSketch', suggestions: ['HLLSketchBuild', 'HLLSketchMerge'], @@ -104,6 +105,7 @@ export const METRIC_SPEC_FIELDS: Field[] = [ 'doubleMax', 'floatMax', 'thetaSketch', + 'arrayOfDoublesSketch', 'HLLSketchBuild', 'HLLSketchMerge', 'quantilesDoublesSketch', @@ -178,6 +180,47 @@ export const METRIC_SPEC_FIELDS: Field[] = [ ), }, + // arrayOfDoublesSketch + { + name: 'nominalEntries', + type: 'number', + defined: typeIs('arrayOfDoublesSketch'), + defaultValue: 16384, + info: ( + <> +

+ Parameter that determines the accuracy and size of the sketch. Higher k means higher + accuracy but more space to store sketches. +

+

Must be a power of 2.

+

+ See the{' '} + + Theta sketch accuracy + {' '} + for details. +

+ + ), + }, + { + name: 'metricColumns', + type: 'string-array', + defined: typeIs('arrayOfDoublesSketch'), + info: ( + <> + If building sketches from raw data, an array of names of the input columns containing + numeric values to be associated with each distinct key. + + ), + }, + { + name: 'numberOfValues', + type: 'number', + defined: typeIs('arrayOfDoublesSketch'), + placeholder: 'metricColumns length or 1', + info: <>Number of values associated with each distinct key., + }, // HLLSketchBuild & HLLSketchMerge { name: 'lgK', diff --git a/web-console/src/druid-models/workbench-query/workbench-query.ts b/web-console/src/druid-models/workbench-query/workbench-query.ts index 36c71cb07a8..b6a1f74aa10 100644 --- a/web-console/src/druid-models/workbench-query/workbench-query.ts +++ b/web-console/src/druid-models/workbench-query/workbench-query.ts @@ -82,13 +82,19 @@ export class WorkbenchQuery { externalConfig: ExternalConfig, isArrays: boolean[], timeExpression: SqlExpression | undefined, + partitionedByHint: string | undefined, ): WorkbenchQuery { return new WorkbenchQuery({ queryContext: {}, queryParts: [ WorkbenchQueryPart.fromQueryString( ingestQueryPatternToQuery( - externalConfigToIngestQueryPattern(externalConfig, isArrays, timeExpression), + externalConfigToIngestQueryPattern( + externalConfig, + isArrays, + timeExpression, + partitionedByHint, + ), ).toString(), ), ], diff --git a/web-console/src/helpers/execution/sql-task-execution.ts b/web-console/src/helpers/execution/sql-task-execution.ts index e7f7250c535..358eee25c56 100644 --- a/web-console/src/helpers/execution/sql-task-execution.ts +++ b/web-console/src/helpers/execution/sql-task-execution.ts @@ -124,9 +124,14 @@ export async function reattachTaskExecution( option: ReattachTaskQueryOptions, ): Promise> { const { id, cancelToken, preserveOnTermination } = option; - let execution = await getTaskExecution(id, undefined, cancelToken); + let execution: Execution; - execution = await 
updateExecutionWithDatasourceExistsIfNeeded(execution, cancelToken); + try { + execution = await getTaskExecution(id, undefined, cancelToken); + execution = await updateExecutionWithDatasourceExistsIfNeeded(execution, cancelToken); + } catch (e) { + throw new Error(`Reattaching to query failed due to: ${e.message}`); + } if (execution.isFullyComplete()) return execution; diff --git a/web-console/src/react-table/react-table-extra.scss b/web-console/src/react-table/react-table-extra.scss index d87c25c84d8..bdeecf5e964 100644 --- a/web-console/src/react-table/react-table-extra.scss +++ b/web-console/src/react-table/react-table-extra.scss @@ -45,4 +45,8 @@ } } } + + .default-aggregated { + padding: 10px 5px; + } } diff --git a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx index 23c81d02d21..2b3126372a7 100644 --- a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx +++ b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx @@ -151,6 +151,7 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView( { inputSource, inputFormat, signature }, isArrays, timeExpression, + undefined, ), ).toString(), queryContext: { @@ -167,6 +168,7 @@ export const SqlDataLoaderView = React.memo(function SqlDataLoaderView( { inputSource, inputFormat, signature }, isArrays, timeExpression, + undefined, ), ).toString(), }); diff --git a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx index d857d37dbf3..e043cf13404 100644 --- a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx +++ b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx @@ -20,7 +20,7 @@ import { Classes, Dialog } from '@blueprintjs/core'; import { 
SqlExpression } from 'druid-query-toolkit'; import React, { useState } from 'react'; -import { ExternalConfig } from '../../../druid-models'; +import { ExternalConfig, InputFormat, InputSource } from '../../../druid-models'; import { InputFormatStep } from '../input-format-step/input-format-step'; import { InputSourceStep } from '../input-source-step/input-source-step'; @@ -32,20 +32,27 @@ export interface ConnectExternalDataDialogProps { config: ExternalConfig, isArrays: boolean[], timeExpression: SqlExpression | undefined, + partitionedByHint: string | undefined, ): void; onClose(): void; } +interface ExternalConfigStep { + inputSource?: InputSource; + inputFormat?: InputFormat; + partitionedByHint?: string; +} + export const ConnectExternalDataDialog = React.memo(function ConnectExternalDataDialog( props: ConnectExternalDataDialogProps, ) { const { initExternalConfig, onClose, onSetExternalConfig } = props; - const [externalConfigStep, setExternalConfigStep] = useState>( + const [externalConfigStep, setExternalConfigStep] = useState( initExternalConfig || {}, ); - const { inputSource, inputFormat } = externalConfigStep; + const { inputSource, inputFormat, partitionedByHint } = externalConfigStep; return ( { - setExternalConfigStep({ inputSource, inputFormat }); + onSet={(inputSource, inputFormat, partitionedByHint) => { + setExternalConfigStep({ inputSource, inputFormat, partitionedByHint }); }} /> )} diff --git a/web-console/src/views/workbench-view/input-source-step/example-inputs.ts b/web-console/src/views/workbench-view/input-source-step/example-inputs.ts index a74f1754b17..a6ad104c7f4 100644 --- a/web-console/src/views/workbench-view/input-source-step/example-inputs.ts +++ b/web-console/src/views/workbench-view/input-source-step/example-inputs.ts @@ -23,6 +23,7 @@ export interface ExampleInput { description: string; inputSource: InputSource; inputFormat?: InputFormat; + partitionedByHint?: string; } const TRIPS_INPUT_FORMAT: InputFormat = { @@ -122,6 +123,7 
@@ export const EXAMPLE_INPUTS: ExampleInput[] = [ ], }, inputFormat: TRIPS_INPUT_FORMAT, + partitionedByHint: 'month', }, { name: 'NYC Taxi cabs (all files)', @@ -206,6 +208,7 @@ export const EXAMPLE_INPUTS: ExampleInput[] = [ ], }, inputFormat: TRIPS_INPUT_FORMAT, + partitionedByHint: 'month', }, { name: 'FlightCarrierOnTime (1 month)', diff --git a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx index f144e8f975d..9ea55fd0d17 100644 --- a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx +++ b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx @@ -71,7 +71,11 @@ const ROWS_TO_SAMPLE = 50; export interface InputSourceStepProps { initInputSource: Partial | undefined; mode: 'sampler' | 'msq'; - onSet(inputSource: InputSource, inputFormat: InputFormat): void; + onSet( + inputSource: InputSource, + inputFormat: InputFormat, + partitionedByHint: string | undefined, + ): void; } export const InputSourceStep = React.memo(function InputSourceStep(props: InputSourceStepProps) { @@ -169,7 +173,11 @@ export const InputSourceStep = React.memo(function InputSourceStep(props: InputS useEffect(() => { const guessedInputFormat = guessedInputFormatState.data; if (!guessedInputFormat) return; - onSet(exampleInput?.inputSource || (inputSource as any), guessedInputFormat); + onSet( + exampleInput?.inputSource || (inputSource as any), + guessedInputFormat, + exampleInput?.partitionedByHint, + ); // eslint-disable-next-line react-hooks/exhaustive-deps }, [guessedInputFormatState]); diff --git a/web-console/src/views/workbench-view/workbench-view.tsx b/web-console/src/views/workbench-view/workbench-view.tsx index 5d601d2fb65..56af602e432 100644 --- a/web-console/src/views/workbench-view/workbench-view.tsx +++ b/web-console/src/views/workbench-view/workbench-view.tsx @@ -324,9 +324,14 @@ export class WorkbenchView extends 
React.PureComponent { + onSetExternalConfig={(externalConfig, isArrays, timeExpression, partitionedByHint) => { this.handleNewTab( - WorkbenchQuery.fromInitExternalConfig(externalConfig, isArrays, timeExpression), + WorkbenchQuery.fromInitExternalConfig( + externalConfig, + isArrays, + timeExpression, + partitionedByHint, + ), 'Ext ' + guessDataSourceNameFromInputSource(externalConfig.inputSource), ); }}