diff --git a/.github/workflows/reusable-standard-its.yml b/.github/workflows/reusable-standard-its.yml index ecc9c3d6544..b9bb8bb2d56 100644 --- a/.github/workflows/reusable-standard-its.yml +++ b/.github/workflows/reusable-standard-its.yml @@ -113,7 +113,7 @@ jobs: - name: Collect service logs on failure if: ${{ failure() && steps.run-it.conclusion == 'failure' }} run: | - tar cvzf ./service-logs.tgz ~/shared/logs + tar cvzf ./service-logs.tgz ~/shared/logs ~/shared/tasklogs - name: Upload Druid service logs to GitHub if: ${{ failure() && steps.run-it.conclusion == 'failure' }} diff --git a/.github/workflows/revised-its.yml b/.github/workflows/revised-its.yml index 62aac48dc99..069562bf7bd 100644 --- a/.github/workflows/revised-its.yml +++ b/.github/workflows/revised-its.yml @@ -50,7 +50,7 @@ jobs: matrix: #jdk: [8, 11, 17] jdk: [8] - it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security] + it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security, Query] #indexer: [indexer, middleManager] indexer: [middleManager] uses: ./.github/workflows/reusable-revised-its.yml diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/frame/FrameChannelMergerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/frame/FrameChannelMergerBenchmark.java index 43c5dc82a90..a864bcd3419 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/frame/FrameChannelMergerBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/frame/FrameChannelMergerBenchmark.java @@ -351,11 +351,11 @@ public class FrameChannelMergerBenchmark channels.stream().map(BlockingQueueFrameChannel::readable).collect(Collectors.toList()), frameReader, outputChannel.writable(), - FrameWriters.makeFrameWriterFactory( - FrameType.ROW_BASED, + FrameWriters.makeRowBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(1_000_000), signature, - sortKey + sortKey, + false ), sortKey, null, diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index d4c2e0906b4..8b2172182a0 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -37,6 +37,7 @@ import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunnerFactoryConglomerate; +import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; @@ -236,6 +237,14 @@ public class SqlExpressionBenchmark }) private String schema; + @Param({ + "singleString", + "fixedWidth", + "fixedWidthNonNumeric", + "always" + }) + private String deferExpressionDimensions; + @Param({ // non-expression reference "0", @@ -414,7 +423,8 @@ public class SqlExpressionBenchmark { final Map context = ImmutableMap.of( QueryContexts.VECTORIZE_KEY, vectorize, - QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize, + GroupByQueryConfig.CTX_KEY_DEFER_EXPRESSION_DIMENSIONS, deferExpressionDimensions ); final String sql = QUERIES.get(Integer.parseInt(query)); try (final 
DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { diff --git a/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java b/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java index 52a6e0d9757..98c27c4b2b8 100644 --- a/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java @@ -104,6 +104,7 @@ public class NewestSegmentFirstPolicyBenchmark null, null, null, + null, null ) ); @@ -140,7 +141,7 @@ public class NewestSegmentFirstPolicyBenchmark @Benchmark public void measureNewestSegmentFirstPolicy(Blackhole blackhole) { - final CompactionSegmentIterator iterator = policy.reset(compactionConfigs, dataSources, Collections.emptyMap()); + final CompactionSegmentIterator iterator = policy.createIterator(compactionConfigs, dataSources, Collections.emptyMap()); for (int i = 0; i < numCompactionTaskSlots && iterator.hasNext(); i++) { blackhole.consume(iterator.next()); } diff --git a/dev/intellij-setup.md b/dev/intellij-setup.md index 5c4cdc52f3d..c08375a3347 100644 --- a/dev/intellij-setup.md +++ b/dev/intellij-setup.md @@ -55,7 +55,7 @@ The installation of a MySQL metadata store is outside the scope of this document Use of other databases such as Postgres or Derby are entirely reasonable, but doing so is left as an excercise to the reader. ## ZooKeeper -This also assumes you have [ZooKeeper](http://zookeeper.apache.org/releases.html) running locally, which usually just involves downloading the latst distribution of ZooKeeper, doing some minor configuration in ZooKeeper's `conf/` directory (most defaults are fine), then running `./bin/zkServer.sh start` in the ZooKeeper directory. +This also assumes you have [ZooKeeper](http://zookeeper.apache.org/releases.html) running locally, which usually just involves downloading the latest distribution of ZooKeeper, doing some minor configuration in ZooKeeper's `conf/` directory (most defaults are fine), then running `./bin/zkServer.sh start` in the ZooKeeper directory. On macOS, you can also achieve this through the following commands diff --git a/distribution/bin/tag-missing-milestones.py b/distribution/bin/tag-missing-milestones.py index e25ca743749..cf41afd63b6 100755 --- a/distribution/bin/tag-missing-milestones.py +++ b/distribution/bin/tag-missing-milestones.py @@ -16,10 +16,9 @@ # limitations under the License. import os +import requests import subprocess import sys -import requests - if len(sys.argv) != 5: sys.stderr.write('usage: program \n') diff --git a/docs/api-reference/legacy-metadata-api.md b/docs/api-reference/legacy-metadata-api.md index 453159c1a58..d22be18a7ec 100644 --- a/docs/api-reference/legacy-metadata-api.md +++ b/docs/api-reference/legacy-metadata-api.md @@ -176,7 +176,11 @@ Returns a list of all segments, overlapping with any of given intervals, for a d `POST /druid/coordinator/v1/metadata/dataSourceInformation` -Returns information about the specified datasources, including the datasource schema. +Returns information about the specified datasources, including the datasource schema. + +`POST /druid/coordinator/v1/metadata/bootstrapSegments` + +Returns information about bootstrap segments for all datasources. The returned set includes all broadcast segments if broadcast rules are configured. 
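The new `bootstrapSegments` endpoint described above has no usage example in the doc yet. The following is a minimal sketch of how it could be called, assuming it follows the same conventions as the other Coordinator metadata endpoints; `COORDINATOR_IP` and `COORDINATOR_PORT` are placeholders, and the empty request body is an assumption, not a confirmed contract.

```shell
# Sketch: retrieve bootstrap segments (including broadcast segments, if broadcast rules are configured).
# COORDINATOR_IP and COORDINATOR_PORT are placeholders; sending no request body is an assumption.
curl --request POST "http://COORDINATOR_IP:COORDINATOR_PORT/druid/coordinator/v1/metadata/bootstrapSegments" \
  --header 'Accept: application/json'
```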
diff --git a/docs/configuration/extensions.md b/docs/configuration/extensions.md index 29356ebc05c..d396bc29000 100644 --- a/docs/configuration/extensions.md +++ b/docs/configuration/extensions.md @@ -22,7 +22,6 @@ title: "Extensions" ~ under the License. --> - Druid implements an extension system that allows for adding functionality at runtime. Extensions are commonly used to add support for deep storages (like HDFS and S3), metadata stores (like MySQL and PostgreSQL), new aggregators, new input formats, and so on. @@ -55,7 +54,7 @@ Core extensions are maintained by Druid committers. |druid-parquet-extensions|Support for data in Apache Parquet data format. Requires druid-avro-extensions to be loaded.|[link](../development/extensions-core/parquet.md)| |druid-protobuf-extensions| Support for data in Protobuf data format.|[link](../development/extensions-core/protobuf.md)| |druid-ranger-security|Support for access control through Apache Ranger.|[link](../development/extensions-core/druid-ranger-security.md)| -|druid-s3-extensions|Interfacing with data in AWS S3, and using S3 as deep storage.|[link](../development/extensions-core/s3.md)| +|druid-s3-extensions|Interfacing with data in Amazon S3, and using S3 as deep storage.|[link](../development/extensions-core/s3.md)| |druid-ec2-extensions|Interfacing with AWS EC2 for autoscaling middle managers|UNDOCUMENTED| |druid-aws-rds-extensions|Support for AWS token based access to AWS RDS DB Cluster.|[link](../development/extensions-core/druid-aws-rds.md)| |druid-stats|Statistics related module including variance and standard deviation.|[link](../development/extensions-core/stats.md)| @@ -101,7 +100,7 @@ All of these community extensions can be downloaded using [pull-deps](../operati |druid-momentsketch|Support for approximate quantile queries using the [momentsketch](https://github.com/stanford-futuredata/momentsketch) library|[link](../development/extensions-contrib/momentsketch-quantiles.md)| |druid-tdigestsketch|Support for approximate sketch aggregators based on [T-Digest](https://github.com/tdunning/t-digest)|[link](../development/extensions-contrib/tdigestsketch-quantiles.md)| |gce-extensions|GCE Extensions|[link](../development/extensions-contrib/gce-extensions.md)| -|prometheus-emitter|Exposes [Druid metrics](../operations/metrics.md) for Prometheus server collection (https://prometheus.io/)|[link](../development/extensions-contrib/prometheus.md)| +|prometheus-emitter|Exposes [Druid metrics](../operations/metrics.md) for Prometheus server collection ()|[link](../development/extensions-contrib/prometheus.md)| |druid-kubernetes-overlord-extensions|Support for launching tasks in k8s without Middle Managers|[link](../development/extensions-contrib/k8s-jobs.md)| |druid-spectator-histogram|Support for efficient approximate percentile queries|[link](../development/extensions-contrib/spectator-histogram.md)| |druid-rabbit-indexing-service|Support for creating and managing [RabbitMQ](https://www.rabbitmq.com/) indexing tasks|[link](../development/extensions-contrib/rabbit-stream-ingestion.md)| @@ -111,7 +110,6 @@ All of these community extensions can be downloaded using [pull-deps](../operati Please post on [dev@druid.apache.org](https://lists.apache.org/list.html?dev@druid.apache.org) if you'd like an extension to be promoted to core. If we see a community extension actively supported by the community, we can promote it to core based on community feedback. 
- For information how to create your own extension, please see [here](../development/modules.md). ## Loading extensions diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 4eceec8beec..c627e9fd7f0 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -668,14 +668,12 @@ Store task logs in S3. Note that the `druid-s3-extensions` extension must be loa ##### Azure Blob Store task logs -Store task logs in Azure Blob Store. +Store task logs in Azure Blob Store. To enable this feature, load the `druid-azure-extensions` extension, and configure deep storage for Azure. Druid uses the same authentication method configured for deep storage and stores task logs in the same storage account (set in `druid.azure.account`). -Note: The `druid-azure-extensions` extension must be loaded, and this uses the same storage account as the deep storage module for azure. - -|Property|Description|Default| -|--------|-----------|-------| -|`druid.indexer.logs.container`|The Azure Blob Store container to write logs to|none| -|`druid.indexer.logs.prefix`|The path to prepend to logs|none| +| Property | Description | Default | +|---|---|---| +| `druid.indexer.logs.container` | The Azure Blob Store container to write logs to. | Must be set. | +| `druid.indexer.logs.prefix` | The path to prepend to logs. | Must be set. | ##### Google Cloud Storage task logs @@ -714,7 +712,7 @@ You can configure Druid API error responses to hide internal information like th |`druid.server.http.showDetailedJettyErrors`|When set to true, any error from the Jetty layer / Jetty filter includes the following fields in the JSON response: `servlet`, `message`, `url`, `status`, and `cause`, if it exists. When set to false, the JSON response only includes `message`, `url`, and `status`. The field values remain unchanged.|true| |`druid.server.http.errorResponseTransform.strategy`|Error response transform strategy. The strategy controls how Druid transforms error responses from Druid services. When unset or set to `none`, Druid leaves error responses unchanged.|`none`| -##### Error response transform strategy +#### Error response transform strategy You can use an error response transform strategy to transform error responses from within Druid services to hide internal information. When you specify an error response transform strategy other than `none`, Druid transforms the error responses from Druid services as follows: @@ -723,12 +721,12 @@ When you specify an error response transform strategy other than `none`, Druid t * For any SQL query API that fails, for example `POST /druid/v2/sql/...`, Druid sets the fields `errorClass` and `host` to null. Druid applies the transformation strategy to the `errorMessage` field. * For any JDBC related exceptions, Druid will turn all checked exceptions into `QueryInterruptedException` otherwise druid will attempt to keep the exception as the same type. For example if the original exception isn't owned by Druid it will become `QueryInterruptedException`. Druid applies the transformation strategy to the `errorMessage` field. -###### No error response transform strategy +##### No error response transform strategy In this mode, Druid leaves error responses from underlying services unchanged and returns the unchanged errors to the API client. This is the default Druid error response mode. To explicitly enable this strategy, set `druid.server.http.errorResponseTransform.strategy` to `none`. 
-###### Allowed regular expression error response transform strategy +##### Allowed regular expression error response transform strategy In this mode, Druid validates the error responses from underlying services against a list of regular expressions. Only error messages that match a configured regular expression are returned. To enable this strategy, set `druid.server.http.errorResponseTransform.strategy` to `allowedRegex`. @@ -774,7 +772,7 @@ This config is used to find the [Coordinator](../design/coordinator.md) using Cu You can configure how to announce and unannounce Znodes in ZooKeeper (using Curator). For normal operations you do not need to override any of these configs. -##### Batch data segment announcer +#### Batch data segment announcer In current Druid, multiple data segments may be announced under the same Znode. @@ -1562,7 +1560,7 @@ For most types of tasks, `SegmentWriteOutMediumFactory` can be configured per-ta |`druid.worker.capacity`|Maximum number of tasks the Indexer can accept.|Number of available processors - 1| |`druid.worker.baseTaskDirs`|List of base temporary working directories, one of which is assigned per task in a round-robin fashion. This property can be used to allow usage of multiple disks for indexing. This property is recommended in place of and takes precedence over `${druid.indexer.task.baseTaskDir}`. If this configuration is not set, `${druid.indexer.task.baseTaskDir}` is used. Example: `druid.worker.baseTaskDirs=[\"PATH1\",\"PATH2\",...]`.|null| |`druid.worker.baseTaskDirSize`|The total amount of bytes that can be used by tasks on any single task dir. This value is treated symmetrically across all directories, that is, if this is 500 GB and there are 3 `baseTaskDirs`, then each of those task directories is assumed to allow for 500 GB to be used and a total of 1.5 TB will potentially be available across all tasks. The actual amount of memory assigned to each task is discussed in [Configuring task storage sizes](../ingestion/tasks.md#configuring-task-storage-sizes)|`Long.MAX_VALUE`| -|`druid.worker.globalIngestionHeapLimitBytes`|Total amount of heap available for ingestion processing. This is applied by automatically setting the `maxBytesInMemory` property on tasks.|60% of configured JVM heap| +|`druid.worker.globalIngestionHeapLimitBytes`|Total amount of heap available for ingestion processing. This is applied by automatically setting the `maxBytesInMemory` property on tasks.|Configured max JVM heap size / 6| |`druid.worker.numConcurrentMerges`|Maximum number of segment persist or merge operations that can run concurrently across all tasks.|`druid.worker.capacity` / 2, rounded down| |`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`| |`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/tasks`| @@ -2037,7 +2035,7 @@ A simple in-memory LRU cache. Local cache resides in JVM heap memory, so if you |Property|Description|Default| |--------|-----------|-------| |`druid.cache.sizeInBytes`|Maximum cache size in bytes. 
Zero disables caching.|0| -|`druid.cache.initialSize`|Initial size of the hashtable backing the cache.|500000| +|`druid.cache.initialSize`|Initial size of the hash table backing the cache.|500000| |`druid.cache.logEvictionCount`|If non-zero, log cache eviction every `logEvictionCount` items.|0| #### Caffeine cache diff --git a/docs/development/build.md b/docs/development/build.md index 911f25aabbf..e96f37fdc4a 100644 --- a/docs/development/build.md +++ b/docs/development/build.md @@ -33,7 +33,7 @@ make sure it has `/master/` in the URL. ##### Installing Java and Maven -- JDK 8, 8u92+ or JDK 11. See our [Java documentation](../operations/java.md) for information about obtaining a JDK. +- See our [Java documentation](../operations/java.md) for information about obtaining a supported JDK - [Maven version 3.x](http://maven.apache.org/download.cgi) ##### Other dependencies diff --git a/docs/development/docs-contribute.md b/docs/development/docs-contribute.md index d50b86b4478..eb0aa3bb72e 100644 --- a/docs/development/docs-contribute.md +++ b/docs/development/docs-contribute.md @@ -34,8 +34,7 @@ Druid docs contributors: Druid docs contributors can open an issue about documentation, or contribute a change with a pull request (PR). The open source Druid docs are located here: -https://druid.apache.org/docs/latest/design/index.html - + If you need to update a Druid doc, locate and update the doc in the Druid repo following the instructions below. @@ -48,19 +47,22 @@ See [`CONTRIBUTING.md`](https://github.com/apache/incubator-druid/blob/master/CO ## Before you begin Before you can contribute to the Druid docs for the first time, you must complete the following steps: + 1. Fork the [Druid repo](https://github.com/apache/druid). Your fork will be the `origin` remote. -2. Clone your fork: - +2. Clone your fork: + ```bash git clone git@github.com:GITHUB_USERNAME/druid.git ``` + Replace `GITHUB_USERNAME` with your GitHub username. 3. In the directory where you cloned your fork, set up `apache/druid` as your your remote `upstream` repo: - + ```bash git remote add upstream https://github.com/apache/druid.git ``` -4. Confirm that your fork shows up as the origin repo and `apache/druid` shows up as the upstream repo: + +4. Confirm that your fork shows up as the origin repo and `apache/druid` shows up as the upstream repo: ```bash git remote -v @@ -71,11 +73,11 @@ Before you can contribute to the Druid docs for the first time, you must complet ```bash git config user.email ``` - + If you need to set your email, see the [GitHub instructions](https://docs.github.com/en/github-ae@latest/account-and-profile/setting-up-and-managing-your-github-user-account/managing-email-preferences/setting-your-commit-email-address#setting-your-commit-email-address-in-git). -5. Install Docusaurus so that you can build the site locally. Run either `npm install` or `yarn install` in the `website` directory. - +6. Install Docusaurus so that you can build the site locally. Run either `npm install` or `yarn install` in the `website` directory. + ## Contributing Before you contribute, make sure your local branch of `master` and the upstream Apache branch are up-to-date and in sync. This can help you avoid merge conflicts. Run the following commands on your fork's `master` branch: @@ -104,13 +106,13 @@ Now you're up to date, and you can make your changes. Provide a name for your feature branch in `MY-BRANCH`. 2. Find the file that you want to make changes to. 
All the source files for the docs are written in Markdown and located in the `docs` directory. The URL for the page includes the subdirectory the source file is in. For example, the SQL-based ingestion tutorial found at `https://druid.apache.org/docs/latest/tutorials/tutorial-msq-extern.html` is in the `tutorials` subdirectory. - + If you're adding a page, create a new Markdown file in the appropriate subdirectory. Then, copy the front matter and Apache license from an existing file. Update the `title` and `id` fields. Don't forget to add it to `website/sidebars.json` so that your new page shows up in the navigation. -4. Test changes locally by building the site and navigating to your changes. In the `website` directory, run `docusaurus-start`. By default, this starts the site on `localhost:3000`. If port `3000` is already in use, it'll increment the port number from there. - -5. Use the following commands to run the link and spellcheckers locally: - +3. Test changes locally by building the site and navigating to your changes. In the `website` directory, run `npm run start`. By default, this starts the site on `localhost:3000`. If port `3000` is already in use, it'll increment the port number from there. + +4. Use the following commands to run the link and spellcheckers locally: + ```bash cd website # You only need to install once @@ -123,7 +125,7 @@ Now you're up to date, and you can make your changes. This step can save you time during the review process since they'll run faster than the GitHub Action version of the checks and warn you of issues before you create a PR. -5. Push your changes to your fork: +5. Push your changes to your fork: ```bash git push --set-upstream origin MY-BRANCH @@ -133,7 +135,7 @@ Now you're up to date, and you can make your changes. The pull request template is extensive. You may not need all the information there, so feel free to delete unneeded sections as you fill it out. Once you create the pull request, GitHub automatically labels the issue so that reviewers can take a look. -The docs go through a review process similar to the code where community members will offer feedback. Once the review process is complete and your changes are merged, they'll be available on the live site when the site gets republished. +The docs go through a review process similar to the code where community members will offer feedback. Once the review process is complete and your changes are merged, they'll be available on the live site when the site gets republished. ## Style guide @@ -146,6 +148,7 @@ The style guide should serve as a point of reference to enable contributors and In some cases, Google Style might make the Druid docs more difficult to read and understand. This section highlights those exceptions. #### SQL keyword syntax + For SQL keywords and functions, use all caps, but do not use code font. :::tip @@ -159,7 +162,6 @@ The UNNEST clause unnests array values. The \`UNNEST\` clause unnests array values. ::: - #### Optional parameters and arguments For optional parameters and arguments, enclose the optional parameter and leading command in brackets. 
diff --git a/docs/development/extensions-contrib/k8s-jobs.md b/docs/development/extensions-contrib/k8s-jobs.md index 1d75e2b7efe..913e40b9373 100644 --- a/docs/development/extensions-contrib/k8s-jobs.md +++ b/docs/development/extensions-contrib/k8s-jobs.md @@ -40,7 +40,280 @@ The extension uses `druid.indexer.runner.capacity` to limit the number of k8s jo Other configurations required are: `druid.indexer.runner.type: k8s` and `druid.indexer.task.encapsulatedTask: true` -## Pod Adapters +### Dynamic config + +Druid operators can dynamically tune certain features within this extension. You don't need to restart the Overlord +service for these changes to take effect. + +Druid can dynamically tune [pod template selection](#pod-template-selection), which allows you to configure the pod +template based on the task to be run. To enable dynamic pod template selection, first configure the +[custom template pod adapter](#custom-template-pod-adapter). + +Use the following APIs to view and update the dynamic configuration for the Kubernetes task runner. + +To use these APIs, ensure you have read and write permissions for the CONFIG resource type with the resource name +"CONFIG". For more information on permissions, see +[User authentication and authorization](../../operations/security-user-auth.md#config). + +#### Get dynamic configuration + +Retrieves the current dynamic execution config for the Kubernetes task runner. +Returns a JSON object with the dynamic configuration properties. + +##### URL + +`GET` `/druid/indexer/v1/k8s/taskrunner/executionconfig` + +##### Responses + + + + + + +*Successfully retrieved dynamic configuration* + + + + +--- + +##### Sample request + + + + + +```shell +curl "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/k8s/taskrunner/executionconfig" +``` + + + + +```HTTP +GET /druid/indexer/v1/k8s/taskrunner/executionconfig HTTP/1.1 +Host: http://ROUTER_IP:ROUTER_PORT +``` + + + + +##### Sample response + +
+View the response + +```json +{ + "type": "default", + "podTemplateSelectStrategy": + { + "type": "selectorBased", + "selectors": [ + { + "selectionKey": "podSpec1", + "context.tags": { + "userProvidedTag": ["tag1", "tag2"] + }, + "dataSource": ["wikipedia"] + }, + { + "selectionKey": "podSpec2", + "type": ["index_kafka"] + } + ] + } +} +``` +
+ +#### Update dynamic configuration + +Updates the dynamic configuration for the Kubernetes Task Runner + +##### URL + +`POST` `/druid/indexer/v1/k8s/taskrunner/executionconfig` + +##### Header parameters + +The endpoint supports the following optional header parameters to populate the `author` and `comment` fields in the configuration history. + +* `X-Druid-Author` + * Type: String + * Author of the configuration change. +* `X-Druid-Comment` + * Type: String + * Description for the update. + +##### Responses + + + + + + +*Successfully updated dynamic configuration* + + + + +--- + +##### Sample request + + + + + + +```shell +curl "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/k8s/taskrunner/executionconfig" \ +--header 'Content-Type: application/json' \ +--data '{ + "type": "default", + "podTemplateSelectStrategy": + { + "type": "selectorBased", + "selectors": [ + { + "selectionKey": "podSpec1", + "context.tags": + { + "userProvidedTag": ["tag1", "tag2"] + }, + "dataSource": ["wikipedia"] + }, + { + "selectionKey": "podSpec2", + "type": ["index_kafka"] + } + ] + } +}' +``` + + + + + +```HTTP +POST /druid/indexer/v1/k8s/taskrunner/executionconfig HTTP/1.1 +Host: http://ROUTER_IP:ROUTER_PORT +Content-Type: application/json + +{ + "type": "default", + "podTemplateSelectStrategy": + { + "type": "selectorBased", + "selectors": [ + { + "selectionKey": "podSpec1", + "context.tags": + { + "userProvidedTag": ["tag1", "tag2"] + }, + "dataSource": ["wikipedia"] + }, + { + "selectionKey": "podSpec2", + "type": ["index_kafka"] + } + ] + } +} +``` + + + + +##### Sample response + +A successful request returns an HTTP `200 OK` message code and an empty response body. + +#### Get dynamic configuration history + +Retrieves the history of changes to Kubernetes task runner's dynamic execution config over an interval of time. Returns +an empty array if there are no history records available. + +##### URL + +`GET` `/druid/indexer/v1/k8s/taskrunner/executionconfig/history` + +##### Query parameters + +The endpoint supports the following optional query parameters to filter results. + +* `interval` + * Type: String + * Limit the results to the specified time interval in ISO 8601 format delimited with `/`. For example, `2023-07-13/2023-07-19`. The default interval is one week. You can change this period by setting `druid.audit.manager.auditHistoryMillis` in the `runtime.properties` file for the Coordinator. + +* `count` + * Type: Integer + * Limit the number of results to the last `n` entries. + +##### Responses + + + + + + +*Successfully retrieved dynamic configuration* + + + + +--- + +##### Sample request + + + + + + +```shell +curl "http://ROUTER_IP:ROUTER_PORT/druid/indexer/v1/k8s/taskrunner/executionconfig/history" +``` + + + + + +```HTTP +GET /druid/indexer/v1/k8s/taskrunner/executionconfig/history HTTP/1.1 +Host: http://ROUTER_IP:ROUTER_PORT +``` + + + + +##### Sample response + +
+View the response + +```json +[ + { + "key": "k8s.taskrunner.config", + "type": "k8s.taskrunner.config", + "auditInfo": { + "author": "", + "comment": "", + "ip": "127.0.0.1" + }, + "payload": "{\"type\": \"default\",\"podTemplateSelectStrategy\":{\"type\": \"taskType\"}}", + "auditTime": "2024-06-13T20:59:51.622Z" + } +] +``` +
+ +## Pod adapters The logic defining how the pod template is built for your Kubernetes Job depends on which pod adapter you have specified. ### Overlord Single Container Pod Adapter/Overlord Multi Container Pod Adapter @@ -65,7 +338,7 @@ and in your sidecar specs: That will not work, because we cannot decipher what your command is, the extension needs to know it explicitly. **Even for sidecars like Istio which are dynamically created by the service mesh, this needs to happen.* -Instead do the following: +Instead, do the following: You can keep your Dockerfile the same but you must have a sidecar spec like so: ``` container: name: foo @@ -90,13 +363,10 @@ The custom template pod adapter allows you to specify a pod template file per ta The base pod template must be specified as the runtime property `druid.indexer.runner.k8s.podTemplate.base: /path/to/basePodSpec.yaml` -Task specific pod templates can be specified as the runtime property `druid.indexer.runner.k8s.podTemplate.{taskType}: /path/to/taskSpecificPodSpec.yaml` where {taskType} is the name of the task type i.e `index_parallel`. +
+Example Pod Template that uses the regular druid docker image -If you are trying to use the default image's environment variable parsing feature to set runtime properties, you need to add a extra escape underscore when specifying pod templates. -e.g. set the environment variable `druid_indexer_runner_k8s_podTemplate_index__parallel` when setting `druid.indxer.runner.k8s.podTemplate.index_parallel` - -The following is an example Pod Template that uses the regular druid docker image. -``` +```yaml apiVersion: "v1" kind: "PodTemplate" template: @@ -164,6 +434,7 @@ template: - emptyDir: {} name: deepstorage-volume ``` +
+ The following runtime properties need to be passed to the Job's peon process. @@ -177,6 +448,10 @@ druid.indexer.task.encapsulatedTask=true ``` Any runtime property or JVM config used by the peon process can also be passed. For example, below is an example of a ConfigMap that can be used to generate the `nodetype-config-volume` mount in the above template. + +
+Example ConfigMap + ``` kind: ConfigMap metadata: @@ -217,59 +492,112 @@ data: druid.peon.mode=remote druid.indexer.task.encapsulatedTask=true ``` -#### Dynamic Pod Template Selection Config -The Dynamic Pod Template Selection feature enhances the K8s extension by enabling more flexible and dynamic selection of pod templates based on task properties. This process is governed by the `PodTemplateSelectStrategy`. Below are the two strategies implemented: +
-|Property|Description|Default| -|--------|-----------|-------| -|`TaskTypePodTemplateSelectStrategy`| This strategy selects pod templates based on task type for execution purposes, implementing the behavior that maps templates to specific task types. | true | -|`SelectorBasedPodTemplateSelectStrategy`| This strategy evaluates a series of selectors, known as `selectors`, which are aligned with potential task properties. | false | +#### Pod template selection + +The pod template adapter can select which pod template should be used for a task using the [task runner execution config](#dynamic-config) -`SelectorBasedPodTemplateSelectStrategy`, the strategy implementing this new feature, is based on conditional `selectors` that match against top-level keys from the task payload. Currently, it supports matching based on task context tags, task type, and dataSource. These selectors are ordered in the dynamic configuration, with the first selector given the highest priority during the evaluation process. This means that the selection process uses these ordered conditions to determine a task’s Pod template. The first matching condition immediately determines the Pod template, thereby prioritizing certain configurations over others. If no selector matches, it will fall back to an optional `defaultKey` if configured; if there is still no match, it will use the `base` template. +##### Select based on task type -Example Configuration: +The `TaskTypePodTemplateSelectStrategy` strategy selects pod templates based on task type for execution purposes, +implementing the behavior that maps templates to specific task types. This is the default pod template selection +strategy. To explicitly select this strategy, set the `podTemplateSelectStrategy` in the dynamic execution config to -We define two template keys in the configuration—`low-throughput` and `medium-throughput`—each associated with specific task conditions and arranged in a priority order. - -- Low Throughput Template: This is the first template evaluated and has the highest priority. Tasks that have a context tag `billingCategory=streaming_ingestion` and a datasource of `wikipedia` will be classified under the `low-throughput` template. This classification directs such tasks to utilize a predefined pod template optimized for low throughput requirements. - -- Medium Throughput Template: If a task does not meet the low-throughput criteria, the system will then evaluate it against the next selector in order. In this example, if the task type is index_kafka, it will fall into the `medium-throughput` template. +```json +{ "type": "default" } ``` + +Task specific pod templates can be specified as the runtime property +`druid.indexer.runner.k8s.podTemplate.{taskType}: /path/to/taskSpecificPodSpec.yaml` where {taskType} is the name of the +task type. For example, `index_parallel`. + +If you are trying to use the default image's environment variable parsing feature to set runtime properties, you need to add a extra escape underscore when specifying pod templates. 
+For example, set the environment variable `druid_indexer_runner_k8s_podTemplate_index__kafka` when you set the runtime property `druid.indexer.runner.k8s.podTemplate.index_kafka` + + +The following example shows a configuration for task-based pod template selection: + +```properties +druid.indexer.runner.k8s.podTemplate.base=/path/to/basePodSpec.yaml +druid.indexer.runner.k8s.podTemplate.index_kafka=/path/to/kafkaPodSpec.yaml +``` + +##### Select based on one or more conditions + +The `SelectorBasedPodTemplateSelectStrategy` strategy evaluates a series of criteria within `selectors` to determine +which pod template to use to run the task. Pod templates are configured in the runtime properties like +`druid.indexer.runner.k8s.podTemplate.=...`. + +```json +{ + "type": "selectorBased", + "selectors": [ + { + "selectionKey": "podSpec1", + "context.tags": + { + "userProvidedTag": ["tag1", "tag2"] + }, + "dataSource": ["wikipedia"] + }, + { + "selectionKey": "podSpec2", + "type": ["index_kafka"] + } + ] +} +``` + +Selectors are processed in order. Druid selects the template based on the first matching selector. If a task does not +match any selector in the list, it will use the `base` pod template. + +For a task to match a selector, all the conditions within the selector must match. A selector can match on +- `type`: Type of the task +- `dataSource`: Destination datasource of the task. +- `context.tags`: Tags passed in the task's context. + +##### Example + +Set the following runtime properties to define the pod specs that can be used by Druid. + +```properties +druid.indexer.runner.k8s.podTemplate.base=/path/to/basePodSpec.yaml +druid.indexer.runner.k8s.podTemplate.podSpec1=/path/to/podSpecWithHighMemRequests.yaml +druid.indexer.runner.k8s.podTemplate.podSpec2=/path/to/podSpecWithLowCpuRequests.yaml +``` + +Set the dynamic execution config to define the pod template selection strategy. + +```json { "type": "default", - "podTemplateSelectStrategy": - { + "podTemplateSelectStrategy": { "type": "selectorBased", "selectors": [ { - "selectionKey": "low-throughput", - "context.tags": - { - "billingCategory": ["streaming_ingestion"] - }, + "selectionKey": "podSpec1", + "context.tags": { "userProvidedTag": ["tag1", "tag2"] }, "dataSource": ["wikipedia"] }, { - "selectionKey": "medium-throughput", + "selectionKey": "podSpec2", "type": ["index_kafka"] } - ], - "defaultKey"" "base" + ] } } ``` -Task specific pod templates can be specified as the runtime property `druid.indexer.runner.k8s.podTemplate.{template}: /path/to/taskSpecificPodSpec.yaml` where {template} is the matched `selectionKey` of the `podTemplateSelectStrategy` i.e low-throughput. -Similar to Overlord dynamic configuration, the following API endpoints are defined to retrieve and manage dynamic configurations of Pod Template Selection config: +Druid selects the pod templates as follows: +1. Use `podSpecWithHighMemRequests.yaml` when both of the following conditions are met: + 1. The task context contains a tag with the key `userProvidedTag` that has the value `tag1` or `tag2`. + 2. The task targets the `wikipedia` datasource. +2. Use `podSpecWithLowCpuRequests.yaml` when the task type is `index_kafka`. +3. Use the `basePodSpec.yaml` for all other tasks. 
-- Get dynamic configuration: -`POST` `/druid/indexer/v1/k8s/taskRunner/executionConfig` - -- Update dynamic configuration: -`GET` `/druid/indexer/v1/k8s/taskRunner/executionConfig` - -- Get dynamic configuration history: -`GET` `/druid/indexer/v1/k8s/taskRunner/executionConfig/history` +In this example, if there is an `index_kafka` task for the `wikipedia` datasource with the tag `userProvidedTag: tag1`, +Druid selects the pod template `podSpecWithHighMemRequests.yaml`. ### Properties |Property| Possible Values | Description |Default|required| @@ -302,7 +630,8 @@ Similar to Overlord dynamic configuration, the following API endpoints are defin - All Druid Pods belonging to one Druid cluster must be inside the same Kubernetes namespace. - You must have a role binding for the overlord's service account that provides the needed permissions for interacting with Kubernetes. An example spec could be: -``` + +```yaml kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index 21e24153a47..d6310e32cf9 100644 --- a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -22,25 +22,75 @@ title: "Microsoft Azure" ~ under the License. --> +## Azure extension + +This extension allows you to do the following: + +* [Ingest data](#ingest-data-from-azure) from objects stored in Azure Blob Storage. +* [Write segments](#store-segments-in-azure) to Azure Blob Storage for deep storage. +* [Persist task logs](#persist-task-logs-in-azure) to Azure Blob Storage for long-term storage. + +:::info To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `druid-azure-extensions` in the extensions load list. -## Deep Storage +::: -[Microsoft Azure Storage](http://azure.microsoft.com/en-us/services/storage/) is another option for deep storage. This requires some additional Druid configuration. +### Ingest data from Azure -|Property|Description|Possible Values|Default| -|--------|---------------|-----------|-------| -|`druid.storage.type`|azure||Must be set.| -|`druid.azure.account`||Azure Storage account name.|Must be set.| -|`druid.azure.key`||Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.| -|`druid.azure.sharedAccessStorageToken`||Azure Shared Storage access token|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain..| -|`druid.azure.useAzureCredentialsChain`|Use [DefaultAzureCredential](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme?view=azure-java-stable) for authentication|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.|False| -|`druid.azure.managedIdentityClientId`|If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true.||Optional.| -|`druid.azure.container`||Azure Storage container name.|Must be set.| -|`druid.azure.prefix`|A prefix string that will be prepended to the blob names for the segments published to Azure deep storage| |""| -|`druid.azure.protocol`|the protocol to use|http or https|https| -|`druid.azure.maxTries`|Number of tries before canceling an Azure operation.| |3| -|`druid.azure.maxListingLength`|maximum number of input files matching a given prefix to retrieve at a time| |1024| -|`druid.azure.storageAccountEndpointSuffix`| The endpoint suffix to use. 
Use this config instead of `druid.azure.endpointSuffix`. Override the default value to connect to [Azure Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api). This config supports storage accounts enabled for [AzureDNSZone](https://learn.microsoft.com/en-us/azure/dns/dns-getstarted-portal). Note: do not include the storage account name prefix in this config value. | Examples: `ABCD1234.blob.storage.azure.net`, `blob.core.usgovcloudapi.net`| `blob.core.windows.net`| -See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for more information. +Ingest data using either [MSQ](../../multi-stage-query/index.md) or a native batch [parallel task](../../ingestion/native-batch.md) with an [Azure input source](../../ingestion/input-sources.md#azure-input-source) (`azureStorage`) to read objects directly from Azure Blob Storage. + +### Store segments in Azure + +:::info + +To use Azure for deep storage, set `druid.storage.type=azure`. + +::: + +#### Configure location + +Configure where to store segments using the following properties: + +| Property | Description | Default | +|---|---|---| +| `druid.azure.account` | The Azure Storage account name. | Must be set. | +| `druid.azure.container` | The Azure Storage container name. | Must be set. | +| `druid.azure.prefix` | A prefix string that will be prepended to the blob names for the segments published. | "" | +| `druid.azure.maxTries` | Number of tries before canceling an Azure operation. | 3 | +| `druid.azure.protocol` | The protocol to use to connect to the Azure Storage account. Either `http` or `https`. | `https` | +| `druid.azure.storageAccountEndpointSuffix` | The Storage account endpoint to use. Override the default value to connect to [Azure Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api) or storage accounts with [Azure DNS zone endpoints](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#azure-dns-zone-endpoints-preview).

Do _not_ include the storage account name prefix in this config value.

Examples: `ABCD1234.blob.storage.azure.net`, `blob.core.usgovcloudapi.net`. | `blob.core.windows.net` | + +#### Configure authentication + +Authenticate access to Azure Blob Storage using one of the following methods: + +* [SAS token](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) +* [Shared Key](https://learn.microsoft.com/en-us/rest/api/storageservices/authorize-with-shared-key) +* Default Azure credentials chain ([`DefaultAzureCredential`](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme#defaultazurecredential)). + +Configure authentication using the following properties: + +| Property | Description | Default | +|---|---|---| +| `druid.azure.sharedAccessStorageToken` | The SAS (Shared Storage Access) token. | | +| `druid.azure.key` | The Shared Key. | | +| `druid.azure.useAzureCredentialsChain` | If `true`, use `DefaultAzureCredential` for authentication. | `false` | +| `druid.azure.managedIdentityClientId` | To use managed identity authentication in the `DefaultAzureCredential`, set `useAzureCredentialsChain` to `true` and provide the client ID here. | | + +### Persist task logs in Azure + +:::info + +To persist task logs in Azure Blob Storage, set `druid.indexer.logs.type=azure`. + +::: + +Druid stores task logs using the storage account and authentication method configured for storing segments. Use the following configuration to set up where to store the task logs: + +| Property | Description | Default | +|---|---|---| +| `druid.indexer.logs.container` | The Azure Blob Store container to write logs to. | Must be set. | +| `druid.indexer.logs.prefix` | The path to prepend to logs. | Must be set. | + +For general options regarding task retention, see [Log retention policy](../../configuration/index.md#log-retention-policy). diff --git a/docs/development/extensions-core/hdfs.md b/docs/development/extensions-core/hdfs.md index 32ef6133a9d..b1d2d0ceaab 100644 --- a/docs/development/extensions-core/hdfs.md +++ b/docs/development/extensions-core/hdfs.md @@ -22,7 +22,6 @@ title: "HDFS" ~ under the License. --> - To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `druid-hdfs-storage` in the extensions load list and run druid processes with `GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account_keyfile` in the environment. ## Deep Storage @@ -44,11 +43,11 @@ If you want to eagerly authenticate against a secured hadoop/hdfs cluster you mu ### Configuration for Cloud Storage -You can also use the AWS S3 or the Google Cloud Storage as the deep storage via HDFS. +You can also use the Amazon S3 or the Google Cloud Storage as the deep storage via HDFS. -#### Configuration for AWS S3 +#### Configuration for Amazon S3 -To use the AWS S3 as the deep storage, you need to configure `druid.storage.storageDirectory` properly. +To use the Amazon S3 as the deep storage, you need to configure `druid.storage.storageDirectory` properly. |Property|Possible Values|Description|Default| |--------|---------------|-----------|-------| diff --git a/docs/development/extensions-core/s3.md b/docs/development/extensions-core/s3.md index 20bd1682f24..ab8745f6e3b 100644 --- a/docs/development/extensions-core/s3.md +++ b/docs/development/extensions-core/s3.md @@ -25,6 +25,7 @@ title: "S3-compatible" ## S3 extension This extension allows you to do 2 things: + * [Ingest data](#reading-data-from-s3) from files stored in S3. * Write segments to [deep storage](#deep-storage) in S3. 
@@ -41,7 +42,7 @@ To read objects from S3, you must supply [connection information](#configuration ### Deep Storage -S3-compatible deep storage means either AWS S3 or a compatible service like Google Storage which exposes the same API as S3. +S3-compatible deep storage means either Amazon S3 or a compatible service like Google Storage which exposes the same API as S3. S3 deep storage needs to be explicitly enabled by setting `druid.storage.type=s3`. **Only after setting the storage type to S3 will any of the settings below take effect.** @@ -97,19 +98,19 @@ Note that this setting only affects Druid's behavior. Changing S3 to use Object If you're using ACLs, Druid needs the following permissions: -- `s3:GetObject` -- `s3:PutObject` -- `s3:DeleteObject` -- `s3:GetBucketAcl` -- `s3:PutObjectAcl` +* `s3:GetObject` +* `s3:PutObject` +* `s3:DeleteObject` +* `s3:GetBucketAcl` +* `s3:PutObjectAcl` #### Object Ownership permissions If you're using Object Ownership, Druid needs the following permissions: -- `s3:GetObject` -- `s3:PutObject` -- `s3:DeleteObject` +* `s3:GetObject` +* `s3:PutObject` +* `s3:DeleteObject` ### AWS region @@ -117,8 +118,8 @@ The AWS SDK requires that a target region be specified. You can set these by us For example, to set the region to 'us-east-1' through system properties: -- Add `-Daws.region=us-east-1` to the `jvm.config` file for all Druid services. -- Add `-Daws.region=us-east-1` to `druid.indexer.runner.javaOpts` in [Middle Manager configuration](../../configuration/index.md#middlemanager-configuration) so that the property will be passed to Peon (worker) processes. +* Add `-Daws.region=us-east-1` to the `jvm.config` file for all Druid services. +* Add `-Daws.region=us-east-1` to `druid.indexer.runner.javaOpts` in [Middle Manager configuration](../../configuration/index.md#middlemanager-configuration) so that the property will be passed to Peon (worker) processes. ### Connecting to S3 configuration @@ -146,6 +147,6 @@ For example, to set the region to 'us-east-1' through system properties: You can enable [server-side encryption](https://docs.aws.amazon.com/AmazonS3/latest/dev/serv-side-encryption) by setting `druid.storage.sse.type` to a supported type of server-side encryption. 
The current supported types are: -- s3: [Server-side encryption with S3-managed encryption keys](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption) -- kms: [Server-side encryption with AWS KMS–Managed Keys](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption) -- custom: [Server-side encryption with Customer-Provided Encryption Keys](https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys) +* s3: [Server-side encryption with S3-managed encryption keys](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption) +* kms: [Server-side encryption with AWS KMS–Managed Keys](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption) +* custom: [Server-side encryption with Customer-Provided Encryption Keys](https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys) diff --git a/docs/ingestion/hadoop.md b/docs/ingestion/hadoop.md index c34fdb92117..96373e27517 100644 --- a/docs/ingestion/hadoop.md +++ b/docs/ingestion/hadoop.md @@ -148,7 +148,7 @@ For example, using the static input paths: "paths" : "hdfs://path/to/data/is/here/data.gz,hdfs://path/to/data/is/here/moredata.gz,hdfs://path/to/data/is/here/evenmoredata.gz" ``` -You can also read from cloud storage such as AWS S3 or Google Cloud Storage. +You can also read from cloud storage such as Amazon S3 or Google Cloud Storage. To do so, you need to install the necessary library under Druid's classpath in _all MiddleManager or Indexer processes_. For S3, you can run the below command to install the [Hadoop AWS module](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/). @@ -336,7 +336,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |logParseExceptions|Boolean|If true, log an error message when a parsing exception occurs, containing information about the row where the error occurred.|no(default = false)| |maxParseExceptions|Integer|The maximum number of parse exceptions that can occur before the task halts ingestion and fails. Overrides `ignoreInvalidRows` if `maxParseExceptions` is defined.|no(default = unlimited)| |useYarnRMJobStatusFallback|Boolean|If the Hadoop jobs created by the indexing task are unable to retrieve their completion status from the JobHistory server, and this parameter is true, the indexing task will try to fetch the application status from `http:///ws/v1/cluster/apps/`, where `` is the value of `yarn.resourcemanager.webapp.address` in your Hadoop configuration. This flag is intended as a fallback for cases where an indexing task's jobs succeed, but the JobHistory server is unavailable, causing the indexing task to fail because it cannot determine the job statuses.|no (default = true)| -|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.|no (default = 0)| +|awaitSegmentAvailabilityTimeoutMillis|Long|Milliseconds to wait for the newly indexed segments to become available for query after ingestion completes. If `<= 0`, no wait will occur. If `> 0`, the task will wait for the Coordinator to indicate that the new segments are available for querying. 
If the timeout expires, the task will exit as successful, but the segments were not confirmed to have become available for query.|no (default = 0)| ### `jobProperties` diff --git a/docs/ingestion/input-sources.md b/docs/ingestion/input-sources.md index f89693740c8..fb8e1f98c91 100644 --- a/docs/ingestion/input-sources.md +++ b/docs/ingestion/input-sources.md @@ -30,12 +30,15 @@ For general information on native batch indexing and parallel task indexing, see ## S3 input source :::info - You need to include the [`druid-s3-extensions`](../development/extensions-core/s3.md) as an extension to use the S3 input source. + +You need to include the [`druid-s3-extensions`](../development/extensions-core/s3.md) as an extension to use the S3 input source. + ::: The S3 input source reads objects directly from S3. You can specify either: -- a list of S3 URI strings -- a list of S3 location prefixes that attempts to list the contents and ingest + +* a list of S3 URI strings +* a list of S3 location prefixes that attempts to list the contents and ingest all objects contained within the locations. The S3 input source is splittable. Therefore, you can use it with the [Parallel task](./native-batch.md). Each worker task of `index_parallel` reads one or multiple objects. @@ -76,7 +79,6 @@ Sample specs: ... ``` - ```json ... "ioConfig": { @@ -210,13 +212,17 @@ Properties Object: |assumeRoleExternalId|A unique identifier that might be required when you assume a role in another account [see](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html)|None|no| :::info - **Note:** If `accessKeyId` and `secretAccessKey` are not given, the default [S3 credentials provider chain](../development/extensions-core/s3.md#s3-authentication-methods) is used. + +If `accessKeyId` and `secretAccessKey` are not given, the default [S3 credentials provider chain](../development/extensions-core/s3.md#s3-authentication-methods) is used. + ::: ## Google Cloud Storage input source :::info - You need to include the [`druid-google-extensions`](../development/extensions-core/google.md) as an extension to use the Google Cloud Storage input source. + +You need to include the [`druid-google-extensions`](../development/extensions-core/google.md) as an extension to use the Google Cloud Storage input source. + ::: The Google Cloud Storage input source is to support reading objects directly @@ -261,7 +267,6 @@ Sample specs: ... ``` - ```json ... "ioConfig": { @@ -300,16 +305,18 @@ Google Cloud Storage object: |path|The path where data is located.|None|yes| |systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (Google Cloud Storage URI starting with `gs://`), `__file_bucket` (GCS bucket), and `__file_path` (GCS key).|None|no| -## Azure input source +## Azure input source :::info - You need to include the [`druid-azure-extensions`](../development/extensions-core/azure.md) as an extension to use the Azure input source. + +You need to include the [`druid-azure-extensions`](../development/extensions-core/azure.md) as an extension to use the Azure input source. + ::: The Azure input source (that uses the type `azureStorage`) reads objects directly from Azure Blob store or Azure Data Lake sources. You can specify objects as a list of file URI strings or prefixes. You can split the Azure input source for use with [Parallel task](./native-batch.md) indexing and each worker task reads one chunk of the split data. 
-The `azureStorage` input source is a new schema for Azure input sources that allows you to specify which storage account files should be ingested from. We recommend that you update any specs that use the old `azure` schema to use the new `azureStorage` schema. The new schema provides more functionality than the older `azure` schema. +The `azureStorage` input source is a new schema for Azure input sources that allows you to specify which storage account files should be ingested from. We recommend that you update any specs that use the old `azure` schema to use the new `azureStorage` schema. The new schema provides more functionality than the older `azure` schema. Sample specs: @@ -347,7 +354,6 @@ Sample specs: ... ``` - ```json ... "ioConfig": { @@ -379,7 +385,7 @@ Sample specs: |objects|JSON array of Azure objects to ingest.|None|One of the following must be set:`uris`, `prefixes`, or `objects`.| |objectGlob|A glob for the object part of the Azure URI. In the URI `azureStorage://foo/bar/file.json`, the glob is applied to `bar/file.json`.

The glob must match the entire object part, not just the filename. For example, the glob `*.json` does not match `azureStorage://foo/bar/file.json` because the object part is `bar/file.json`, and the `*` does not match the slash. To match all objects ending in `.json`, use `**.json` instead.

For more information, refer to the documentation for [`FileSystem#getPathMatcher`](https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-).|None|no| |systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (Azure blob URI starting with `azureStorage://`), `__file_bucket` (Azure bucket), and `__file_path` (Azure object path).|None|no| -|properties|Properties object for overriding the default Azure configuration. See below for more information.|None|No (defaults will be used if not given) +|properties|Properties object for overriding the default Azure configuration. See below for more information.|None|No (defaults will be used if not given)| Note that the Azure input source skips all empty objects only when `prefixes` is specified. @@ -390,14 +396,12 @@ The `objects` property can one of the following: |bucket|Name of the Azure Blob Storage or Azure Data Lake storage account|None|yes| |path|The container and path where data is located.|None|yes| - The `properties` property can be one of the following: -- `sharedAccessStorageToken` -- `key` -- `appRegistrationClientId`, `appRegistrationClientSecret`, and `tenantId` -- empty - +* `sharedAccessStorageToken` +* `key` +* `appRegistrationClientId`, `appRegistrationClientSecret`, and `tenantId` +* empty |Property|Description|Default|Required| |--------|-----------|-------|---------| @@ -407,8 +411,7 @@ The `properties` property can be one of the following: |appRegistrationClientSecret|The client secret of the Azure App registration to authenticate as|None|Yes if `appRegistrationClientId` is provided| |tenantId|The tenant ID of the Azure App registration to authenticate as|None|Yes if `appRegistrationClientId` is provided| - -#### `azure` input source +### Legacy `azure` input source The Azure input source that uses the type `azure` is an older version of the Azure input type and is not recommended. It doesn't support specifying which storage account to ingest from. We recommend using the [`azureStorage` input source schema](#azure-input-source) instead since it provides more functionality. @@ -448,7 +451,6 @@ Sample specs: ... ``` - ```json ... "ioConfig": { @@ -487,11 +489,12 @@ The `objects` property is: |bucket|Name of the Azure Blob Storage or Azure Data Lake container|None|yes| |path|The path where data is located.|None|yes| - ## HDFS input source :::info - You need to include the [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) as an extension to use the HDFS input source. + +You need to include the [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) as an extension to use the HDFS input source. + ::: The HDFS input source is to support reading files directly @@ -580,10 +583,12 @@ in `druid.ingestion.hdfs.allowedProtocols`. See [HDFS input source security conf The HTTP input source is to support reading files directly from remote sites via HTTP. -:::info - **Security notes:** Ingestion tasks run under the operating system account that runs the Druid processes, for example the Indexer, Middle Manager, and Peon. This means any user who can submit an ingestion task can specify an input source referring to any location that the Druid process can access. For example, using `http` input source, users may have access to internal network servers. +:::info Security notes + +Ingestion tasks run under the operating system account that runs the Druid processes, for example the Indexer, Middle Manager, and Peon. 
This means any user who can submit an ingestion task can specify an input source referring to any location that the Druid process can access. For example, using `http` input source, users may have access to internal network servers. + +The `http` input source is not limited to the HTTP or HTTPS protocols. It uses the Java URI class that supports HTTP, HTTPS, FTP, file, and jar protocols by default. - The `http` input source is not limited to the HTTP or HTTPS protocols. It uses the Java URI class that supports HTTP, HTTPS, FTP, file, and jar protocols by default. ::: For more information about security best practices, see [Security overview](../operations/security-overview.md#best-practices). @@ -725,7 +730,7 @@ Sample spec: |filter|A wildcard filter for files. See [here](http://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter) for more information. Files matching the filter criteria are considered for ingestion. Files not matching the filter criteria are ignored.|yes if `baseDir` is specified| |baseDir|Directory to search recursively for files to be ingested. Empty files under the `baseDir` will be skipped.|At least one of `baseDir` or `files` should be specified| |files|File paths to ingest. Some files can be ignored to avoid ingesting duplicate files if they are located under the specified `baseDir`. Empty files will be skipped.|At least one of `baseDir` or `files` should be specified| -|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (File URI starting with `file:`) and `__file_path` (file path).|None|no| +|systemFields|JSON array of system fields to return as part of input rows. Possible values: `__file_uri` (File URI starting with `file:`) and `__file_path` (file path).|no| ## Druid input source @@ -744,9 +749,9 @@ no `inputFormat` field needs to be specified in the ingestion spec when using th The Druid input source can be used for a variety of purposes, including: -- Creating new datasources that are rolled-up copies of existing datasources. -- Changing the [partitioning or sorting](./partitioning.md) of a datasource to improve performance. -- Updating or removing rows using a [`transformSpec`](./ingestion-spec.md#transformspec). +* Creating new datasources that are rolled-up copies of existing datasources. +* Changing the [partitioning or sorting](./partitioning.md) of a datasource to improve performance. +* Updating or removing rows using a [`transformSpec`](./ingestion-spec.md#transformspec). When using the Druid input source, the timestamp column shows up as a numeric field named `__time` set to the number of milliseconds since the epoch (January 1, 1970 00:00:00 UTC). It is common to use this in the timestampSpec, if you @@ -813,16 +818,16 @@ rolled-up datasource `wikipedia_rollup` by grouping on hour, "countryName", and ``` :::info - Note: Older versions (0.19 and earlier) did not respect the timestampSpec when using the Druid input source. If you - have ingestion specs that rely on this and cannot rewrite them, set - [`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`](../configuration/index.md#indexer-general-configuration) - to `true` to enable a compatibility mode where the timestampSpec is ignored. + +Older versions (0.19 and earlier) did not respect the timestampSpec when using the Druid input source. 
If you have ingestion specs that rely on this and cannot rewrite them, set [`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`](../configuration/index.md#indexer-general-configuration) to `true` to enable a compatibility mode where the timestampSpec is ignored. + ::: The [secondary partitioning method](native-batch.md#partitionsspec) determines the requisite number of concurrent worker tasks that run in parallel to complete ingestion with the Combining input source. Set this value in `maxNumConcurrentSubTasks` in `tuningConfig` based on the secondary partitioning method: -- `range` or `single_dim` partitioning: greater than or equal to 1 -- `hashed` or `dynamic` partitioning: greater than or equal to 2 + +* `range` or `single_dim` partitioning: greater than or equal to 1 +* `hashed` or `dynamic` partitioning: greater than or equal to 2 For more information on the `maxNumConcurrentSubTasks` field, see [Implementation considerations](native-batch.md#implementation-considerations). @@ -866,7 +871,7 @@ The following is an example of an SQL input source spec: The spec above will read all events from two separate SQLs for the interval `2013-01-01/2013-01-02`. Each of the SQL queries will be run in its own sub-task and thus for the above example, there would be two sub-tasks. -**Recommended practices** +### Recommended practices Compared to the other native batch input sources, SQL input source behaves differently in terms of reading the input data. Therefore, consider the following points before using this input source in a production environment: @@ -878,7 +883,6 @@ Compared to the other native batch input sources, SQL input source behaves diffe * Similar to file-based input formats, any updates to existing data will replace the data in segments specific to the intervals specified in the `granularitySpec`. - ## Combining input source The Combining input source lets you read data from multiple input sources. @@ -928,7 +932,9 @@ The following is an example of a Combining input source spec: ## Iceberg input source :::info + To use the Iceberg input source, load the extension [`druid-iceberg-extensions`](../development/extensions-contrib/iceberg.md). + ::: You use the Iceberg input source to read data stored in the Iceberg table format. For a given table, the input source scans up to the latest Iceberg snapshot from the configured Hive catalog. Druid ingests the underlying live data files using the existing input source formats. @@ -1133,13 +1139,15 @@ This input source provides the following filters: `and`, `equals`, `interval`, a ## Delta Lake input source :::info + To use the Delta Lake input source, load the extension [`druid-deltalake-extensions`](../development/extensions-contrib/delta-lake.md). + ::: You can use the Delta input source to read data stored in a Delta Lake table. For a given table, the input source scans the latest snapshot from the configured table. Druid ingests the underlying delta files from the table. - | Property|Description|Required| +| Property|Description|Required| |---------|-----------|--------| | type|Set this value to `delta`.|yes| | tablePath|The location of the Delta table.|yes| @@ -1155,7 +1163,6 @@ on statistics collected when the non-partitioned table is created. In this scena data that doesn't match the filter. To guarantee that the Delta Kernel prunes out unnecessary column values, only use filters on partitioned columns. - `and` filter: | Property | Description | Required | @@ -1217,7 +1224,6 @@ filters on partitioned columns. 
| column | The table column to apply the filter on. | yes | | value | The value to use in the filter. | yes | - The following is a sample spec to read all records from the Delta table `/delta-table/foo`: ```json diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index fc234cce0a2..398fea9f69a 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -28,12 +28,14 @@ sidebar_label: JSON-based batch ::: Apache Druid supports the following types of JSON-based batch indexing tasks: + - Parallel task indexing (`index_parallel`) that can run multiple indexing tasks concurrently. Parallel task works well for production ingestion tasks. - Simple task indexing (`index`) that run a single indexing task at a time. Simple task indexing is suitable for development and test environments. This topic covers the configuration for `index_parallel` ingestion specs. For related information on batch indexing, see: + - [Batch ingestion method comparison table](./index.md#batch) for a comparison of batch ingestion methods. - [Tutorial: Loading a file](../tutorials/tutorial-batch.md) for a tutorial on JSON-based batch ingestion. - [Input sources](./input-sources.md) for possible input sources. @@ -97,7 +99,6 @@ By default, JSON-based batch ingestion replaces all data in the intervals in you You can also perform concurrent append and replace tasks. For more information, see [Concurrent append and replace](./concurrent-append-replace.md) - #### Fully replacing existing segments using tombstones :::info @@ -124,12 +125,12 @@ You want to re-ingest and overwrite with new data as follows: Unless you set `dropExisting` to true, the result after ingestion with overwrite using the same `MONTH` `segmentGranularity` would be: -* **January**: 1 record -* **February**: 10 records -* **March**: 9 records +- **January**: 1 record +- **February**: 10 records +- **March**: 9 records This may not be what it is expected since the new data has 0 records for January. Set `dropExisting` to true to replace the unneeded January segment with a tombstone. - + ## Parallel indexing example The following example illustrates the configuration for a parallel indexing task. @@ -214,6 +215,7 @@ The following example illustrates the configuration for a parallel indexing task } } ``` + ## Parallel indexing configuration @@ -305,7 +307,7 @@ The segments split hint spec is used only for [`DruidInputSource`](./input-sourc ### `partitionsSpec` -The primary partition for Druid is time. You can define a secondary partitioning method in the partitions spec. Use the `partitionsSpec` type that applies for your [rollup](rollup.md) method. +The primary partition for Druid is time. You can define a secondary partitioning method in the partitions spec. Use the `partitionsSpec` type that applies for your [rollup](rollup.md) method. For perfect rollup, you can use: @@ -366,7 +368,7 @@ In the `partial segment generation` phase, just like the Map phase in MapReduce, the Parallel task splits the input data based on the split hint spec and assigns each split to a worker task. Each worker task (type `partial_index_generate`) reads the assigned split, and partitions rows by the time chunk from `segmentGranularity` (primary partition key) in the `granularitySpec` and then by the hash value of `partitionDimensions` (secondary partition key) in the `partitionsSpec`. 
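The hash-based secondary partitioning described above is driven by the `partitionsSpec`. As a quick reference, the following is a minimal sketch of a hashed spec; the dimension names and shard count are illustrative assumptions:

```json
"partitionsSpec": {
  "type": "hashed",
  "partitionDimensions": ["channel", "countryName"],
  "numShards": 4
}
```

Within each time chunk, rows are hashed on these dimensions, so rows with the same dimension values land in the same partial segment.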
-The partitioned data is stored in local storage of +The partitioned data is stored in local storage of the [middleManager](../design/middlemanager.md) or the [indexer](../design/indexer.md). The `partial segment merge` phase is similar to the Reduce phase in MapReduce. @@ -709,12 +711,14 @@ The returned result contains the worker task spec, a current task status if exis "taskHistory": [] } ``` + `http://{PEON_IP}:{PEON_PORT}/druid/worker/v1/chat/{SUPERVISOR_TASK_ID}/subtaskspec/{SUB_TASK_SPEC_ID}/history` Returns the task attempt history of the worker task spec of the given id, or HTTP 404 Not Found error if the supervisor task is running in the sequential mode. ## Segment pushing modes + While ingesting data using the parallel task indexing, Druid creates segments from the input data and pushes them. For segment pushing, the parallel task index supports the following segment pushing modes based upon your type of [rollup](./rollup.md): @@ -743,10 +747,12 @@ This may help the higher priority tasks to finish earlier than lower priority ta by assigning more task slots to them. ## Splittable input sources + Use the `inputSource` object to define the location where your index can read data. Only the native parallel task and simple task support the input source. For details on available input sources see: -- [S3 input source](./input-sources.md#s3-input-source) (`s3`) reads data from AWS S3 storage. + +- [S3 input source](./input-sources.md#s3-input-source) (`s3`) reads data from Amazon S3 storage. - [Google Cloud Storage input source](./input-sources.md#google-cloud-storage-input-source) (`gs`) reads data from Google Cloud Storage. - [Azure input source](./input-sources.md#azure-input-source) (`azure`) reads data from Azure Blob Storage and Azure Data Lake. - [HDFS input source](./input-sources.md#hdfs-input-source) (`hdfs`) reads data from HDFS storage. diff --git a/docs/ingestion/supervisor.md b/docs/ingestion/supervisor.md index 9320c39a02a..70939adb633 100644 --- a/docs/ingestion/supervisor.md +++ b/docs/ingestion/supervisor.md @@ -42,6 +42,7 @@ The following table outlines the high-level configuration options for a supervis |`spec.dataSchema`|Object|The schema for the indexing task to use during ingestion. See [`dataSchema`](../ingestion/ingestion-spec.md#dataschema) for more information.|Yes| |`spec.ioConfig`|Object|The I/O configuration object to define the connection and I/O-related settings for the supervisor and indexing tasks.|Yes| |`spec.tuningConfig`|Object|The tuning configuration object to define performance-related settings for the supervisor and indexing tasks.|No| +|`suspended`|Boolean|Puts the supervisor in a suspended state|No| ### I/O configuration diff --git a/docs/ingestion/tranquility.md b/docs/ingestion/tranquility.md index 9cc0636fd6c..9124ff04b86 100644 --- a/docs/ingestion/tranquility.md +++ b/docs/ingestion/tranquility.md @@ -22,15 +22,9 @@ title: "Tranquility" ~ under the License. --> -[Tranquility](https://github.com/druid-io/tranquility/) is a separately distributed package for pushing -streams to Druid in real-time. - -Tranquility has not been built against a version of Druid later than Druid 0.9.2 -release. It may still work with the latest Druid servers, but not all features and functionality will be available -due to limitations of older Druid APIs on the Tranquility side. +[Tranquility](https://github.com/druid-io/tranquility/) was a separately distributed package for pushing +streams to Druid in real-time. 
It is not compatible with recent versions of Druid. For new projects that require streaming ingestion, we recommend using Druid's native support for [Apache Kafka](../ingestion/kafka-ingestion.md) or [Amazon Kinesis](../ingestion/kinesis-ingestion.md). - -For more details, check out the [Tranquility GitHub page](https://github.com/druid-io/tranquility/). diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index aeb4305f31e..cf06156c658 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -410,6 +410,7 @@ The following table lists the context parameters for the MSQ task engine: | `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | | `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be a no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | | `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as the [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | +| `removeNullBytes` | SELECT, INSERT or REPLACE

The MSQ engine cannot process null bytes in strings and throws `InvalidNullByteFault` if it encounters them in the source data. If the parameter is set to true, The MSQ engine will remove the null bytes in string fields when reading the data. | `false` | ## Joins diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index bf241ac5708..1d37169684e 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -508,6 +508,19 @@ These metrics are only available if the `OshiSysMonitor` module is included. |`sys/tcpv4/out/rsts`|Total "out reset" packets sent to reset the connection||Generally 0| |`sys/tcpv4/retrans/segs`|Total segments re-transmitted||Varies| + +## S3 multi-part upload + +These metrics are only available if the `druid-s3-extensions` module is included and if certain specific features are being used: MSQ export to S3, durable intermediate storage on S3. + +|Metric|Description|Dimensions|Normal value| +|------|-----------|----------|------------| +|`s3/upload/part/queueSize`|Number of items currently waiting in queue to be uploaded to S3. Each item in the queue corresponds to a single part in a multi-part upload.||Varies| +|`s3/upload/part/queuedTime`|Milliseconds spent by a single item (or part) in queue before it starts getting uploaded to S3.|`uploadId`, `partNumber`|Varies| +|`s3/upload/part/time`|Milliseconds taken to upload a single part of a multi-part upload to S3.|`uploadId`, `partNumber`|Varies| +|`s3/upload/total/time`|Milliseconds taken for uploading all parts of a multi-part upload to S3.|`uploadId`|Varies| +|`s3/upload/total/bytes`|Total bytes uploaded to S3 during a multi-part upload.|`uploadId`|Varies| + ## Cgroup These metrics are available on operating systems with the cgroup kernel feature. All the values are derived by reading from `/sys/fs/cgroup`. diff --git a/docs/querying/groupbyquery.md b/docs/querying/groupbyquery.md index 935bd90a145..a11f82d124a 100644 --- a/docs/querying/groupbyquery.md +++ b/docs/querying/groupbyquery.md @@ -385,7 +385,7 @@ Supported query contexts: |`forceLimitPushDown`|When all fields in the orderby are part of the grouping key, the Broker will push limit application down to the Historical processes. When the sorting order uses fields that are not in the grouping key, applying this optimization can result in approximate results with unknown accuracy, so this optimization is disabled by default in that case. Enabling this context flag turns on limit push down for limit/orderbys that contain non-grouping key columns.|false| |`applyLimitPushDownToSegment`|If Broker pushes limit down to queryable nodes (historicals, peons) then limit results during segment scan. This context value can be used to override `druid.query.groupBy.applyLimitPushDownToSegment`.|true| |`groupByEnableMultiValueUnnesting`|Safety flag to enable/disable the implicit unnesting on multi value column's as part of the grouping key. 'true' indicates multi-value grouping keys are unnested. 'false' returns an error if a multi value column is found as part of the grouping key.|true| - +|`deferExpressionDimensions`|When an entry in `dimensions` references an `expression` virtual column, this property influences whether expression evaluation is deferred from cursor processing to the merge step. Options are:
  • `fixedWidth`: Defer expressions with fixed-width inputs (numeric and dictionary-encoded string).
  • `fixedWidthNonNumeric`: Defer expressions with fixed-width inputs (numeric and dictionary-encoded string), unless the expression output and all inputs are numeric.
  • `singleString`: Defer string-typed expressions with a single dictionary-encoded string input.
  • `always`: Defer all expressions. May require building dictionaries for expression inputs.

These properties only take effect when the `groupBy` query can be vectorized. Non-vectorized queries only defer string-typed expressions of single string inputs.|`fixedWidthNonNumeric`| #### Array based result rows diff --git a/docs/release-info/migr-mvd-array.md b/docs/release-info/migr-mvd-array.md index 6ec9e9ff241..e089a6a733e 100644 --- a/docs/release-info/migr-mvd-array.md +++ b/docs/release-info/migr-mvd-array.md @@ -242,5 +242,5 @@ For an example, see [Ingesting arrays: Native batch and streaming ingestion](../ * For SQL-based batch ingestion, include the [query context parameter](../multi-stage-query/reference.md#context-parameters) `"arrayIngestMode": "array"` and reference the relevant array type (`VARCHAR ARRAY`, `BIGINT ARRAY`, or `DOUBLE ARRAY`) in the [EXTEND clause](../multi-stage-query/reference.md#extern-function) that lists the column names and data types. For examples, see [Ingesting arrays: SQL-based ingestion](../querying/arrays.md#sql-based-ingestion). - As a best practice, always use the ARRAY data type in your input schema. If you want to ingest MVDs, explicitly wrap the string array in [ARRAY_TO_MV](../querying/sql-functions.md#array_to_mv). For an example, see [Multi-value dimensions: SQL-based ingestion](/querying/multi-value-dimensions.md#sql-based-ingestion). + As a best practice, always use the ARRAY data type in your input schema. If you want to ingest MVDs, explicitly wrap the string array in [ARRAY_TO_MV](../querying/sql-functions.md#array_to_mv). For an example, see [Multi-value dimensions: SQL-based ingestion](../querying/multi-value-dimensions.md#sql-based-ingestion). diff --git a/docs/release-info/migr-subquery-limit.md b/docs/release-info/migr-subquery-limit.md index 853be50294d..6801aafdc7b 100644 --- a/docs/release-info/migr-subquery-limit.md +++ b/docs/release-info/migr-subquery-limit.md @@ -52,6 +52,10 @@ This property takes precedence over `maxSubqueryRows`. You can set both `maxSubqueryRows` and `maxSubqueryBytes` at cluster level and override them in individual queries. See [Overriding default query context values](../configuration#overriding-default-query-context-values) for more information. +Make sure you enable the Broker monitor `SubqueryCountStatsMonitor` so that Druid emits metrics for subquery statistics. +To do this, add `org.apache.druid.server.metrics.SubqueryCountStatsMonitor` to the `druid.monitoring.monitors` property in your Broker's `runtime.properties` configuration file. +See [Metrics monitors](../configuration/index.md#metrics-monitors) for more information. + ## Learn more See the following topics for more information: diff --git a/docs/release-info/migration-guide.md b/docs/release-info/migration-guide.md index ca31fce327f..92053b83a11 100644 --- a/docs/release-info/migration-guide.md +++ b/docs/release-info/migration-guide.md @@ -28,7 +28,7 @@ In general, when we introduce new features and behaviors into Apache Druid, we m The guides in this section outline breaking changes introduced in Druid 25.0.0 and later. Each guide provides instructions to migrate to new features. -## Migrate to arrays from multi-value dimensions +## Migrate from multi-value dimensions to arrays Druid now supports SQL-compliant array types. Whenever possible, you should use the array type over multi-value dimensions. See [Migration guide: MVDs to arrays](migr-mvd-array.md). @@ -36,6 +36,6 @@ Druid now supports SQL-compliant array types. Whenever possible, you should use Druid encodes string columns into dictionaries for better compression. 
Front-coded dictionary encoding reduces storage and improves performance by optimizing for strings that share similar beginning substrings. See [Migration guide: front-coded dictionaries](migr-front-coded-dict.md) for more information. -## Migrate to `maxSubqueryBytes` from `maxSubqueryRows` +## Migrate from `maxSubqueryRows` to `maxSubqueryBytes` Druid allows you to set a byte-based limit on subquery size to prevent Brokers from running out of memory when handling large subqueries. The byte-based subquery limit overrides Druid's row-based subquery limit. We recommend that you move towards using byte-based limits starting in Druid 30.0.0. See [Migration guide: subquery limit](migr-subquery-limit.md) for more information. diff --git a/docs/release-info/release-notes.md b/docs/release-info/release-notes.md index 768ceef697b..82e371e8406 100644 --- a/docs/release-info/release-notes.md +++ b/docs/release-info/release-notes.md @@ -97,6 +97,19 @@ This section contains detailed release notes separated by areas. ### Upgrade notes +#### Front-coded dictionaries + + + +In Druid 32.0.0, the front coded dictionaries feature will be turned on by default. Front-coded dictionaries reduce storage and improve performance by optimizing for strings where the front part looks similar. + +Once this feature is on, you cannot easily downgrade to an earlier version that does not support the feature. + +For more information, see [Migration guide: front-coded dictionaries](./migr-front-coded-dict.md). + +If you're already using this feature, you don't need to take any action. + + ### Incompatible changes ### Developer notes diff --git a/docs/release-info/upgrade-notes.md b/docs/release-info/upgrade-notes.md index fd622f21ea3..52adccabbc7 100644 --- a/docs/release-info/upgrade-notes.md +++ b/docs/release-info/upgrade-notes.md @@ -26,6 +26,18 @@ The upgrade notes assume that you are upgrading from the Druid version that imme For the full release notes for a specific version, see the [releases page](https://github.com/apache/druid/releases). +## Announcements + +#### Front-coded dictionaries + +In Druid 32.0.0, the front coded dictionaries feature will be turned on by default. Front-coded dictionaries reduce storage and improve performance by optimizing for strings where the front part looks similar. + +Once this feature is on, you cannot easily downgrade to an earlier version that does not support the feature. + +For more information, see [Migration guide: front-coded dictionaries](./migr-front-coded-dict.md). + +If you're already using this feature, you don't need to take any action. 
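For readers who want to adopt front-coded dictionaries ahead of the default change, the encoding is selected through the segment `indexSpec` in the tuning config. The sketch below is a rough illustration only; confirm the exact field names and supported values against the front-coded dictionaries migration guide, and treat the `bucketSize` shown as an assumed example value:

```json
"tuningConfig": {
  "type": "index_parallel",
  "indexSpec": {
    "stringDictionaryEncoding": {
      "type": "frontCoded",
      "bucketSize": 4
    }
  }
}
```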
+ ## 30.0.0 ### Upgrade notes diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index 480ad5aced1..90e9836e28a 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -1,7 +1,7 @@ --- id: index -title: "Quickstart (local)" -sidebar_label: Quickstart (local) +title: "Local quickstart" +sidebar_label: Local quickstart --- diff --git a/processing/src/main/java/org/apache/druid/common/config/ConfigManager.java b/processing/src/main/java/org/apache/druid/common/config/ConfigManager.java index a9be4c6bf2d..7ba1481e97d 100644 --- a/processing/src/main/java/org/apache/druid/common/config/ConfigManager.java +++ b/processing/src/main/java/org/apache/druid/common/config/ConfigManager.java @@ -61,7 +61,11 @@ public class ConfigManager private volatile PollingCallable poller; @Inject - public ConfigManager(MetadataStorageConnector dbConnector, Supplier dbTables, Supplier config) + public ConfigManager( + MetadataStorageConnector dbConnector, + Supplier dbTables, + Supplier config + ) { this.dbConnector = dbConnector; this.config = config; diff --git a/processing/src/main/java/org/apache/druid/error/DruidException.java b/processing/src/main/java/org/apache/druid/error/DruidException.java index 555e7c67bb1..a04f3f6512c 100644 --- a/processing/src/main/java/org/apache/druid/error/DruidException.java +++ b/processing/src/main/java/org/apache/druid/error/DruidException.java @@ -331,19 +331,19 @@ public class DruidException extends RuntimeException } /** - * Category of error. The simplest way to describe this is that it exists as a classification of errors that + * Category of error. The simplest way to describe this is that it exists as a classification of errors that * enables us to identify the expected response code (e.g. HTTP status code) of a specific DruidException */ public enum Category { /** * Means that the exception is being created defensively, because we want to validate something but expect that - * it should never actually be hit. Using this category is good to provide an indication to future reviewers and + * it should never actually be hit. Using this category is good to provide an indication to future reviewers and * developers that the case being checked is not intended to actually be able to occur in the wild. */ DEFENSIVE(500), /** - * Means that the input provided was malformed in some way. Generally speaking, it is hoped that errors of this + * Means that the input provided was malformed in some way. Generally speaking, it is hoped that errors of this * category have messages written either targeting the USER or ADMIN personas as those are the general users * of the APIs who could generate invalid inputs. */ @@ -356,9 +356,8 @@ public class DruidException extends RuntimeException * Means that an action that was attempted is forbidden */ FORBIDDEN(403), - /** - * Means that the requsted requested resource cannot be found. + * Means that the requested resource cannot be found. */ NOT_FOUND(404), /** diff --git a/processing/src/main/java/org/apache/druid/frame/allocation/MemoryAllocatorFactory.java b/processing/src/main/java/org/apache/druid/frame/allocation/MemoryAllocatorFactory.java index edb74ef19f9..cec34518306 100644 --- a/processing/src/main/java/org/apache/druid/frame/allocation/MemoryAllocatorFactory.java +++ b/processing/src/main/java/org/apache/druid/frame/allocation/MemoryAllocatorFactory.java @@ -22,7 +22,7 @@ package org.apache.druid.frame.allocation; /** * Factory for {@link MemoryAllocator}. 
* - * Used by {@link org.apache.druid.frame.write.FrameWriters#makeFrameWriterFactory} to create + * Used by {@link org.apache.druid.frame.write.FrameWriters#makeRowBasedFrameWriterFactory} to create * {@link org.apache.druid.frame.write.FrameWriterFactory}. */ public interface MemoryAllocatorFactory diff --git a/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java b/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java index cc7f68d099d..028c9fd39c5 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java @@ -56,7 +56,8 @@ public class FieldWriters public static FieldWriter create( final ColumnSelectorFactory columnSelectorFactory, final String columnName, - final ColumnType columnType + final ColumnType columnType, + final boolean removeNullBytes ) { if (columnType == null) { @@ -74,7 +75,7 @@ public class FieldWriters return makeDoubleWriter(columnSelectorFactory, columnName); case STRING: - return makeStringWriter(columnSelectorFactory, columnName); + return makeStringWriter(columnSelectorFactory, columnName, removeNullBytes); case COMPLEX: return makeComplexWriter(columnSelectorFactory, columnName, columnType.getComplexTypeName()); @@ -82,7 +83,7 @@ public class FieldWriters case ARRAY: switch (columnType.getElementType().getType()) { case STRING: - return makeStringArrayWriter(columnSelectorFactory, columnName); + return makeStringArrayWriter(columnSelectorFactory, columnName, removeNullBytes); case LONG: return makeLongArrayWriter(columnSelectorFactory, columnName); case FLOAT: @@ -124,20 +125,22 @@ public class FieldWriters private static FieldWriter makeStringWriter( final ColumnSelectorFactory selectorFactory, - final String columnName + final String columnName, + final boolean removeNullBytes ) { final DimensionSelector selector = selectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName)); - return new StringFieldWriter(selector); + return new StringFieldWriter(selector, removeNullBytes); } private static FieldWriter makeStringArrayWriter( final ColumnSelectorFactory selectorFactory, - final String columnName + final String columnName, + final boolean removeNullBytes ) { final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); - return new StringArrayFieldWriter(selector); + return new StringArrayFieldWriter(selector, removeNullBytes); } private static FieldWriter makeLongArrayWriter( diff --git a/processing/src/main/java/org/apache/druid/frame/field/StringArrayFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/StringArrayFieldWriter.java index d6faed0ae9f..8b17e3241c8 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/StringArrayFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/StringArrayFieldWriter.java @@ -33,10 +33,12 @@ import org.apache.druid.segment.ColumnValueSelector; public class StringArrayFieldWriter implements FieldWriter { private final BaseObjectColumnValueSelector selector; + private final boolean removeNullBytes; - public StringArrayFieldWriter(final BaseObjectColumnValueSelector selector) + public StringArrayFieldWriter(final BaseObjectColumnValueSelector selector, final boolean removeNullBytes) { this.selector = selector; + this.removeNullBytes = removeNullBytes; } @Override @@ -46,7 +48,8 @@ public class StringArrayFieldWriter implements FieldWriter memory, position, maxSize, - 
FrameWriterUtils.getUtf8ByteBuffersFromStringArraySelector(selector) + FrameWriterUtils.getUtf8ByteBuffersFromStringArraySelector(selector), + removeNullBytes ); } diff --git a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java index 8513210c0aa..2ffb79a12da 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java @@ -55,17 +55,19 @@ public class StringFieldWriter implements FieldWriter private static final byte NULL_ROW_SIZE = 2; // NULL_ROW + ROW_TERMINATOR private final DimensionSelector selector; + private final boolean removeNullBytes; - public StringFieldWriter(final DimensionSelector selector) + public StringFieldWriter(final DimensionSelector selector, final boolean removeNullbytes) { this.selector = selector; + this.removeNullBytes = removeNullbytes; } @Override public long writeTo(final WritableMemory memory, final long position, final long maxSize) { final List byteBuffers = FrameWriterUtils.getUtf8ByteBuffersFromStringSelector(selector, true); - return writeUtf8ByteBuffers(memory, position, maxSize, byteBuffers); + return writeUtf8ByteBuffers(memory, position, maxSize, byteBuffers, removeNullBytes); } @Override @@ -89,7 +91,8 @@ public class StringFieldWriter implements FieldWriter final WritableMemory memory, final long position, final long maxSize, - @Nullable final List byteBuffers + @Nullable final List byteBuffers, + final boolean removeNullBytes ) { if (byteBuffers == null) { @@ -125,7 +128,13 @@ public class StringFieldWriter implements FieldWriter written++; if (len > 0) { - FrameWriterUtils.copyByteBufferToMemory(utf8Datum, memory, position + written, len, false); + FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes( + utf8Datum, + memory, + position + written, + len, + removeNullBytes + ); written += len; } } diff --git a/processing/src/main/java/org/apache/druid/frame/processor/SuperSorter.java b/processing/src/main/java/org/apache/druid/frame/processor/SuperSorter.java index 440da49d7c2..164bcdb9559 100644 --- a/processing/src/main/java/org/apache/druid/frame/processor/SuperSorter.java +++ b/processing/src/main/java/org/apache/druid/frame/processor/SuperSorter.java @@ -37,7 +37,6 @@ import it.unimi.dsi.fastutil.longs.LongRBTreeSet; import it.unimi.dsi.fastutil.longs.LongSortedSet; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.MemoryAllocatorFactory; import org.apache.druid.frame.allocation.SingleMemoryAllocatorFactory; import org.apache.druid.frame.channel.BlockingQueueFrameChannel; @@ -129,6 +128,7 @@ public class SuperSorter private final int maxActiveProcessors; private final long rowLimit; private final String cancellationId; + private final boolean removeNullBytes; private final Object runWorkersLock = new Object(); @@ -219,7 +219,8 @@ public class SuperSorter final int maxChannelsPerProcessor, final long rowLimit, @Nullable final String cancellationId, - final SuperSorterProgressTracker superSorterProgressTracker + final SuperSorterProgressTracker superSorterProgressTracker, + final boolean removeNullBytes ) { this.inputChannels = inputChannels; @@ -234,6 +235,7 @@ public class SuperSorter this.rowLimit = rowLimit; this.cancellationId = cancellationId; this.superSorterProgressTracker = superSorterProgressTracker; 
+ this.removeNullBytes = removeNullBytes; for (int i = 0; i < inputChannels.size(); i++) { inputChannelsToRead.add(i); @@ -623,12 +625,13 @@ public class SuperSorter in, frameReader, writableChannel, - FrameWriters.makeFrameWriterFactory( - FrameType.ROW_BASED, // Row-based frames are generally preferred as inputs to mergers + FrameWriters.makeRowBasedFrameWriterFactory( + // Row-based frames are generally preferred as inputs to mergers frameAllocatorFactory, frameReader.signature(), // No sortColumns, because FrameChannelMerger generates frames that are sorted all on its own - Collections.emptyList() + Collections.emptyList(), + removeNullBytes ), sortKey, partitions, diff --git a/processing/src/main/java/org/apache/druid/frame/segment/FrameCursorUtils.java b/processing/src/main/java/org/apache/druid/frame/segment/FrameCursorUtils.java index 3cb5c686e9d..de970363bb4 100644 --- a/processing/src/main/java/org/apache/druid/frame/segment/FrameCursorUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/segment/FrameCursorUtils.java @@ -23,6 +23,7 @@ import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; import org.apache.druid.frame.write.FrameWriter; import org.apache.druid.frame.write.FrameWriterFactory; +import org.apache.druid.frame.write.UnsupportedColumnTypeException; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; @@ -32,6 +33,7 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.filter.Filters; import org.joda.time.Interval; @@ -100,13 +102,18 @@ public class FrameCursorUtils /** * Writes a {@link Cursor} to a sequence of {@link Frame}. This method iterates over the rows of the cursor, * and writes the columns to the frames. The iterable is lazy, and it traverses the required portion of the cursor - * as required + * as required. + *

+ * If the type is missing from the signature, the method throws an exception without advancing/modifying/closing the + * cursor */ public static Iterable cursorToFramesIterable( final Cursor cursor, final FrameWriterFactory frameWriterFactory ) { + throwIfColumnsHaveUnknownType(frameWriterFactory.signature()); + return () -> new Iterator() { @Override @@ -158,7 +165,19 @@ public class FrameCursorUtils final FrameWriterFactory frameWriterFactory ) { - return Sequences.simple(cursorToFramesIterable(cursor, frameWriterFactory)); } + + /** + * Throws {@link UnsupportedColumnTypeException} if the row signature has columns with unknown types. This is used to + * pre-determine if the frames can be materialized as rows, without touching the resource generating the frames. + */ + public static void throwIfColumnsHaveUnknownType(final RowSignature rowSignature) + { + for (int i = 0; i < rowSignature.size(); ++i) { + if (!rowSignature.getColumnType(i).isPresent()) { + throw new UnsupportedColumnTypeException(rowSignature.getColumnName(i), null); + } + } + } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriter.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriter.java index 050bb6513d2..70c892ef33e 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriter.java @@ -28,7 +28,7 @@ import java.nio.ByteOrder; /** * Writer for {@link org.apache.druid.frame.Frame}. See that class for format information. * - * Generally obtained through a {@link FrameWriters#makeFrameWriterFactory} + * Generally obtained through a {@link FrameWriters#makeRowBasedFrameWriterFactory} */ public interface FrameWriter extends Closeable { diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java index 0b497b545ac..d857cf33e03 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java @@ -242,16 +242,48 @@ public class FrameWriterUtils } /** - * Copies "len" bytes from {@code src.position()} to "dstPosition" in "memory". Does not update the position of src. - * - * @throws InvalidNullByteException if "allowNullBytes" is false and a null byte is encountered + * Copies {@code src} to {@code dst} without making any modification to the source data. */ - public static void copyByteBufferToMemory( + public static void copyByteBufferToMemoryAllowingNullBytes( + final ByteBuffer src, + final WritableMemory dst, + final long dstPosition, + final int len + ) + { + copyByteBufferToMemory(src, dst, dstPosition, len, true, false); + } + + /** + * Copies {@code src} to {@code dst}, disallowing null bytes to be written to the destination. If {@code removeNullBytes} + * is true, the method will drop the null bytes, and if it is false, the method will throw an exception. + */ + public static void copyByteBufferToMemoryDisallowingNullBytes( final ByteBuffer src, final WritableMemory dst, final long dstPosition, final int len, - final boolean allowNullBytes + final boolean removeNullBytes + ) + { + copyByteBufferToMemory(src, dst, dstPosition, len, false, removeNullBytes); + } + + /** + * Copies "len" bytes from {@code src.position()} to "dstPosition" in "memory". Does not update the position of src. + *

+ * Whenever "allowNullBytes" is true, "removeNullBytes" must be false. Use the methods {@link #copyByteBufferToMemoryAllowingNullBytes} + * and {@link #copyByteBufferToMemoryDisallowingNullBytes} to copy byte buffers into memory. + *

+ * @throws InvalidNullByteException if "allowNullBytes" and "removeNullBytes" is false and a null byte is encountered + */ + private static void copyByteBufferToMemory( + final ByteBuffer src, + final WritableMemory dst, + final long dstPosition, + final int len, + final boolean allowNullBytes, + final boolean removeNullBytes ) { if (src.remaining() < len) { @@ -262,21 +294,39 @@ public class FrameWriterUtils } final int srcEnd = src.position() + len; - long q = dstPosition; - for (int p = src.position(); p < srcEnd; p++, q++) { - final byte b = src.get(p); - - if (!allowNullBytes && b == 0) { - ByteBuffer duplicate = src.duplicate(); - duplicate.limit(srcEnd); - throw InvalidNullByteException.builder() - .value(StringUtils.fromUtf8(duplicate)) - .position(p - src.position()) - .build(); + if (allowNullBytes) { + if (src.hasArray()) { + // Null bytes are ignored and the src buffer is backed by an array. Bulk copying to the destination would be the fastest + dst.putByteArray(dstPosition, src.array(), src.arrayOffset() + src.position(), len); + } else { + // Null bytes are ignored and the src buffer is not backed by an array. We can copy the byte buffer to the destination individually + long q = dstPosition; + for (int p = src.position(); p < srcEnd; p++, q++) { + final byte b = src.get(p); + dst.putByte(q, b); + } } + } else { + long q = dstPosition; + for (int p = src.position(); p < srcEnd; p++) { + final byte b = src.get(p); - dst.putByte(q, b); + if (b == 0) { + if (!removeNullBytes) { + // Cannot ignore the null byte, but cannot remove them as well. Therefore, throw an error. + ByteBuffer duplicate = src.duplicate(); + duplicate.limit(srcEnd); + throw InvalidNullByteException.builder() + .value(StringUtils.fromUtf8(duplicate)) + .position(p - src.position()) + .build(); + } + } else { + dst.putByte(q, b); + q++; + } + } } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriters.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriters.java index 4993cf9b2dc..186d05460c3 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriters.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriters.java @@ -19,13 +19,10 @@ package org.apache.druid.frame.write; -import com.google.common.base.Preconditions; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.MemoryAllocatorFactory; import org.apache.druid.frame.key.KeyColumn; import org.apache.druid.frame.write.columnar.ColumnarFrameWriterFactory; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -47,28 +44,29 @@ public class FrameWriters /** * Creates a {@link FrameWriterFactory}. * - * @param frameType type of the frames * @param allocatorFactory supplier of allocators, which ultimately determine frame size. Frames are closed and * written once the allocator runs out of memory. * @param signature signature of the frames * @param sortColumns sort columns for the frames. If nonempty, {@link FrameSort#sort} is used to sort the * resulting frames. 
*/ - public static FrameWriterFactory makeFrameWriterFactory( - final FrameType frameType, + public static FrameWriterFactory makeRowBasedFrameWriterFactory( + final MemoryAllocatorFactory allocatorFactory, + final RowSignature signature, + final List sortColumns, + final boolean removeNullBytes + ) + { + return new RowBasedFrameWriterFactory(allocatorFactory, signature, sortColumns, removeNullBytes); + } + + public static FrameWriterFactory makeColumnBasedFrameWriterFactory( final MemoryAllocatorFactory allocatorFactory, final RowSignature signature, final List sortColumns ) { - switch (Preconditions.checkNotNull(frameType, "frameType")) { - case COLUMNAR: - return new ColumnarFrameWriterFactory(allocatorFactory, signature, sortColumns); - case ROW_BASED: - return new RowBasedFrameWriterFactory(allocatorFactory, signature, sortColumns); - default: - throw new ISE("Unrecognized frame type [%s]", frameType); - } + return new ColumnarFrameWriterFactory(allocatorFactory, signature, sortColumns); } /** diff --git a/processing/src/main/java/org/apache/druid/frame/write/RowBasedFrameWriterFactory.java b/processing/src/main/java/org/apache/druid/frame/write/RowBasedFrameWriterFactory.java index 720d808bf32..d5fca90aa8c 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/RowBasedFrameWriterFactory.java +++ b/processing/src/main/java/org/apache/druid/frame/write/RowBasedFrameWriterFactory.java @@ -40,16 +40,19 @@ public class RowBasedFrameWriterFactory implements FrameWriterFactory private final MemoryAllocatorFactory allocatorFactory; private final RowSignature signature; private final List sortColumns; + private final boolean removeNullBytes; public RowBasedFrameWriterFactory( final MemoryAllocatorFactory allocatorFactory, final RowSignature signature, - final List sortColumns + final List sortColumns, + final boolean removeNullBytes ) { this.allocatorFactory = allocatorFactory; this.signature = signature; this.sortColumns = sortColumns; + this.removeNullBytes = removeNullBytes; FrameWriterUtils.verifySortColumns(sortColumns, signature); } @@ -70,7 +73,7 @@ public class RowBasedFrameWriterFactory implements FrameWriterFactory return new RowBasedFrameWriter( signature, sortColumns, - makeFieldWriters(columnSelectorFactory), + makeFieldWriters(columnSelectorFactory, removeNullBytes), FrameReaderUtils.makeRowMemorySupplier(columnSelectorFactory, signature), rowOrderMemory, rowOffsetMemory, @@ -102,7 +105,7 @@ public class RowBasedFrameWriterFactory implements FrameWriterFactory * The returned {@link FieldWriter} objects are not thread-safe, and should only be used with a * single frame writer. 
*/ - private List makeFieldWriters(final ColumnSelectorFactory columnSelectorFactory) + private List makeFieldWriters(final ColumnSelectorFactory columnSelectorFactory, final boolean removeNullBytes) { final List fieldWriters = new ArrayList<>(); @@ -111,7 +114,7 @@ public class RowBasedFrameWriterFactory implements FrameWriterFactory final String column = signature.getColumnName(i); // note: null type won't work, but we'll get a nice error from FrameColumnWriters.create final ColumnType columnType = signature.getColumnType(i).orElse(null); - fieldWriters.add(FieldWriters.create(columnSelectorFactory, column, columnType)); + fieldWriters.add(FieldWriters.create(columnSelectorFactory, column, columnType, removeNullBytes)); } } catch (Throwable e) { diff --git a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java index ec812d9654c..8eee0fd0cef 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/write/columnar/StringFrameColumnWriter.java @@ -164,12 +164,11 @@ public abstract class StringFrameColumnWriter imp assert stringDataCursor != null; // Won't be null when len > 0, since utf8DataByteLength would be > 0. // Since we allow null bytes, this call wouldn't throw InvalidNullByteException - FrameWriterUtils.copyByteBufferToMemory( + FrameWriterUtils.copyByteBufferToMemoryAllowingNullBytes( utf8Datum, stringDataCursor.memory(), stringDataCursor.start() + lastStringLength, - len, - true + len ); } diff --git a/processing/src/main/java/org/apache/druid/common/guava/ThreadRenamingCallable.java b/processing/src/main/java/org/apache/druid/indexer/CompactionEngine.java similarity index 54% rename from processing/src/main/java/org/apache/druid/common/guava/ThreadRenamingCallable.java rename to processing/src/main/java/org/apache/druid/indexer/CompactionEngine.java index 491d3890a57..a95f64b896a 100644 --- a/processing/src/main/java/org/apache/druid/common/guava/ThreadRenamingCallable.java +++ b/processing/src/main/java/org/apache/druid/indexer/CompactionEngine.java @@ -17,34 +17,33 @@ * under the License. */ -package org.apache.druid.common.guava; +package org.apache.druid.indexer; -import java.util.concurrent.Callable; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.StringUtils; -public abstract class ThreadRenamingCallable implements Callable +import javax.annotation.Nullable; + +/** + * Encapsulates the Engine to be used for a compaction task. + * Should be kept in sync with the subtypes for {@link org.apache.druid.indexing.common.task.CompactionRunner}. + */ +public enum CompactionEngine { - private final String name; - - public ThreadRenamingCallable( - String name - ) - { - this.name = name; - } + NATIVE, + MSQ; @Override - public final T call() + @JsonValue + public String toString() { - final Thread currThread = Thread.currentThread(); - String currName = currThread.getName(); - try { - currThread.setName(name); - return doCall(); - } - finally { - currThread.setName(currName); - } + return StringUtils.toLowerCase(this.name()); } - public abstract T doCall(); + @JsonCreator + public static CompactionEngine fromString(@Nullable String name) + { + return name == null ? 
null : valueOf(StringUtils.toUpperCase(name)); + } } diff --git a/processing/src/main/java/org/apache/druid/indexer/partitions/SecondaryPartitionType.java b/processing/src/main/java/org/apache/druid/indexer/partitions/SecondaryPartitionType.java index a8f1a1f8539..45a38c65815 100644 --- a/processing/src/main/java/org/apache/druid/indexer/partitions/SecondaryPartitionType.java +++ b/processing/src/main/java/org/apache/druid/indexer/partitions/SecondaryPartitionType.java @@ -43,8 +43,8 @@ public enum SecondaryPartitionType /** * Range partitioning partitions segments in the same time chunk based on the value range of the partition dimension. * - * @see SingleDimensionPartitionsSpec - * @see org.apache.druid.timeline.partition.SingleDimensionShardSpec + * @see DimensionRangePartitionsSpec + * @see org.apache.druid.timeline.partition.DimensionRangeShardSpec */ RANGE } diff --git a/processing/src/main/java/org/apache/druid/jackson/AggregatorsModule.java b/processing/src/main/java/org/apache/druid/jackson/AggregatorsModule.java index f7aca511e17..200e6fcb139 100644 --- a/processing/src/main/java/org/apache/druid/jackson/AggregatorsModule.java +++ b/processing/src/main/java/org/apache/druid/jackson/AggregatorsModule.java @@ -83,6 +83,16 @@ public class AggregatorsModule extends SimpleModule { super("AggregatorFactories"); + registerComplexMetricsAndSerde(); + + setMixInAnnotation(AggregatorFactory.class, AggregatorFactoryMixin.class); + setMixInAnnotation(PostAggregator.class, PostAggregatorMixin.class); + + addSerializer(DoubleMeanHolder.class, DoubleMeanHolder.Serializer.INSTANCE); + } + + public static void registerComplexMetricsAndSerde() + { ComplexMetrics.registerSerde(HyperUniquesSerde.TYPE_NAME, new HyperUniquesSerde()); ComplexMetrics.registerSerde(PreComputedHyperUniquesSerde.TYPE_NAME, new PreComputedHyperUniquesSerde()); ComplexMetrics.registerSerde( @@ -102,11 +112,6 @@ public class AggregatorsModule extends SimpleModule SerializablePairLongLongComplexMetricSerde.TYPE_NAME, new SerializablePairLongLongComplexMetricSerde() ); - - setMixInAnnotation(AggregatorFactory.class, AggregatorFactoryMixin.class); - setMixInAnnotation(PostAggregator.class, PostAggregatorMixin.class); - - addSerializer(DoubleMeanHolder.class, DoubleMeanHolder.Serializer.INSTANCE); } @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") diff --git a/processing/src/main/java/org/apache/druid/java/util/common/concurrent/ScheduledExecutors.java b/processing/src/main/java/org/apache/druid/java/util/common/concurrent/ScheduledExecutors.java index 97f43f6c9b9..72cdaf15edf 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/concurrent/ScheduledExecutors.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/concurrent/ScheduledExecutors.java @@ -64,16 +64,6 @@ public class ScheduledExecutors ); } - /** - * Run callable repeatedly with the given delay between calls, after the given - * initial delay, until it returns Signal.STOP. Exceptions are caught and - * logged as errors. - */ - public static void scheduleWithFixedDelay(ScheduledExecutorService exec, Duration delay, Callable callable) - { - scheduleWithFixedDelay(exec, delay, delay, callable); - } - /** * Run callable repeatedly with the given delay between calls, until it * returns Signal.STOP. Exceptions are caught and logged as errors. 
diff --git a/processing/src/main/java/org/apache/druid/java/util/metrics/CgroupCpuSetMonitor.java b/processing/src/main/java/org/apache/druid/java/util/metrics/CgroupCpuSetMonitor.java index 0534e00259f..84de0fd216d 100644 --- a/processing/src/main/java/org/apache/druid/java/util/metrics/CgroupCpuSetMonitor.java +++ b/processing/src/main/java/org/apache/druid/java/util/metrics/CgroupCpuSetMonitor.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer; import org.apache.druid.java.util.metrics.cgroups.CpuSet; +import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer; import java.util.Map; @@ -41,7 +42,7 @@ public class CgroupCpuSetMonitor extends FeedDefiningMonitor public CgroupCpuSetMonitor(final Map dimensions, String feed) { - this(null, dimensions, feed); + this(new ProcSelfCgroupDiscoverer(), dimensions, feed); } public CgroupCpuSetMonitor(final Map dimensions) diff --git a/processing/src/main/java/org/apache/druid/math/expr/Function.java b/processing/src/main/java/org/apache/druid/math/expr/Function.java index 48bc0570aaa..d62d632f40b 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Function.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Function.java @@ -622,11 +622,10 @@ public interface Function extends NamedFunction ExprEval exprEval = expr.eval(bindings); ExpressionType exprType = exprEval.type(); - if (isValidType(exprType)) { - outputType = ExpressionTypeConversion.function(outputType, exprType); - } - if (exprEval.value() != null) { + if (isValidType(exprType)) { + outputType = ExpressionTypeConversion.function(outputType, exprType); + } evals.add(exprEval); } } diff --git a/processing/src/main/java/org/apache/druid/query/QueryContext.java b/processing/src/main/java/org/apache/druid/query/QueryContext.java index daa6760f8f3..8b2f7e27e68 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContext.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContext.java @@ -613,14 +613,6 @@ public class QueryContext ); } - public boolean isWindowingStrictValidation() - { - return getBoolean( - QueryContexts.WINDOWING_STRICT_VALIDATION, - QueryContexts.DEFAULT_WINDOWING_STRICT_VALIDATION - ); - } - public boolean isCatalogValidationEnabled() { return getBoolean( diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java index 61520a04bc2..402001efe7d 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java +++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java @@ -87,7 +87,6 @@ public class QueryContexts public static final String SERIALIZE_DATE_TIME_AS_LONG_INNER_KEY = "serializeDateTimeAsLongInner"; public static final String UNCOVERED_INTERVALS_LIMIT_KEY = "uncoveredIntervalsLimit"; public static final String MIN_TOP_N_THRESHOLD = "minTopNThreshold"; - public static final String WINDOWING_STRICT_VALIDATION = "windowingStrictValidation"; public static final String CATALOG_VALIDATION_ENABLED = "catalogValidationEnabled"; // Unique identifier for the query, that is used to map the global shared resources (specifically merge buffers) to the // query's runtime @@ -126,7 +125,6 @@ public class QueryContexts public static final int DEFAULT_IN_FUNCTION_THRESHOLD = 100; public static final int 
DEFAULT_IN_FUNCTION_EXPR_THRESHOLD = 2; public static final boolean DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING = false; - public static final boolean DEFAULT_WINDOWING_STRICT_VALIDATION = true; public static final boolean DEFAULT_CATALOG_VALIDATION_ENABLED = true; @SuppressWarnings("unused") // Used by Jackson serialization diff --git a/processing/src/main/java/org/apache/druid/query/QueryToolChest.java b/processing/src/main/java/org/apache/druid/query/QueryToolChest.java index b0678f247c9..fa394beec43 100644 --- a/processing/src/main/java/org/apache/druid/query/QueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/QueryToolChest.java @@ -251,19 +251,36 @@ public abstract class QueryToolChest getResultTypeReference(); + /** + * Like {@link #getCacheStrategy(Query, ObjectMapper)} but the caller doesn't supply the object mapper for deserializing + * and converting the cached data to the desired type. It is up to the individual implementations to decide the appropriate action in that case. + * They can either throw an exception outright or, if the query does not require the object mapper for proper downstream processing, + * work with the generic Java types instead. + *

+ * @deprecated Use {@link #getCacheStrategy(Query, ObjectMapper)} instead + */ + @Deprecated + @Nullable + public CacheStrategy getCacheStrategy(QueryType query) + { + return null; + } + /** * Returns a CacheStrategy to be used to load data into the cache and remove it from the cache. *

* This is optional. If it returns null, caching is effectively disabled for the query. * * @param query The query whose results might be cached + * @param mapper Object mapper to convert the deserialized generic java objects to desired types. It can be nullable + * to preserve backward compatibility. * @param The type of object that will be stored in the cache * @return A CacheStrategy that can be used to populate and read from the Cache */ @Nullable - public CacheStrategy getCacheStrategy(QueryType query) + public CacheStrategy getCacheStrategy(QueryType query, @Nullable ObjectMapper mapper) { - return null; + return getCacheStrategy(query); } /** diff --git a/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java index dbe8922f2e9..21fb5c53afc 100644 --- a/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.datasourcemetadata; import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; import com.google.common.base.Functions; import com.google.inject.Inject; @@ -38,6 +39,7 @@ import org.apache.druid.query.aggregation.MetricManipulationFn; import org.apache.druid.query.context.ResponseContext; import org.apache.druid.timeline.LogicalSegment; +import javax.annotation.Nullable; import java.util.List; import java.util.stream.Collectors; @@ -119,4 +121,10 @@ public class DataSourceQueryQueryToolChest { return null; } + + @Override + public CacheStrategy getCacheStrategy(DataSourceMetadataQuery query, @Nullable ObjectMapper mapper) + { + return null; + } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java b/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java index e2bd808d7b9..be513b40248 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java +++ b/processing/src/main/java/org/apache/druid/query/expression/ExprUtils.java @@ -20,6 +20,7 @@ package org.apache.druid.query.expression; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.granularity.PeriodGranularity; @@ -45,13 +46,25 @@ public class ExprUtils } static PeriodGranularity toPeriodGranularity( + final Expr wrappingExpr, final Expr periodArg, @Nullable final Expr originArg, @Nullable final Expr timeZoneArg, final Expr.ObjectBinding bindings ) { - final Period period = new Period(periodArg.eval(bindings).asString()); + final Period period; + try { + period = new Period(periodArg.eval(bindings).asString()); + } + catch (IllegalArgumentException iae) { + throw InvalidInput.exception( + "Invalid period[%s] specified for expression[%s]: [%s]", + periodArg.stringify(), + wrappingExpr.stringify(), + iae.getMessage() + ); + } final DateTime origin; final DateTimeZone timeZone; @@ -69,7 +82,7 @@ public class ExprUtils final Object value = originArg.eval(bindings).valueOrDefault(); if (value instanceof String && NullHandling.isNullOrEquivalent((String) value)) { // We get a blank string here, when sql compatible null handling is enabled - 
// and expression contains empty string for for origin + // and expression contains empty string for origin // e.g timestamp_floor(\"__time\",'PT1M','','UTC') origin = null; } else { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java index cfd63f1ea61..3c5102ae7a2 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampCeilExprMacro.java @@ -63,7 +63,7 @@ public class TimestampCeilExprMacro implements ExprMacroTable.ExprMacro TimestampCeilExpr(final TimestampCeilExprMacro macro, final List args) { super(macro, args); - this.granularity = getGranularity(args, InputBindings.nilBindings()); + this.granularity = getGranularity(this, args, InputBindings.nilBindings()); } @Nonnull @@ -113,9 +113,14 @@ public class TimestampCeilExprMacro implements ExprMacroTable.ExprMacro } } - private static PeriodGranularity getGranularity(final List args, final Expr.ObjectBinding bindings) + private static PeriodGranularity getGranularity( + final Expr expr, + final List args, + final Expr.ObjectBinding bindings + ) { return ExprUtils.toPeriodGranularity( + expr, args.get(1), args.size() > 2 ? args.get(2) : null, args.size() > 3 ? args.get(3) : null, @@ -135,7 +140,7 @@ public class TimestampCeilExprMacro implements ExprMacroTable.ExprMacro @Override public ExprEval eval(final ObjectBinding bindings) { - final PeriodGranularity granularity = getGranularity(args, bindings); + final PeriodGranularity granularity = getGranularity(this, args, bindings); long argTime = args.get(0).eval(bindings).asLong(); long bucketStartTime = granularity.bucketStart(argTime); if (argTime == bucketStartTime) { diff --git a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java index a243273b8f0..02eed7327f1 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/TimestampFloorExprMacro.java @@ -56,9 +56,14 @@ public class TimestampFloorExprMacro implements ExprMacroTable.ExprMacro } } - private static PeriodGranularity computeGranularity(final List args, final Expr.ObjectBinding bindings) + private static PeriodGranularity computeGranularity( + final Expr expr, + final List args, + final Expr.ObjectBinding bindings + ) { return ExprUtils.toPeriodGranularity( + expr, args.get(1), args.size() > 2 ? args.get(2) : null, args.size() > 3 ? 
args.get(3) : null, @@ -73,7 +78,7 @@ public class TimestampFloorExprMacro implements ExprMacroTable.ExprMacro TimestampFloorExpr(final TimestampFloorExprMacro macro, final List args) { super(macro, args); - this.granularity = computeGranularity(args, InputBindings.nilBindings()); + this.granularity = computeGranularity(this, args, InputBindings.nilBindings()); } /** @@ -170,7 +175,7 @@ public class TimestampFloorExprMacro implements ExprMacroTable.ExprMacro @Override public ExprEval eval(final ObjectBinding bindings) { - final PeriodGranularity granularity = computeGranularity(args, bindings); + final PeriodGranularity granularity = computeGranularity(this, args, bindings); return ExprEval.of(granularity.bucketStart(args.get(0).eval(bindings).asLong())); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java index 06506c64d1a..f7b2dd1cdb9 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java @@ -31,6 +31,7 @@ import com.google.common.collect.TreeRangeSet; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.filter.vector.VectorValueMatcher; @@ -43,12 +44,14 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnProcessorFactory; import org.apache.druid.segment.ColumnProcessors; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.column.Types; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.PredicateValueMatcherFactory; @@ -244,8 +247,9 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory) { final ColumnCapabilities capabilities = factory.getColumnCapabilities(column); - - if (matchValueType.isPrimitive() && (capabilities == null || capabilities.isPrimitive())) { + final boolean primitiveMatch = matchValueType.isPrimitive() && (capabilities == null || capabilities.isPrimitive()); + if (primitiveMatch && useSimpleEquality(capabilities, matchValueType)) { + // if possible, use simplified value matcher instead of predicate return ColumnProcessors.makeVectorProcessor( column, VectorValueMatcherColumnProcessorFactory.instance(), @@ -298,6 +302,20 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt ); } + /** + * Can the match value type be cast directly to column type for equality comparison? For non-numeric match types, we + * just use exact string equality regardless of the column type. For numeric match value types against string columns, + * we instead cast the string to the match value type number for matching equality. 
+ */ + public static boolean useSimpleEquality(TypeSignature columnType, ColumnType matchValueType) + { + if (Types.is(columnType, ValueType.STRING)) { + return !matchValueType.isNumeric(); + } + return true; + } + + @Nullable public static BitmapColumnIndex getEqualityIndex( String column, ExprEval matchValueEval, @@ -311,20 +329,22 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt return new AllUnknownBitmapColumnIndex(selector); } - final ValueIndexes valueIndexes = indexSupplier.as(ValueIndexes.class); - if (valueIndexes != null) { - // matchValueEval.value() cannot be null here due to check in the constructor - //noinspection DataFlowIssue - return valueIndexes.forValue(matchValueEval.value(), matchValueType); - } + if (useSimpleEquality(selector.getColumnCapabilities(column), matchValueType)) { + final ValueIndexes valueIndexes = indexSupplier.as(ValueIndexes.class); + if (valueIndexes != null) { + // matchValueEval.value() cannot be null here due to check in the constructor + //noinspection DataFlowIssue + return valueIndexes.forValue(matchValueEval.value(), matchValueType); + } + if (matchValueType.isPrimitive()) { + final StringValueSetIndexes stringValueSetIndexes = indexSupplier.as(StringValueSetIndexes.class); + if (stringValueSetIndexes != null) { - if (matchValueType.isPrimitive()) { - final StringValueSetIndexes stringValueSetIndexes = indexSupplier.as(StringValueSetIndexes.class); - if (stringValueSetIndexes != null) { - - return stringValueSetIndexes.forValue(matchValueEval.asString()); + return stringValueSetIndexes.forValue(matchValueEval.asString()); + } } } + // fall back to predicate based index if it is available final DruidPredicateIndexes predicateIndexes = indexSupplier.as(DruidPredicateIndexes.class); if (predicateIndexes != null) { @@ -408,11 +428,38 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt private Supplier> makeStringPredicateSupplier() { return Suppliers.memoize(() -> { - final ExprEval castForComparison = ExprEval.castForEqualityComparison(matchValue, ExpressionType.STRING); - if (castForComparison == null) { - return DruidObjectPredicate.alwaysFalseWithNullUnknown(); + // when matching strings to numeric match values, use numeric comparator to implicitly cast the string to number + if (matchValue.type().isNumeric()) { + if (matchValue.type().is(ExprType.LONG)) { + return value -> { + if (value == null) { + return DruidPredicateMatch.UNKNOWN; + } + final Long l = DimensionHandlerUtils.convertObjectToLong(value); + if (l == null) { + return DruidPredicateMatch.FALSE; + } + return DruidPredicateMatch.of(matchValue.asLong() == l); + }; + } else { + return value -> { + if (value == null) { + return DruidPredicateMatch.UNKNOWN; + } + final Double d = DimensionHandlerUtils.convertObjectToDouble(value); + if (d == null) { + return DruidPredicateMatch.FALSE; + } + return DruidPredicateMatch.of(matchValue.asDouble() == d); + }; + } + } else { + final ExprEval castForComparison = ExprEval.castForEqualityComparison(matchValue, ExpressionType.STRING); + if (castForComparison == null) { + return DruidObjectPredicate.alwaysFalseWithNullUnknown(); + } + return DruidObjectPredicate.equalTo(castForComparison.asString()); } - return DruidObjectPredicate.equalTo(castForComparison.asString()); }); } @@ -548,6 +595,10 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt @Override public ValueMatcher makeDimensionProcessor(DimensionSelector selector, boolean multiValue) { + 
// use the predicate matcher when matching numeric values since it casts the strings to numeric types + if (matchValue.type().isNumeric()) { + return predicateMatcherFactory.makeDimensionProcessor(selector, multiValue); + } final ExprEval castForComparison = ExprEval.castForEqualityComparison(matchValue, ExpressionType.STRING); if (castForComparison == null) { return ValueMatchers.makeAlwaysFalseWithNullUnknownDimensionMatcher(selector, multiValue); diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index 63fc48559ac..527b5912208 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -314,8 +314,7 @@ public class RangeFilter extends AbstractOptimizableDimFilter implements Filter final String upper = hasUpperBound() ? upperEval.asString() : null; return rangeIndexes.forRange(lower, lowerOpen, upper, upperOpen); } - } - if (matchValueType.isNumeric()) { + } else if (matchValueType.isNumeric()) { final NumericRangeIndexes rangeIndexes = indexSupplier.as(NumericRangeIndexes.class); if (rangeIndexes != null) { final Number lower = (Number) lowerEval.value(); diff --git a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java index 05cd4730e4b..019862e5014 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/SelectorDimFilter.java @@ -34,7 +34,6 @@ import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.SelectorFilter; import javax.annotation.Nullable; - import java.util.Objects; import java.util.Set; diff --git a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java index 63e3fbd4541..1230b522111 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/TypedInFilter.java @@ -36,15 +36,21 @@ import com.google.common.collect.Sets; import com.google.common.collect.TreeRangeSet; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; +import com.google.common.primitives.Doubles; import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; +import it.unimi.dsi.fastutil.doubles.DoubleSet; import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongSet; import it.unimi.dsi.fastutil.objects.ObjectArrays; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Evals; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.filter.vector.VectorValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; @@ -301,9 +307,11 @@ public class TypedInFilter extends AbstractOptimizableDimFilter implements Filte } } - final ValueSetIndexes valueSetIndexes = 
indexSupplier.as(ValueSetIndexes.class); - if (valueSetIndexes != null) { - return valueSetIndexes.forSortedValues(sortedMatchValues.get(), matchValueType); + if (EqualityFilter.useSimpleEquality(selector.getColumnCapabilities(column), matchValueType)) { + final ValueSetIndexes valueSetIndexes = indexSupplier.as(ValueSetIndexes.class); + if (valueSetIndexes != null) { + return valueSetIndexes.forSortedValues(sortedMatchValues.get(), matchValueType); + } } return Filters.makePredicateIndex( @@ -452,20 +460,20 @@ public class TypedInFilter extends AbstractOptimizableDimFilter implements Filte } @Nullable - private static Object coerceValue(@Nullable Object o, ColumnType matchValueType) + private static T coerceValue(@Nullable Object o, ColumnType matchValueType) { if (o == null) { return null; } switch (matchValueType.getType()) { case STRING: - return DimensionHandlerUtils.convertObjectToString(o); + return (T) DimensionHandlerUtils.convertObjectToString(o); case LONG: - return DimensionHandlerUtils.convertObjectToLong(o); + return (T) DimensionHandlerUtils.convertObjectToLong(o); case FLOAT: - return DimensionHandlerUtils.convertObjectToFloat(o); + return (T) DimensionHandlerUtils.convertObjectToFloat(o); case DOUBLE: - return DimensionHandlerUtils.convertObjectToDouble(o); + return (T) DimensionHandlerUtils.convertObjectToDouble(o); default: throw InvalidInput.exception("Unsupported matchValueType[%s]", matchValueType); } @@ -540,11 +548,51 @@ public class TypedInFilter extends AbstractOptimizableDimFilter implements Filte final int index = Collections.binarySearch(sortedValues, value, comparator); return DruidPredicateMatch.of(index >= 0); }; + } else if (matchValueType.is(ValueType.LONG)) { + final LongSet valueSet = new LongOpenHashSet(sortedValues.size()); + for (Object o : sortedValues) { + final Long l = DimensionHandlerUtils.convertObjectToLong(o); + if (l != null) { + valueSet.add(l.longValue()); + } + } + return value -> { + if (value == null) { + return containsNull ? DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + final Long castValue = GuavaUtils.tryParseLong(value); + if (castValue == null) { + return DruidPredicateMatch.FALSE; + } + return DruidPredicateMatch.of(valueSet.contains(castValue)); + }; + } else if (matchValueType.isNumeric()) { + // double or float + final DoubleSet valueSet = new DoubleOpenHashSet(sortedValues.size()); + for (Object o : sortedValues) { + Double d = DimensionHandlerUtils.convertObjectToDouble(o); + if (d != null) { + valueSet.add(d.doubleValue()); + } + } + return value -> { + if (value == null) { + return containsNull ? 
DruidPredicateMatch.TRUE : DruidPredicateMatch.UNKNOWN; + } + + final Double d = Doubles.tryParse(value); + if (d == null) { + return DruidPredicateMatch.FALSE; + } + return DruidPredicateMatch.of(valueSet.contains(d)); + }; } + // convert set to strings + final ExpressionType matchExpressionType = ExpressionType.fromColumnTypeStrict(matchValueType); final Set stringSet = Sets.newHashSetWithExpectedSize(sortedValues.size()); for (Object o : sortedValues) { - stringSet.add(Evals.asString(o)); + stringSet.add(ExprEval.ofType(matchExpressionType, o).castTo(ExpressionType.STRING).asString()); } return value -> { if (value == null) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java new file mode 100644 index 00000000000..9f6a88f08b2 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/groupby/DeferExpressionDimensions.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.math.expr.ExprType; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.virtual.ExpressionPlan; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; + +import java.util.List; + +/** + * Controls deferral of {@link ExpressionVirtualColumn} in {@link GroupByQuery}. + */ +public enum DeferExpressionDimensions +{ + SINGLE_STRING("singleString") { + @Override + public boolean useDeferredGroupBySelector( + ExpressionPlan plan, + List requiredBindingsList, + ColumnInspector inspector + ) + { + return false; + } + }, + + /** + * Defer expressions when their input variables are all fixed-width types (primitive numbers, or dictionary encoded). + */ + FIXED_WIDTH("fixedWidth") { + @Override + public boolean useDeferredGroupBySelector( + ExpressionPlan plan, + List requiredBindingsList, + ColumnInspector inspector + ) + { + if (isInnatelyDeferrable(plan, requiredBindingsList, inspector)) { + return false; + } + + for (final String requiredBinding : requiredBindingsList) { + final ColumnCapabilities capabilities = inspector.getColumnCapabilities(requiredBinding); + if (capabilities == null) { + return false; + } + + if (!capabilities.isNumeric() && !isDictionaryEncodedScalarString(capabilities)) { + // Not fixed-width. 
+ return false; + } + } + + return true; + } + }, + + /** + * Defer expressions when their input variables are all fixed-width types (primitive numbers, or dictionary encoded). + */ + FIXED_WIDTH_NON_NUMERIC("fixedWidthNonNumeric") { + @Override + public boolean useDeferredGroupBySelector( + ExpressionPlan plan, + List requiredBindingsList, + ColumnInspector inspector + ) + { + if (isInnatelyDeferrable(plan, requiredBindingsList, inspector)) { + return false; + } + + boolean allNumericInputs = true; + + for (final String requiredBinding : requiredBindingsList) { + final ColumnCapabilities capabilities = inspector.getColumnCapabilities(requiredBinding); + if (capabilities == null) { + return false; + } + + allNumericInputs = allNumericInputs && capabilities.isNumeric(); + + if (!capabilities.isNumeric() && !isDictionaryEncodedScalarString(capabilities)) { + // Not fixed-width. + return false; + } + } + + return !allNumericInputs || (plan.getOutputType() != null && !plan.getOutputType().isNumeric()); + } + }, + + ALWAYS("always") { + @Override + public boolean useDeferredGroupBySelector( + ExpressionPlan plan, + List requiredBindingsList, + ColumnInspector inspector + ) + { + return !isInnatelyDeferrable(plan, requiredBindingsList, inspector); + } + }; + + public static final String JSON_KEY = "deferExpressionDimensions"; + + private final String jsonName; + + DeferExpressionDimensions(String jsonName) + { + this.jsonName = jsonName; + } + + @JsonCreator + public static DeferExpressionDimensions fromString(final String jsonName) + { + for (final DeferExpressionDimensions value : values()) { + if (value.jsonName.equals(jsonName)) { + return value; + } + } + + throw new IAE("Invalid value[%s] for[%s]", jsonName, JSON_KEY); + } + + public abstract boolean useDeferredGroupBySelector( + ExpressionPlan plan, + List requiredBindingsList, + ColumnInspector inspector + ); + + @Override + @JsonValue + public String toString() + { + return jsonName; + } + + + /** + * {@link VectorColumnSelectorFactory} currently can only make dictionary encoded selectors for string types, so + * we can only consider them as fixed width. Additionally, to err on the side of safety, multi-value string columns + * are also not considered fixed width because expressions process multi-value dimensions as single rows, so we would + * need all dictionary ids to be present in the combined key. + * + * At the time of this javadoc, vector group by does not support multi-value dimensions anyway, so this isn't really + * a problem, but if it did, we could consider allowing them if we ensure that all multi-value inputs are used as + * scalars and so the expression can be applied separately to each individual dictionary id (e.g. the equivalent of + * {@link ExpressionPlan.Trait#SINGLE_INPUT_MAPPABLE} but for all multi-value string inputs of the expression). + */ + private static boolean isDictionaryEncodedScalarString(ColumnCapabilities capabilities) + { + return capabilities.isDictionaryEncoded().isTrue() && + capabilities.is(ValueType.STRING) && + capabilities.hasMultipleValues().isFalse(); + } + + /** + * Whether the given expression can be deferred innately by the selector created by + * {@link ExpressionVirtualColumn#makeSingleValueVectorDimensionSelector(DimensionSpec, VectorColumnSelectorFactory)}. + * + * In this case, all options for this enum return false from + * {@link #useDeferredGroupBySelector(ExpressionPlan, List, ColumnInspector)}, because there is no need to defer + * redundantly. 
+ */ + private static boolean isInnatelyDeferrable( + ExpressionPlan plan, + List requiredBindingsList, + ColumnInspector inspector + ) + { + if (plan.getOutputType() != null + && plan.getOutputType().is(ExprType.STRING) + && requiredBindingsList.size() <= 1) { + for (final String requiredBinding : requiredBindingsList) { + final ColumnCapabilities requiredBindingCapabilities = inspector.getColumnCapabilities(requiredBinding); + + if (requiredBindingCapabilities == null + || !requiredBindingCapabilities.is(ValueType.STRING) + || !requiredBindingCapabilities.isDictionaryEncoded().isTrue()) { + return false; + } + } + + return true; + } else { + return false; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index cdcf9e3daf4..994705f55e3 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -560,15 +560,20 @@ public class GroupByQuery extends BaseQuery return false; } - /** - * When limit push down is applied, the partial results would be sorted by the ordering specified by the - * limit/order spec (unlike non-push down case where the results always use the default natural ascending order), - * so when merging these partial result streams, the merge needs to use the same ordering to get correct results. - */ - private Ordering getRowOrderingForPushDown( - final boolean granular, - final DefaultLimitSpec limitSpec - ) + public Ordering getRowOrdering(final boolean granular) + { + return getOrderingAndDimensions(granular).getRowOrdering(); + } + + public List getDimensionNamesInOrder() + { + return getOrderingAndDimensions(false).getDimensions() + .stream() + .map(DimensionSpec::getOutputName) + .collect(Collectors.toList()); + } + + public OrderingAndDimensions getOrderingAndDimensions(final boolean granular) { final boolean sortByDimsFirst = getContextSortByDimsFirst(); @@ -577,18 +582,30 @@ public class GroupByQuery extends BaseQuery final List needsReverseList = new ArrayList<>(); final List dimensionTypes = new ArrayList<>(); final List comparators = new ArrayList<>(); + final List dimensionsInOrder = new ArrayList<>(); - for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) { - boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING; - int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions); - if (dimIndex >= 0) { - DimensionSpec dim = dimensions.get(dimIndex); - orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName())); - dimsInOrderBy.add(dimIndex); - needsReverseList.add(needsReverse); - final ColumnType type = dimensions.get(dimIndex).getOutputType(); - dimensionTypes.add(type); - comparators.add(orderSpec.getDimensionComparator()); + /* + * When limit push down is applied, the partial results would be sorted by the ordering specified by the + * limit/order spec (unlike non-push down case where the results always use the default natural ascending order), + * so when merging these partial result streams, the merge needs to use the same ordering to get correct results. 
+ */ + if (isApplyLimitPushDown()) { + DefaultLimitSpec limitSpec1 = (DefaultLimitSpec) limitSpec; + if (!DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec1, dimensions)) { + for (OrderByColumnSpec orderSpec : ((DefaultLimitSpec) limitSpec).getColumns()) { + boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING; + int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions); + if (dimIndex >= 0) { + DimensionSpec dim = dimensions.get(dimIndex); + orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName())); + dimsInOrderBy.add(dimIndex); + needsReverseList.add(needsReverse); + final ColumnType type = dimensions.get(dimIndex).getOutputType(); + dimensionTypes.add(type); + comparators.add(orderSpec.getDimensionComparator()); + dimensionsInOrder.add(dim); + } + } } } @@ -599,14 +616,16 @@ public class GroupByQuery extends BaseQuery final ColumnType type = dimensions.get(i).getOutputType(); dimensionTypes.add(type); comparators.add(StringComparators.NATURAL); + dimensionsInOrder.add(dimensions.get(i)); } } final Comparator timeComparator = getTimeComparator(granular); + Ordering ordering; if (timeComparator == null) { - return Ordering.from( - (lhs, rhs) -> compareDimsForLimitPushDown( + ordering = Ordering.from( + (lhs, rhs) -> compareDims( orderedFieldNumbers, needsReverseList, dimensionTypes, @@ -616,9 +635,9 @@ public class GroupByQuery extends BaseQuery ) ); } else if (sortByDimsFirst) { - return Ordering.from( + ordering = Ordering.from( (lhs, rhs) -> { - final int cmp = compareDimsForLimitPushDown( + final int cmp = compareDims( orderedFieldNumbers, needsReverseList, dimensionTypes, @@ -634,7 +653,7 @@ public class GroupByQuery extends BaseQuery } ); } else { - return Ordering.from( + ordering = Ordering.from( (lhs, rhs) -> { final int timeCompare = timeComparator.compare(lhs, rhs); @@ -642,7 +661,7 @@ public class GroupByQuery extends BaseQuery return timeCompare; } - return compareDimsForLimitPushDown( + return compareDims( orderedFieldNumbers, needsReverseList, dimensionTypes, @@ -653,45 +672,8 @@ public class GroupByQuery extends BaseQuery } ); } - } - public Ordering getRowOrdering(final boolean granular) - { - if (isApplyLimitPushDown()) { - if (!DefaultLimitSpec.sortingOrderHasNonGroupingFields((DefaultLimitSpec) limitSpec, dimensions)) { - return getRowOrderingForPushDown(granular, (DefaultLimitSpec) limitSpec); - } - } - - final boolean sortByDimsFirst = getContextSortByDimsFirst(); - final Comparator timeComparator = getTimeComparator(granular); - - if (timeComparator == null) { - return Ordering.from((lhs, rhs) -> compareDims(dimensions, lhs, rhs)); - } else if (sortByDimsFirst) { - return Ordering.from( - (lhs, rhs) -> { - final int cmp = compareDims(dimensions, lhs, rhs); - if (cmp != 0) { - return cmp; - } - - return timeComparator.compare(lhs, rhs); - } - ); - } else { - return Ordering.from( - (lhs, rhs) -> { - final int timeCompare = timeComparator.compare(lhs, rhs); - - if (timeCompare != 0) { - return timeCompare; - } - - return compareDims(dimensions, lhs, rhs); - } - ); - } + return new OrderingAndDimensions(ordering, dimensionsInOrder); } @Nullable @@ -716,25 +698,6 @@ public class GroupByQuery extends BaseQuery } } - private int compareDims(List dimensions, ResultRow lhs, ResultRow rhs) - { - final int dimensionStart = getResultRowDimensionStart(); - - for (int i = 0; i < dimensions.size(); i++) { - DimensionSpec dimension = dimensions.get(i); - final int dimCompare = 
DimensionHandlerUtils.compareObjectsAsType( - lhs.get(dimensionStart + i), - rhs.get(dimensionStart + i), - dimension.getOutputType() - ); - if (dimCompare != 0) { - return dimCompare; - } - } - - return 0; - } - /** * Computes the timestamp that will be returned by {@link #getUniversalTimestamp()}. */ @@ -760,12 +723,12 @@ public class GroupByQuery extends BaseQuery } /** - * Compares the dimensions for limit pushdown. + * Compares the dimensions. * * Due to legacy reason, the provided StringComparator for the arrays isn't applied and must be changed once we * get rid of the StringComparators for array types */ - private static int compareDimsForLimitPushDown( + private static int compareDims( final IntList fields, final List needsReverseList, final List dimensionTypes, @@ -924,6 +887,28 @@ public class GroupByQuery extends BaseQuery } } + public static class OrderingAndDimensions + { + Ordering rowOrdering; + List dimensions; + + public OrderingAndDimensions(Ordering rowOrdering, List dimensions) + { + this.rowOrdering = rowOrdering; + this.dimensions = dimensions; + } + + public Ordering getRowOrdering() + { + return rowOrdering; + } + + public List getDimensions() + { + return dimensions; + } + } + public static class Builder { @Nullable diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java index 2fb4dfdd4d2..9950695f28c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryConfig.java @@ -27,6 +27,8 @@ import org.apache.druid.query.QueryContext; import org.apache.druid.query.QueryContexts; import org.apache.druid.utils.JvmUtils; +import java.util.Optional; + /** * */ @@ -44,6 +46,7 @@ public class GroupByQueryConfig public static final String CTX_KEY_ARRAY_RESULT_ROWS = "resultAsArray"; public static final String CTX_KEY_ENABLE_MULTI_VALUE_UNNESTING = "groupByEnableMultiValueUnnesting"; public static final String CTX_KEY_BUFFER_GROUPER_MAX_SIZE = "bufferGrouperMaxSize"; + public static final String CTX_KEY_DEFER_EXPRESSION_DIMENSIONS = "deferExpressionDimensions"; private static final String CTX_KEY_IS_SINGLE_THREADED = "groupByIsSingleThreaded"; private static final String CTX_KEY_BUFFER_GROUPER_INITIAL_BUCKETS = "bufferGrouperInitialBuckets"; private static final String CTX_KEY_BUFFER_GROUPER_MAX_LOAD_FACTOR = "bufferGrouperMaxLoadFactor"; @@ -119,6 +122,9 @@ public class GroupByQueryConfig @JsonProperty private boolean mergeThreadLocal = false; + @JsonProperty + private DeferExpressionDimensions deferExpressionDimensions = DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC; + @JsonProperty private boolean vectorize = true; @@ -277,6 +283,11 @@ public class GroupByQueryConfig return mergeThreadLocal; } + public DeferExpressionDimensions getDeferExpressionDimensions() + { + return deferExpressionDimensions; + } + public boolean isVectorize() { return vectorize; @@ -350,6 +361,10 @@ public class GroupByQueryConfig getNumParallelCombineThreads() ); newConfig.mergeThreadLocal = queryContext.getBoolean(CTX_KEY_MERGE_THREAD_LOCAL, isMergeThreadLocal()); + newConfig.deferExpressionDimensions = + Optional.ofNullable(queryContext.getString(CTX_KEY_DEFER_EXPRESSION_DIMENSIONS)) + .map(DeferExpressionDimensions::fromString) + .orElse(getDeferExpressionDimensions()); newConfig.vectorize = queryContext.getBoolean(QueryContexts.VECTORIZE_KEY, isVectorize()); 
newConfig.enableMultiValueUnnesting = queryContext.getBoolean( CTX_KEY_ENABLE_MULTI_VALUE_UNNESTING, @@ -378,6 +393,8 @@ public class GroupByQueryConfig ", vectorize=" + vectorize + ", forcePushDownNestedQuery=" + forcePushDownNestedQuery + ", enableMultiValueUnnesting=" + enableMultiValueUnnesting + + ", mergeThreadLocal=" + mergeThreadLocal + + ", deferExpressionDimensions=" + deferExpressionDimensions + '}'; } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java index 47064fefbe6..d69e09c9ff0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java @@ -38,7 +38,6 @@ import com.google.inject.Inject; import org.apache.druid.data.input.Row; import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.MemoryAllocatorFactory; import org.apache.druid.frame.segment.FrameCursorUtils; import org.apache.druid.frame.write.FrameWriterFactory; @@ -78,8 +77,10 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.NullableTypeStrategy; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.nested.StructuredData; import org.joda.time.DateTime; +import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -472,7 +473,7 @@ public class GroupByQueryQueryToolChest extends QueryToolChest deserializer = new JsonDeserializer() { - final Class[] dimensionClasses = createDimensionClasses(); + final Class[] dimensionClasses = createDimensionClasses(query); boolean containsComplexDimensions = query.getDimensions() .stream() .anyMatch( @@ -525,30 +526,6 @@ public class GroupByQueryQueryToolChest extends QueryToolChest[] createDimensionClasses() - { - final List queryDimensions = query.getDimensions(); - final Class[] classes = new Class[queryDimensions.size()]; - for (int i = 0; i < queryDimensions.size(); ++i) { - final ColumnType dimensionOutputType = queryDimensions.get(i).getOutputType(); - if (dimensionOutputType.is(ValueType.COMPLEX)) { - NullableTypeStrategy nullableTypeStrategy = dimensionOutputType.getNullableStrategy(); - if (!nullableTypeStrategy.groupable()) { - throw DruidException.defensive( - "Ungroupable dimension [%s] with type [%s] found in the query.", - queryDimensions.get(i).getDimension(), - dimensionOutputType - ); - } - classes[i] = nullableTypeStrategy.getClazz(); - } else { - classes[i] = Object.class; - } - } - return classes; - } - }; class GroupByResultRowModule extends SimpleModule @@ -598,9 +575,32 @@ public class GroupByQueryQueryToolChest extends QueryToolChest getCacheStrategy(final GroupByQuery query) + public CacheStrategy getCacheStrategy(GroupByQuery query) { + return getCacheStrategy(query, null); + } + + @Override + public CacheStrategy getCacheStrategy( + final GroupByQuery query, + @Nullable final ObjectMapper mapper + ) + { + + for (DimensionSpec dimension : query.getDimensions()) { + if (dimension.getOutputType().is(ValueType.COMPLEX) && !dimension.getOutputType().equals(ColumnType.NESTED_DATA)) { + if (mapper == null) { + throw DruidException.defensive( + "Cannot deserialize complex dimension of type[%s] from result 
cache if object mapper is not provided", + dimension.getOutputType().getComplexTypeName() + ); + } + } + } + final Class[] dimensionClasses = createDimensionClasses(query); + return new CacheStrategy() { private static final byte CACHE_STRATEGY_VERSION = 0x1; @@ -727,13 +727,29 @@ public class GroupByQueryQueryToolChest extends QueryToolChest() @@ -861,4 +878,27 @@ public class GroupByQueryQueryToolChest extends QueryToolChest[] createDimensionClasses(final GroupByQuery query) + { + final List queryDimensions = query.getDimensions(); + final Class[] classes = new Class[queryDimensions.size()]; + for (int i = 0; i < queryDimensions.size(); ++i) { + final ColumnType dimensionOutputType = queryDimensions.get(i).getOutputType(); + if (dimensionOutputType.is(ValueType.COMPLEX)) { + NullableTypeStrategy nullableTypeStrategy = dimensionOutputType.getNullableStrategy(); + if (!nullableTypeStrategy.groupable()) { + throw DruidException.defensive( + "Ungroupable dimension [%s] with type [%s] found in the query.", + queryDimensions.get(i).getDimension(), + dimensionOutputType + ); + } + classes[i] = nullableTypeStrategy.getClazz(); + } else { + classes[i] = Object.class; + } + } + return classes; + } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupingEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupingEngine.java index 6451fb9b943..ab1ee1052b4 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupingEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupingEngine.java @@ -686,8 +686,7 @@ public class GroupingEngine processingConfig.intermediateComputeSizeBytes() ); - List queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName) - .collect(Collectors.toList()); + List queryDimNamesInOrder = baseSubtotalQuery.getDimensionNamesInOrder(); // Only needed to make LimitSpec.filterColumns(..) call later in case base query has a non default LimitSpec. Set aggsAndPostAggs = null; @@ -724,7 +723,7 @@ public class GroupingEngine .withLimitSpec(subtotalQueryLimitSpec); final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne; - if (Utils.isPrefix(subtotalSpec, queryDimNames)) { + if (Utils.isPrefix(subtotalSpec, queryDimNamesInOrder)) { // Since subtotalSpec is a prefix of base query dimensions, so results from base query are also sorted // by subtotalSpec as needed by stream merging. subtotalsResults.add( diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java index 085e6022aab..35f09c5446d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java @@ -211,24 +211,25 @@ public class GroupByQueryEngine final List dimensions ) { - return dimensions - .stream() - .allMatch( - dimension -> { - if (dimension.mustDecorate()) { - // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. - // To be safe, we must return false here. - return false; - } + for (DimensionSpec dimension : dimensions) { + if (dimension.mustDecorate()) { + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. 
+ return false; + } - // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays - final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); - return dimension.getOutputType().isArray() - || (columnCapabilities != null - && columnCapabilities.hasMultipleValues().isFalse() - && !columnCapabilities.isArray() - ); - }); + // if dimension spec type is array, skip it since we can handle array or multi-valued + if (dimension.getOutputType().isArray()) { + continue; + } + + // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays + final ColumnCapabilities capabilities = inspector.getColumnCapabilities(dimension.getDimension()); + if (capabilities == null || capabilities.hasMultipleValues().isMaybeTrue() || capabilities.isArray()) { + return false; + } + } + return true; } private abstract static class GroupByEngineIterator implements Iterator, Closeable diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java index 591624f1ab8..0f3faedb707 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/Grouper.java @@ -19,6 +19,7 @@ package org.apache.druid.query.groupby.epinephelinae; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -232,6 +233,16 @@ public interface Grouper extends Closeable */ BufferComparator bufferComparatorWithAggregators(AggregatorFactory[] aggregatorFactories, int[] aggregatorOffsets); + /** + * Decorates the object mapper enabling it to read and write query results' grouping keys. It is used by the + * {@link SpillingGrouper} to preserve the types of the dimensions after serializing and deserializing them on the + * spilled files. + */ + default ObjectMapper decorateObjectMapper(ObjectMapper spillMapper) + { + return spillMapper; + } + /** * Reset the keySerde to its initial state. After this method is called, {@link #readFromByteBuffer} * and {@link #bufferComparator()} may no longer work properly on previously-serialized keys. 
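The default decorateObjectMapper() added above is a pass-through; the row-based key serde in RowBasedGrouperHelper (next file) overrides it because a plain ObjectMapper does not preserve the numeric types of spilled grouping keys. Below is a minimal, self-contained sketch of that type loss, assuming only jackson-databind on the classpath; the class is illustrative and not part of this patch.

import com.fasterxml.jackson.databind.ObjectMapper;

public class SpillKeyTypeLossSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // A grouping key containing a long and a float, as it might look before being spilled to disk.
    final Object[] key = new Object[]{1L, 2.0f};
    // Round-tripping through generic JSON drops the original key types...
    final Object[] read = mapper.readValue(mapper.writeValueAsBytes(key), Object[].class);
    System.out.println(read[0].getClass()); // class java.lang.Integer, not Long
    System.out.println(read[1].getClass()); // class java.lang.Double, not Float
    // ...which is why SpillingGrouper now spills through keySerde.decorateObjectMapper(spillMapper).
  }
}

The override in RowBasedGrouperHelper follows the pattern of copying the spill mapper and registering a module with a key-aware deserializer, so the shared mapper instance is left untouched.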
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index 491c28d4142..da8a0e04623 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -19,9 +19,14 @@ package org.apache.druid.query.groupby.epinephelinae; -import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonValue; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.core.ObjectCodec; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.google.common.primitives.Ints; @@ -84,6 +89,7 @@ import org.joda.time.Interval; import javax.annotation.Nullable; import java.io.Closeable; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -666,22 +672,6 @@ public class RowBasedGrouperHelper this.key = key; } - @JsonCreator - public static RowBasedKey fromJsonArray(final Object[] key) - { - // Type info is lost during serde: - // Floats may be deserialized as doubles, Longs may be deserialized as integers, convert them back - for (int i = 0; i < key.length; i++) { - if (key[i] instanceof Integer) { - key[i] = ((Integer) key[i]).longValue(); - } else if (key[i] instanceof Double) { - key[i] = ((Double) key[i]).floatValue(); - } - } - - return new RowBasedKey(key); - } - @JsonValue public Object[] getKey() { @@ -1371,6 +1361,65 @@ public class RowBasedGrouperHelper ); } + @Override + public ObjectMapper decorateObjectMapper(ObjectMapper spillMapper) + { + + final JsonDeserializer deserializer = new JsonDeserializer() + { + @Override + public RowBasedKey deserialize( + JsonParser jp, + DeserializationContext deserializationContext + ) throws IOException + { + if (!jp.isExpectedStartArrayToken()) { + throw DruidException.defensive("Expected array start token, received [%s]", jp.getCurrentToken()); + } + jp.nextToken(); + + final ObjectCodec codec = jp.getCodec(); + final int timestampAdjustment = includeTimestamp ? 
1 : 0; + final int dimsToRead = timestampAdjustment + serdeHelpers.length; + int dimsReadSoFar = 0; + final Object[] objects = new Object[dimsToRead]; + + if (includeTimestamp) { + DruidException.conditionalDefensive( + jp.currentToken() != JsonToken.END_ARRAY, + "Unexpected end of array when deserializing timestamp from the spilled files" + ); + objects[dimsReadSoFar] = codec.readValue(jp, Long.class); + + ++dimsReadSoFar; + jp.nextToken(); + } + + while (jp.currentToken() != JsonToken.END_ARRAY) { + objects[dimsReadSoFar] = + codec.readValue(jp, serdeHelpers[dimsReadSoFar - timestampAdjustment].getClazz()); + + ++dimsReadSoFar; + jp.nextToken(); + } + + return new RowBasedKey(objects); + } + }; + + class SpillModule extends SimpleModule + { + public SpillModule() + { + addDeserializer(RowBasedKey.class, deserializer); + } + } + + final ObjectMapper newObjectMapper = spillMapper.copy(); + newObjectMapper.registerModule(new SpillModule()); + return newObjectMapper; + } + @Override public void reset() { @@ -1588,6 +1637,7 @@ public class RowBasedGrouperHelper { final BufferComparator bufferComparator; final String columnTypeName; + final Class clazz; final List dictionary; final Object2IntMap reverseDictionary; @@ -1613,6 +1663,7 @@ public class RowBasedGrouperHelper dictionary.get(lhsBuffer.getInt(lhsPosition + keyBufferPosition)), dictionary.get(rhsBuffer.getInt(rhsPosition + keyBufferPosition)) ); + clazz = columnType.getNullableStrategy().getClazz(); } // Asserts that we don't entertain any complex types without a typename, to prevent intermixing dictionaries of @@ -1645,6 +1696,12 @@ public class RowBasedGrouperHelper { return reverseDictionary; } + + @Override + public Class getClazz() + { + return clazz; + } } @@ -1726,6 +1783,14 @@ public class RowBasedGrouperHelper { return reverseDictionary; } + + @Override + public Class getClazz() + { + // Jackson deserializes Object[] containing longs to Object[] containing string if Object[].class is returned + // Therefore we are using Object.class + return Object.class; + } } private class ArrayStringRowBasedKeySerdeHelper extends DictionaryBuildingSingleValuedRowBasedKeySerdeHelper @@ -1770,6 +1835,12 @@ public class RowBasedGrouperHelper { return reverseStringArrayDictionary; } + + @Override + public Class getClazz() + { + return Object[].class; + } } private abstract class AbstractStringRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper @@ -1819,6 +1890,12 @@ public class RowBasedGrouperHelper { return bufferComparator; } + + @Override + public Class getClazz() + { + return String.class; + } } private class DynamicDictionaryStringRowBasedKeySerdeHelper extends AbstractStringRowBasedKeySerdeHelper @@ -1937,6 +2014,12 @@ public class RowBasedGrouperHelper { return bufferComparator; } + + @Override + public Class getClazz() + { + return Long.class; + } } private class FloatRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper @@ -1982,6 +2065,12 @@ public class RowBasedGrouperHelper { return bufferComparator; } + + @Override + public Class getClazz() + { + return Float.class; + } } private class DoubleRowBasedKeySerdeHelper implements RowBasedKeySerdeHelper @@ -2027,6 +2116,12 @@ public class RowBasedGrouperHelper { return bufferComparator; } + + @Override + public Class getClazz() + { + return Double.class; + } } // This class is only used when SQL compatible null handling is enabled. 
@@ -2082,6 +2177,12 @@ public class RowBasedGrouperHelper { return comparator; } + + @Override + public Class getClazz() + { + return delegate.getClazz(); + } } } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedKeySerdeHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedKeySerdeHelper.java index 1cb29d23bc0..71372ca238b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedKeySerdeHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedKeySerdeHelper.java @@ -65,4 +65,9 @@ interface RowBasedKeySerdeHelper * Return a {@link BufferComparator} to compare keys stored in ByteBuffer. */ BufferComparator getBufferComparator(); + + /** + * Returns the expected class of the key which used to deserialize the objects correctly from the spilled files. + */ + Class getClazz(); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index 4e9b96102a1..d8a7760c11d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -152,7 +152,7 @@ public class SpillingGrouper implements Grouper } this.aggregatorFactories = aggregatorFactories; this.temporaryStorage = temporaryStorage; - this.spillMapper = spillMapper; + this.spillMapper = keySerde.decorateObjectMapper(spillMapper); this.spillingAllowed = spillingAllowed; this.sortHasNonGroupingFields = sortHasNonGroupingFields; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index a2fc9cec8a6..75f4539e8c3 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -29,6 +29,8 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.aggregation.AggregatorAdapters; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.groupby.GroupByQuery; @@ -55,7 +57,6 @@ import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; - import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collections; @@ -138,12 +139,22 @@ public class VectorGroupByEngine try { final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); final List dimensions = query.getDimensions().stream().map( - dimensionSpec -> - ColumnProcessors.makeVectorProcessor( + dimensionSpec -> { + if (dimensionSpec instanceof DefaultDimensionSpec) { + // Delegate creation of GroupByVectorColumnSelector to the column selector factory, so that + // virtual columns (like ExpressionVirtualColumn) can control their own grouping behavior. 
+ return columnSelectorFactory.makeGroupByVectorColumnSelector( + dimensionSpec.getDimension(), + config.getDeferExpressionDimensions() + ); + } else { + return ColumnProcessors.makeVectorProcessor( dimensionSpec, GroupByVectorColumnProcessorFactory.instance(), columnSelectorFactory - ) + ); + } + } ).collect(Collectors.toList()); return new VectorGroupByEngineIterator( @@ -194,9 +205,7 @@ public class VectorGroupByEngine return adapter.canVectorize(filter, query.getVirtualColumns(), false) && canVectorizeDimensions(inspector, query.getDimensions()) && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter) - && query.getAggregatorSpecs() - .stream() - .allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector)); + && canVectorizeAggregators(inspector, query.getAggregatorSpecs()); } private static boolean canVectorizeDimensions( @@ -204,35 +213,45 @@ public class VectorGroupByEngine final List dimensions ) { - return dimensions - .stream() - .allMatch( - dimension -> { - if (!dimension.canVectorize()) { - return false; - } + for (DimensionSpec dimension : dimensions) { + if (!dimension.canVectorize()) { + return false; + } - if (dimension.mustDecorate()) { - // group by on multi value dimensions are not currently supported - // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. - // To be safe, we must return false here. - return false; - } + if (dimension.mustDecorate()) { + // group by on multi value dimensions are not currently supported + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } - if (!dimension.getOutputType().isPrimitive()) { - // group by on arrays and complex types is not currently supported in the vector processing engine - return false; - } + if (!dimension.getOutputType().isPrimitive()) { + // group by on arrays and complex types is not currently supported in the vector processing engine + return false; + } - // Now check column capabilities. - final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); - // null here currently means the column does not exist, nil columns can be vectorized - if (columnCapabilities == null) { - return true; - } - // must be single valued - return columnCapabilities.hasMultipleValues().isFalse(); - }); + // Now check column capabilities. 
+ final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); + if (columnCapabilities != null && columnCapabilities.hasMultipleValues().isMaybeTrue()) { + // null here currently means the column does not exist, nil columns can be vectorized + // multi-value columns implicit unnest is not currently supported in the vector processing engine + return false; + } + } + return true; + } + + public static boolean canVectorizeAggregators( + final ColumnInspector inspector, + final List aggregatorFactories + ) + { + for (AggregatorFactory aggregatorFactory : aggregatorFactories) { + if (!aggregatorFactory.canVectorize(inspector)) { + return false; + } + } + return true; } @VisibleForTesting diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index 912ecb1ac32..fd8d7e7009c 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.metadata; import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; @@ -62,6 +63,7 @@ import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -184,6 +186,15 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest getCacheStrategy(final SegmentMetadataQuery query) + { + return getCacheStrategy(query, null); + } + + @Override + public CacheStrategy getCacheStrategy( + final SegmentMetadataQuery query, + @Nullable final ObjectMapper objectMapper + ) { return new CacheStrategy() { diff --git a/processing/src/main/java/org/apache/druid/query/operator/window/WindowFrame.java b/processing/src/main/java/org/apache/druid/query/operator/window/WindowFrame.java index b70df2c5203..fca50c25b28 100644 --- a/processing/src/main/java/org/apache/druid/query/operator/window/WindowFrame.java +++ b/processing/src/main/java/org/apache/druid/query/operator/window/WindowFrame.java @@ -161,9 +161,9 @@ public class WindowFrame public int getLowerOffsetClamped(int maxRows) { if (lowerUnbounded) { - return maxRows; + return -maxRows; } - return Math.min(maxRows, lowerOffset); + return Math.max(-maxRows, lowerOffset); } /** diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java index 6f546009511..04f9eddbff0 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumns.java @@ -259,8 +259,8 @@ public class ArrayListRowsAndColumns implements AppendableRowsAndColumn rowSignature, extraColumns, columnNames, - startOffset, - endOffset + this.startOffset + startOffset, + this.startOffset + endOffset ); } diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java 
b/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java index 6c2647aaf76..0dae40467f3 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/LazilyDecoratedRowsAndColumns.java @@ -21,7 +21,6 @@ package org.apache.druid.query.rowsandcols; import com.google.common.collect.ImmutableList; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; import org.apache.druid.frame.key.KeyColumn; import org.apache.druid.frame.key.KeyOrder; @@ -275,8 +274,7 @@ public class LazilyDecoratedRowsAndColumns implements RowsAndColumns } } - final FrameWriterFactory frameWriterFactory = FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + final FrameWriterFactory frameWriterFactory = FrameWriters.makeColumnBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(200 << 20), // 200 MB, because, why not? signature, sortColumns @@ -392,8 +390,7 @@ public class LazilyDecoratedRowsAndColumns implements RowsAndColumns long remainingRowsToSkip = limit.getOffset(); long remainingRowsToFetch = limit.getLimitOrMax(); - final FrameWriter frameWriter = FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + final FrameWriter frameWriter = FrameWriters.makeColumnBasedFrameWriterFactory( memFactory, sigBob.build(), Collections.emptyList() diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/StorageAdapterRowsAndColumns.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/StorageAdapterRowsAndColumns.java index bcc1d075cbe..edc6518ca04 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/StorageAdapterRowsAndColumns.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/StorageAdapterRowsAndColumns.java @@ -20,7 +20,6 @@ package org.apache.druid.query.rowsandcols; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; import org.apache.druid.frame.write.FrameWriter; import org.apache.druid.frame.write.FrameWriterFactory; @@ -119,8 +118,7 @@ public class StorageAdapterRowsAndColumns implements CloseableShapeshifter, Rows final ColumnSelectorFactory columnSelectorFactory = in.getColumnSelectorFactory(); - final FrameWriterFactory frameWriterFactory = FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + final FrameWriterFactory frameWriterFactory = FrameWriters.makeColumnBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(200 << 20), // 200 MB, because, why not? 
rowSignature, Collections.emptyList() diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java index 106fa9674a0..83952873050 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/semantic/DefaultFramedOnHeapAggregatable.java @@ -29,7 +29,6 @@ import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.operator.window.WindowFrame; import org.apache.druid.query.rowsandcols.RowsAndColumns; -import org.apache.druid.query.rowsandcols.column.ConstantObjectColumn; import org.apache.druid.query.rowsandcols.column.ObjectArrayColumn; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; @@ -40,7 +39,6 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import javax.annotation.Nonnull; import javax.annotation.Nullable; - import java.util.Arrays; import java.util.Iterator; import java.util.concurrent.atomic.AtomicInteger; @@ -61,34 +59,15 @@ public class DefaultFramedOnHeapAggregatable implements FramedOnHeapAggregatable AggregatorFactory[] aggFactories ) { - if (frame.isLowerUnbounded() && frame.isUpperUnbounded()) { - return computeUnboundedAggregates(aggFactories); - } - - if (frame.getPeerType() == WindowFrame.PeerType.ROWS) { - if (frame.isLowerUnbounded()) { - return computeCumulativeAggregates(aggFactories, frame.getUpperOffset()); - } else if (frame.isUpperUnbounded()) { - return computeReverseCumulativeAggregates(aggFactories, frame.getLowerOffset()); - } else { - final int numRows = rac.numRows(); - int lowerOffset = frame.getLowerOffset(); - int upperOffset = frame.getUpperOffset(); - - if (numRows < lowerOffset + upperOffset + 1) { - // In this case, there are not enough rows to completely build up the full window aperture before it needs to - // also start contracting the aperture because of the upper offset. So we use a method that specifically - // handles checks for both expanding and reducing the aperture on every iteration. - return aggregateWindowApertureInFlux(aggFactories, lowerOffset, upperOffset); - } else { - // In this case, there are 3 distinct phases that allow us to loop with less - // branches, so we have a method that specifically does that. 
- return aggregateWindowApertureWellBehaved(aggFactories, lowerOffset, upperOffset); - } - } - } else { - return computeGroupAggregates(aggFactories, frame); + Iterable groupIterator = buildIteratorFor(rac, frame); + ResultPopulator resultRac = new ResultPopulator(aggFactories, rac.numRows()); + AggIntervalCursor aggCursor = new AggIntervalCursor(rac, aggFactories); + for (AggInterval aggInterval : groupIterator) { + aggCursor.moveTo(aggInterval.inputRows); + resultRac.write(aggInterval.outputRows, aggCursor); } + resultRac.appendTo(rac); + return rac; } /** @@ -124,22 +103,34 @@ public class DefaultFramedOnHeapAggregatable implements FramedOnHeapAggregatable } } - private RowsAndColumns computeGroupAggregates( - AggregatorFactory[] aggFactories, - WindowFrame frame) + public static Iterable buildIteratorFor(AppendableRowsAndColumns rac, WindowFrame frame) { - Iterable groupIterator = buildGroupIteratorFor(rac, frame); - ResultPopulator resultRac = new ResultPopulator(aggFactories, rac.numRows()); - AggIntervalCursor aggCursor = new AggIntervalCursor(rac, aggFactories); - for (AggInterval aggInterval : groupIterator) { - aggCursor.moveTo(aggInterval.inputRows); - resultRac.write(aggInterval.outputRows, aggCursor); + int numRows = rac.numRows(); + if (frame.getLowerOffsetClamped(numRows) == -numRows && frame.getUpperOffsetClamped(numRows) == numRows) { + return buildUnboundedIteratorFor(rac, frame); + } else if (frame.getPeerType() == WindowFrame.PeerType.RANGE) { + return buildGroupIteratorFor(rac, frame); + } else { + return buildRowIteratorFor(rac, frame); } - resultRac.appendTo(rac); - return rac; } - public static Iterable buildGroupIteratorFor(AppendableRowsAndColumns rac, WindowFrame frame) + private static Iterable buildUnboundedIteratorFor(AppendableRowsAndColumns rac, WindowFrame frame) + { + int[] groupBoundaries = new int[]{0, rac.numRows()}; + return new GroupIteratorForWindowFrame(frame, groupBoundaries); + } + + private static Iterable buildRowIteratorFor(AppendableRowsAndColumns rac, WindowFrame frame) + { + int[] groupBoundaries = new int[rac.numRows() + 1]; + for (int j = 0; j < groupBoundaries.length; j++) { + groupBoundaries[j] = j; + } + return new GroupIteratorForWindowFrame(frame, groupBoundaries); + } + + private static Iterable buildGroupIteratorFor(AppendableRowsAndColumns rac, WindowFrame frame) { int[] groupBoundaries = ClusteredGroupPartitioner.fromRAC(rac).computeBoundaries(frame.getOrderByColNames()); return new GroupIteratorForWindowFrame(frame, groupBoundaries); @@ -187,7 +178,7 @@ public class DefaultFramedOnHeapAggregatable implements FramedOnHeapAggregatable groupToRowIndex(relativeGroupId(1)) ), Interval.of( - groupToRowIndex(relativeGroupId(-lowerOffset)), + groupToRowIndex(relativeGroupId(lowerOffset)), groupToRowIndex(relativeGroupId(upperOffset)) ) ); @@ -365,6 +356,10 @@ public class DefaultFramedOnHeapAggregatable implements FramedOnHeapAggregatable for (int i = currentRows.b; i < newRows.b; i++) { aggregate(i); } + } else if (currentRows.a > newRows.a && currentRows.b == newRows.b) { + for (int i = newRows.a; i < currentRows.a; i++) { + aggregate(i); + } } else { newAggregators(); for (int i : newRows) { @@ -390,428 +385,6 @@ public class DefaultFramedOnHeapAggregatable implements FramedOnHeapAggregatable } } - private AppendableRowsAndColumns computeUnboundedAggregates(AggregatorFactory[] aggFactories) - { - Aggregator[] aggs = new Aggregator[aggFactories.length]; - - - AtomicInteger currRow = new AtomicInteger(0); - final ColumnSelectorFactory 
columnSelectorFactory = ColumnSelectorFactoryMaker.fromRAC(rac).make(currRow); - - for (int i = 0; i < aggFactories.length; i++) { - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - int numRows = rac.numRows(); - int rowId = currRow.get(); - while (rowId < numRows) { - for (Aggregator agg : aggs) { - agg.aggregate(); - } - rowId = currRow.incrementAndGet(); - } - - for (int i = 0; i < aggFactories.length; ++i) { - rac.addColumn( - aggFactories[i].getName(), - new ConstantObjectColumn(aggs[i].get(), numRows, aggFactories[i].getIntermediateType()) - ); - aggs[i].close(); - } - return rac; - } - - private AppendableRowsAndColumns computeCumulativeAggregates(AggregatorFactory[] aggFactories, int upperOffset) - { - int numRows = rac.numRows(); - if (upperOffset > numRows) { - return computeUnboundedAggregates(aggFactories); - } - - - // We store the results in an Object array for convenience. This is definitely sub-par from a memory management - // point of view as we should use native arrays when possible. This will be fine for now, but it probably makes - // sense to look at optimizing this in the future. That said, such an optimization might best come by having - // a specialized implementation of this interface against, say, a Frame object that can deal with arrays instead - // of trying to optimize this generic implementation. - Object[][] results = new Object[aggFactories.length][numRows]; - int resultStorageIndex = 0; - - AtomicInteger rowIdProvider = new AtomicInteger(0); - final ColumnSelectorFactory columnSelectorFactory = ColumnSelectorFactoryMaker.fromRAC(rac).make(rowIdProvider); - - AggregatorFactory[] combiningFactories = new AggregatorFactory[aggFactories.length]; - Aggregator[] aggs = new Aggregator[aggFactories.length]; - for (int i = 0; i < aggFactories.length; i++) { - combiningFactories[i] = aggFactories[i].getCombiningFactory(); - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - // If there is an upper offset, we accumulate those aggregations before starting to generate results - for (int i = 0; i < upperOffset; ++i) { - for (Aggregator agg : aggs) { - agg.aggregate(); - } - rowIdProvider.incrementAndGet(); - } - - // Prime the results - if (rowIdProvider.get() < numRows) { - for (int i = 0; i < aggs.length; i++) { - aggs[i].aggregate(); - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggs[i].get()); - aggs[i].close(); - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - ++resultStorageIndex; - rowIdProvider.incrementAndGet(); - } - - // From here out, we want to aggregate, peel off a row of results and then accumulate the aggregation - for (int rowId = rowIdProvider.get(); rowId < numRows; ++rowId) { - for (int i = 0; i < aggs.length; i++) { - aggs[i].aggregate(); - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggs[i].get()); - aggs[i].close(); - - // Use a combining aggregator to combine the result we just got with the result from the previous row - // This is a lot of hoops to jump through just to combine two values, but AggregatorFactory.combine - // allows for mutation of either of the arguments passed in, so it cannot be meaningfully used in this - // context. Instead, we have to jump through these hoops to make sure that we are generating a new object. 
- // It would've been nice if the AggregatorFactory interface had methods that were more usable for this, - // but it doesn't so :shrug: - final CumulativeColumnSelectorFactory combiningFactory = new CumulativeColumnSelectorFactory( - aggFactories[i], - results[i], - resultStorageIndex - 1 - ); - final Aggregator combiningAgg = combiningFactories[i].factorize(combiningFactory); - combiningAgg.aggregate(); - combiningFactory.increment(); - combiningAgg.aggregate(); - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(combiningAgg.get()); - combiningAgg.close(); - - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - ++resultStorageIndex; - rowIdProvider.incrementAndGet(); - } - - // If we haven't filled up all of the results yet, there are no more rows, so just point the rest of the results - // at the last result that we generated - for (Object[] resultArr : results) { - Arrays.fill(resultArr, resultStorageIndex, resultArr.length, resultArr[resultStorageIndex - 1]); - } - - return makeReturnRAC(aggFactories, results); - } - - private AppendableRowsAndColumns computeReverseCumulativeAggregates(AggregatorFactory[] aggFactories, int lowerOffset) - { - int numRows = rac.numRows(); - if (lowerOffset > numRows) { - return computeUnboundedAggregates(aggFactories); - } - - // We store the results in an Object array for convenience. This is definitely sub-par from a memory management - // point of view as we should use native arrays when possible. This will be fine for now, but it probably makes - // sense to look at optimizing this in the future. That said, such an optimization might best come by having - // a specialized implementation of this interface against, say, a Frame object that can deal with arrays instead - // of trying to optimize this generic implementation. 
- Object[][] results = new Object[aggFactories.length][numRows]; - int resultStorageIndex = numRows - 1; - - AtomicInteger rowIdProvider = new AtomicInteger(numRows - 1); - final ColumnSelectorFactory columnSelectorFactory = ColumnSelectorFactoryMaker.fromRAC(rac).make(rowIdProvider); - - AggregatorFactory[] combiningFactories = new AggregatorFactory[aggFactories.length]; - Aggregator[] aggs = new Aggregator[aggFactories.length]; - for (int i = 0; i < aggFactories.length; i++) { - combiningFactories[i] = aggFactories[i].getCombiningFactory(); - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - // If there is a lower offset, we accumulate those aggregations before starting to generate results - for (int i = 0; i < lowerOffset; ++i) { - for (Aggregator agg : aggs) { - agg.aggregate(); - } - rowIdProvider.decrementAndGet(); - } - - // Prime the results - if (rowIdProvider.get() >= 0) { - for (int i = 0; i < aggs.length; i++) { - aggs[i].aggregate(); - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggs[i].get()); - aggs[i].close(); - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - --resultStorageIndex; - rowIdProvider.decrementAndGet(); - } - - // From here out, we want to aggregate, peel off a row of results and then accumulate the aggregation - for (int rowId = rowIdProvider.get(); rowId >= 0; --rowId) { - for (int i = 0; i < aggs.length; i++) { - aggs[i].aggregate(); - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggs[i].get()); - aggs[i].close(); - - // Use a combining aggregator to combine the result we just got with the result from the previous row - // This is a lot of hoops to jump through just to combine two values, but AggregatorFactory.combine - // allows for mutation of either of the arguments passed in, so it cannot be meaningfully used in this - // context. Instead, we have to jump through these hoops to make sure that we are generating a new object. - // It would've been nice if the AggregatorFactory interface had methods that were more usable for this, - // but it doesn't so :shrug: - final CumulativeColumnSelectorFactory combiningFactory = new CumulativeColumnSelectorFactory( - aggFactories[i], - results[i], - resultStorageIndex + 1 - ); - final Aggregator combiningAgg = combiningFactories[i].factorize(combiningFactory); - combiningAgg.aggregate(); - combiningFactory.decrement(); - combiningAgg.aggregate(); - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(combiningAgg.get()); - combiningAgg.close(); - - aggs[i] = aggFactories[i].factorize(columnSelectorFactory); - } - - --resultStorageIndex; - rowIdProvider.decrementAndGet(); - } - - // If we haven't filled up all of the results yet, there are no more rows, so just point the rest of the results - // at the last result that we generated - for (Object[] resultArr : results) { - Arrays.fill(resultArr, 0, resultStorageIndex + 1, resultArr[resultStorageIndex + 1]); - } - - return makeReturnRAC(aggFactories, results); - } - - private AppendableRowsAndColumns aggregateWindowApertureWellBehaved( - AggregatorFactory[] aggFactories, - int lowerOffset, - int upperOffset - ) - { - // There are 3 different phases of operation when we have more rows than our window size - // 1. Our window is not full, as we walk the rows we build up towards filling it - // 2. Our window is full, as we walk the rows we take a value off and add a new aggregation - // 3. 
We are nearing the end of the rows, we need to start shrinking the window aperture - - int numRows = rac.numRows(); - int windowSize = lowerOffset + upperOffset + 1; - - // We store the results in an Object array for convenience. This is definitely sub-par from a memory management - // point of view as we should use native arrays when possible. This will be fine for now, but it probably makes - // sense to look at optimizing this in the future. That said, such an optimization might best come by having - // a specialized implementation of this interface against, say, a Frame object that can deal with arrays instead - // of trying to optimize this generic implementation. - Object[][] results = new Object[aggFactories.length][numRows]; - int resultStorageIndex = 0; - - AtomicInteger rowIdProvider = new AtomicInteger(0); - final ColumnSelectorFactory columnSelectorFactory = ColumnSelectorFactoryMaker.fromRAC(rac).make(rowIdProvider); - - // This is the number of aggregators to actually aggregate for the current row. - // Which also doubles as the nextIndex to roll through as we roll things in and out of the window - int nextIndex = lowerOffset + 1; - - Aggregator[][] aggregators = new Aggregator[aggFactories.length][windowSize]; - for (int i = 0; i < aggregators.length; i++) { - final AggregatorFactory aggFactory = aggFactories[i]; - // instantiate the aggregators that need to be read on the first row. - for (int j = 0; j < nextIndex; j++) { - aggregators[i][j] = aggFactory.factorize(columnSelectorFactory); - } - } - - // The first few rows will slowly build out the window to consume the upper-offset. The window will not - // be full until we have walked upperOffset number of rows, so phase 1 runs until we have consumed - // upperOffset number of rows. - for (int upperIndex = 0; upperIndex < upperOffset; ++upperIndex) { - for (Aggregator[] aggregator : aggregators) { - for (int j = 0; j < nextIndex; ++j) { - aggregator[j].aggregate(); - } - } - - for (int i = 0; i < aggFactories.length; ++i) { - aggregators[i][nextIndex] = aggFactories[i].factorize(columnSelectorFactory); - } - ++nextIndex; - rowIdProvider.incrementAndGet(); - } - - // End Phase 1, Enter Phase 2. At this point, nextIndex == windowSize, rowIdProvider is the same as - // upperOffset and the aggregators matrix is entirely non-null. We need to iterate until our window has all of - // the aggregators in it to fill up the final result set. - int endResultStorageIndex = numRows - windowSize; - for (; resultStorageIndex < endResultStorageIndex; ++resultStorageIndex) { - for (Aggregator[] aggregator : aggregators) { - for (Aggregator value : aggregator) { - value.aggregate(); - } - } - - if (nextIndex == windowSize) { - // Wrap back around and start pruning from the beginning of the window - nextIndex = 0; - } - - for (int i = 0; i < aggFactories.length; ++i) { - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggregators[i][nextIndex].get()); - aggregators[i][nextIndex].close(); - aggregators[i][nextIndex] = aggFactories[i].factorize(columnSelectorFactory); - } - - ++nextIndex; - rowIdProvider.incrementAndGet(); - } - - if (nextIndex == windowSize) { - nextIndex = 0; - } - - // End Phase 2, enter Phase 3. At this point, our window has enough aggregators in it to fill up our final - // result set. This means that for each new row that we complete, the window will "shrink" until we hit numRows, - // at which point we will collect anything yet remaining and be done. 
- - if (nextIndex != 0) { - // Start by organizing the aggregators so that we are 0-indexed from nextIndex. This trades off creating - // a new array of references in exchange for removing branches inside of the loop. It also makes the logic - // simpler to understand. - - Aggregator[][] reorganizedAggs = new Aggregator[aggFactories.length][windowSize]; - for (int i = 0; i < aggFactories.length; i++) { - System.arraycopy(aggregators[i], nextIndex, reorganizedAggs[i], 0, windowSize - nextIndex); - System.arraycopy(aggregators[i], 0, reorganizedAggs[i], windowSize - nextIndex, nextIndex); - } - aggregators = reorganizedAggs; - nextIndex = 0; - } - - for (int rowId = rowIdProvider.get(); rowId < numRows; ++rowId) { - for (Aggregator[] aggregator : aggregators) { - for (int j = nextIndex; j < aggregator.length; ++j) { - aggregator[j].aggregate(); - } - } - - for (int i = 0; i < aggFactories.length; ++i) { - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggregators[i][nextIndex].get()); - aggregators[i][nextIndex].close(); - aggregators[i][nextIndex] = null; - } - - ++nextIndex; - ++resultStorageIndex; - rowIdProvider.incrementAndGet(); - } - - // End Phase 3, anything left in the window needs to be collected and put into our results - for (; nextIndex < windowSize; ++nextIndex) { - for (int i = 0; i < aggFactories.length; ++i) { - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggregators[i][nextIndex].get()); - aggregators[i][nextIndex].close(); - aggregators[i][nextIndex] = null; - } - ++resultStorageIndex; - } - - return makeReturnRAC(aggFactories, results); - } - - private AppendableRowsAndColumns aggregateWindowApertureInFlux( - AggregatorFactory[] aggFactories, - int lowerOffset, - int upperOffset - ) - { - // In this case, we need to store a value for all items, so our windowSize is equivalent to the number of rows - // from the RowsAndColumns object that we are using. - int windowSize = rac.numRows(); - - // We store the results in an Object array for convenience. This is definitely sub-par from a memory management - // point of view as we should use native arrays when possible. This will be fine for now, but it probably makes - // sense to look at optimizing this in the future. That said, such an optimization might best come by having - // a specialized implementation of this interface against, say, a Frame object that can deal with arrays instead - // of trying to optimize this generic implementation. 
- Object[][] results = new Object[aggFactories.length][windowSize]; - int resultStorageIndex = 0; - - AtomicInteger rowIdProvider = new AtomicInteger(0); - final ColumnSelectorFactory columnSelectorFactory = ColumnSelectorFactoryMaker.fromRAC(rac).make(rowIdProvider); - - Aggregator[][] aggregators = new Aggregator[aggFactories.length][windowSize]; - for (int i = 0; i < aggregators.length; i++) { - final AggregatorFactory aggFactory = aggFactories[i]; - for (int j = 0; j < aggregators[i].length; j++) { - aggregators[i][j] = aggFactory.factorize(columnSelectorFactory); - } - } - - // This is the index to stop at for the current window aperture - // The first row is used by all of the results for the lowerOffset num results, plus 1 for the "current row" - int stopIndex = Math.min(lowerOffset + 1, windowSize); - - int startIndex = 0; - int rowId = rowIdProvider.get(); - while (rowId < windowSize) { - for (Aggregator[] aggregator : aggregators) { - for (int j = startIndex; j < stopIndex; ++j) { - aggregator[j].aggregate(); - } - } - - if (rowId >= upperOffset) { - for (int i = 0; i < aggregators.length; ++i) { - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggregators[i][startIndex].get()); - aggregators[i][startIndex].close(); - aggregators[i][startIndex] = null; - } - - ++resultStorageIndex; - ++startIndex; - } - - if (stopIndex < windowSize) { - ++stopIndex; - } - rowId = rowIdProvider.incrementAndGet(); - } - - - for (; startIndex < windowSize; ++startIndex) { - for (int i = 0; i < aggregators.length; ++i) { - results[i][resultStorageIndex] = aggFactories[i].finalizeComputation(aggregators[i][startIndex].get()); - aggregators[i][startIndex].close(); - aggregators[i][startIndex] = null; - } - ++resultStorageIndex; - } - - return makeReturnRAC(aggFactories, results); - } - - private AppendableRowsAndColumns makeReturnRAC(AggregatorFactory[] aggFactories, Object[][] results) - { - for (int i = 0; i < aggFactories.length; ++i) { - rac.addColumn( - aggFactories[i].getName(), new ObjectArrayColumn(results[i], aggFactories[i].getIntermediateType()) - ); - } - return rac; - } - private static class CumulativeColumnSelectorFactory implements ColumnSelectorFactory { private final ColumnCapabilitiesImpl columnCapabilities; @@ -831,16 +404,6 @@ public class DefaultFramedOnHeapAggregatable implements FramedOnHeapAggregatable .setType(factory.getIntermediateType()); } - public void increment() - { - ++index; - } - - public void decrement() - { - --index; - } - @Override @Nonnull public DimensionSelector makeDimensionSelector(@Nonnull DimensionSpec dimensionSpec) diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueFramesIterable.java b/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueFramesIterable.java index 42f57628461..b5fd9d1fd2f 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueFramesIterable.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanResultValueFramesIterable.java @@ -22,8 +22,10 @@ package org.apache.druid.query.scan; import com.google.common.base.Function; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.MemoryAllocatorFactory; import 
org.apache.druid.frame.segment.FrameCursorUtils; import org.apache.druid.frame.write.FrameWriter; @@ -36,6 +38,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.query.FrameSignaturePair; import org.apache.druid.query.IterableRowsCursorHelper; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import java.io.Closeable; @@ -157,17 +160,41 @@ public class ScanResultValueFramesIterable implements Iterable currentRows = null; + /** + * Row index pointing to the current row in {@link #currentRows}. This is the exact same row that the {@link #currentCursor} + * is also pointing at. Therefore {@link #currentRows} + {@link #currentCursor} represent the same information as presented + * by {@link #currentCursor}. + */ + int currentRowIndex = -1; + + /** + * Full row signature of the ScanResultValue, used to extract the rows out of it. + */ + RowSignature currentInputRowSignature = null; + + /** + * Row signature of the ScanResultValue, with columns having unknown (null) types trimmed out. This is used to write + * the rows onto the frame. There's an implicit assumption (that we verify), that columns with null typed only + * contain null values, because the underlying segment didn't have the column. + */ + RowSignature currentOutputRowSignature = null; + + /** + * Columns of the currentRows with missing type information. As we materialize the rows onto the frames, we also + * verify that these columns only contain null values. + */ + IntList nullTypedColumns = null; public ScanResultValueFramesIterator( - Sequence resultSequence, - MemoryAllocatorFactory memoryAllocatorFactory, - boolean useNestedForUnknownTypes, - RowSignature defaultRowSignature, - Function> resultFormatMapper + final Sequence resultSequence, + final MemoryAllocatorFactory memoryAllocatorFactory, + final boolean useNestedForUnknownTypes, + final RowSignature defaultRowSignature, + final Function> resultFormatMapper ) { this.memoryAllocatorFactory = memoryAllocatorFactory; @@ -200,26 +227,34 @@ public class ScanResultValueFramesIterable implements Iterable currentCursor, - currentRowSignature - ))) { + final Frame frame; + try (final FrameWriter frameWriter = frameWriterFactory.newFrameWriter( + new SettableCursorColumnSelectorFactory(() -> currentCursor, currentInputRowSignature))) { while (populateCursor()) { // Do till we don't have any more rows, or the next row isn't compatible with the current row if (!frameWriter.addSelection()) { // Add the cursor's row to the frame, till the frame is full break; } + + // Check that the columns with the null types are actually null before advancing + final Object[] currentRow = currentRows.get(currentRowIndex); + for (Integer columnNumber : nullTypedColumns) { + if (currentRow[columnNumber] != null) { + throw DruidException.defensive( + "Expected a null value for column [%s]", + frameWriterFactory.signature().getColumnName(columnNumber) + ); + } + } + firstRowWritten = true; currentCursor.advance(); + currentRowIndex++; } if (!firstRowWritten) { @@ -228,7 +263,9 @@ public class ScanResultValueFramesIterable implements Iterable * Multiple calls to populateCursor, without advancing the {@link #currentCursor} is idempotent. 
This allows successive @@ -257,7 +294,9 @@ public class ScanResultValueFramesIterable implements Iterable * Return value - * if (hasNext()) is false before calling the method - returns false @@ -275,25 +314,42 @@ public class ScanResultValueFramesIterable implements Iterable formattedRows = Lists.newArrayList(Iterables.transform( + final List formattedRows = Lists.newArrayList(Iterables.transform( rows, (Function) resultFormatMapper.apply(modifiedRowSignature) )); - Pair cursorAndCloseable = IterableRowsCursorHelper.getCursorFromIterable( + final Pair cursorAndCloseable = IterableRowsCursorHelper.getCursorFromIterable( formattedRows, modifiedRowSignature ); @@ -306,7 +362,12 @@ public class ScanResultValueFramesIterable implements Iterable, Object, SearchQuery> getCacheStrategy(final SearchQuery query) + { + return getCacheStrategy(query, null); + } + + @Override + public CacheStrategy, Object, SearchQuery> getCacheStrategy( + final SearchQuery query, + @Nullable final ObjectMapper objectMapper + ) { return new CacheStrategy, Object, SearchQuery>() diff --git a/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java index 9087dd26a88..eab5e0f5abc 100644 --- a/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.timeboundary; import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; @@ -47,6 +48,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.timeline.LogicalSegment; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.Comparator; import java.util.List; @@ -163,6 +165,16 @@ public class TimeBoundaryQueryQueryToolChest @Override public CacheStrategy, Object, TimeBoundaryQuery> getCacheStrategy(final TimeBoundaryQuery query) + { + return getCacheStrategy(query, null); + } + + + @Override + public CacheStrategy, Object, TimeBoundaryQuery> getCacheStrategy( + final TimeBoundaryQuery query, + @Nullable final ObjectMapper objectMapper + ) { return new CacheStrategy, Object, TimeBoundaryQuery>() { diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index c5e83b84e87..d8369c8c6da 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine; import org.apache.druid.query.vector.VectorCursorGranularizer; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.SegmentMissingException; @@ -103,7 +104,7 @@ public class TimeseriesQueryEngine final boolean doVectorize = 
query.context().getVectorize().shouldVectorize( adapter.canVectorize(filter, query.getVirtualColumns(), descending) && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter) - && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector)) + && VectorGroupByEngine.canVectorizeAggregators(inspector, query.getAggregatorSpecs()) ); final Sequence> result; diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 71d36bb9bbe..67c36fe7603 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.timeseries; import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Preconditions; @@ -31,7 +32,6 @@ import com.google.inject.Inject; import org.apache.commons.lang.StringUtils; import org.apache.druid.data.input.MapBasedRow; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.MemoryAllocatorFactory; import org.apache.druid.frame.segment.FrameCursorUtils; import org.apache.druid.frame.write.FrameWriterFactory; @@ -66,6 +66,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.joda.time.DateTime; +import javax.annotation.Nullable; import java.io.Closeable; import java.util.ArrayList; import java.util.Collections; @@ -277,6 +278,16 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) + { + return getCacheStrategy(query, null); + } + + + @Override + public CacheStrategy, Object, TimeseriesQuery> getCacheStrategy( + final TimeseriesQuery query, + @Nullable final ObjectMapper objectMapper + ) { return new CacheStrategy, Object, TimeseriesQuery>() { @@ -486,8 +497,9 @@ public class TimeseriesQueryQueryToolChest extends QueryToolChest() diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java index b850114f3bc..21bc336438a 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryQueryToolChest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.topn; import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.collect.Iterables; @@ -28,7 +29,6 @@ import com.google.common.collect.Maps; import com.google.inject.Inject; import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.MemoryAllocatorFactory; import org.apache.druid.frame.segment.FrameCursorUtils; import org.apache.druid.frame.write.FrameWriterFactory; @@ -65,6 +65,7 @@ import org.apache.druid.segment.DimensionHandlerUtils; import 
org.apache.druid.segment.column.RowSignature; import org.joda.time.DateTime; +import javax.annotation.Nullable; import java.io.Closeable; import java.util.ArrayList; import java.util.Collections; @@ -269,9 +270,18 @@ public class TopNQueryQueryToolChest extends QueryToolChest, Object, TopNQuery> getCacheStrategy(TopNQuery query) + { + return getCacheStrategy(query, null); + } @Override - public CacheStrategy, Object, TopNQuery> getCacheStrategy(final TopNQuery query) + public CacheStrategy, Object, TopNQuery> getCacheStrategy( + final TopNQuery query, + @Nullable final ObjectMapper objectMapper + ) { return new CacheStrategy, Object, TopNQuery>() { @@ -570,8 +580,9 @@ public class TopNQueryQueryToolChest extends QueryToolChest() diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index da63413cb4b..e129ceb4177 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -28,6 +28,7 @@ import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.math.expr.Evals; import org.apache.druid.query.ColumnSelectorPlus; @@ -309,13 +310,11 @@ public final class DimensionHandlerUtils } @Nullable - public static Long convertObjectToLong(@Nullable Object valObj) - { - return convertObjectToLong(valObj, false); - } - - @Nullable - public static Long convertObjectToLong(@Nullable Object valObj, boolean reportParseExceptions) + public static Long convertObjectToLong( + @Nullable Object valObj, + boolean reportParseExceptions, + @Nullable String objectKey + ) { if (valObj == null) { return null; @@ -330,25 +329,82 @@ public final class DimensionHandlerUtils } else if (valObj instanceof String) { Long ret = DimensionHandlerUtils.getExactLongFromDecimalString((String) valObj); if (reportParseExceptions && ret == null) { - throw new ParseException((String) valObj, "could not convert value [%s] to long", valObj); + final String message; + if (objectKey != null) { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to long for dimension [%s].", + valObj, + objectKey + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to long.", + valObj + ); + } + throw new ParseException((String) valObj, message); } return ret; } else if (valObj instanceof List) { + final String message; + if (objectKey != null) { + message = StringUtils.nonStrictFormat( + "Could not ingest value [%s] as long for dimension [%s]. A long column cannot have multiple values in the same row.", + valObj, + objectKey + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not ingest value [%s] as long. A long column cannot have multiple values in the same row.", + valObj + ); + } throw new ParseException( valObj.getClass().toString(), - "Could not ingest value %s as long. A long column cannot have multiple values in the same row.", - valObj + message ); } else { + final String message; + if (objectKey != null) { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to long for dimension [%s]. 
Invalid type: [%s]", + valObj, + objectKey, + valObj.getClass() + ); + } else { + message = StringUtils.nonStrictFormat( + valObj.getClass().toString(), + "Could not convert value [%s] to long. Invalid type: [%s]", + valObj, + valObj.getClass() + ); + } throw new ParseException( valObj.getClass().toString(), - "Could not convert value [%s] to long. Invalid type: [%s]", - valObj, - valObj.getClass() + message ); } } + @Nullable + public static Long convertObjectToLong(@Nullable Object valObj) + { + return convertObjectToLong(valObj, false); + } + + @Nullable + public static Long convertObjectToLong(@Nullable Object valObj, boolean reportParseExceptions) + { + return convertObjectToLong(valObj, reportParseExceptions, null); + } + + @Nullable + public static Long convertObjectToLong(@Nullable Object valObj, @Nullable String fieldName) + { + return convertObjectToLong(valObj, false, fieldName); + } + @Nullable public static Float convertObjectToFloat(@Nullable Object valObj) { @@ -358,33 +414,118 @@ public final class DimensionHandlerUtils @Nullable public static Float convertObjectToFloat(@Nullable Object valObj, boolean reportParseExceptions) { - if (valObj == null) { - return null; - } + return convertObjectToFloat(valObj, reportParseExceptions, null); + } - if (valObj instanceof Float) { - return (Float) valObj; - } else if (valObj instanceof Number) { - return ((Number) valObj).floatValue(); - } else if (valObj instanceof String) { - Float ret = Floats.tryParse((String) valObj); - if (reportParseExceptions && ret == null) { - throw new ParseException((String) valObj, "could not convert value [%s] to float", valObj); + @Nullable + public static Float convertObjectToFloat(@Nullable Object valObj, @Nullable String fieldName) + { + return convertObjectToFloat(valObj, false, fieldName); + } + + @Nullable + public static Float convertObjectToFloat(@Nullable Object valObj, boolean reportParseExceptions, @Nullable String fieldName) + { + { + if (valObj == null) { + return null; } - return ret; - } else if (valObj instanceof List) { - throw new ParseException( - valObj.getClass().toString(), - "Could not ingest value %s as float. A float column cannot have multiple values in the same row.", - valObj - ); - } else { - throw new ParseException( - valObj.getClass().toString(), - "Could not convert value [%s] to float. Invalid type: [%s]", - valObj, - valObj.getClass() - ); + + if (valObj instanceof Float) { + return (Float) valObj; + } else if (valObj instanceof Number) { + return ((Number) valObj).floatValue(); + } else if (valObj instanceof String) { + Float ret = Floats.tryParse((String) valObj); + if (reportParseExceptions && ret == null) { + final String message; + if (fieldName != null) { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to float for dimension [%s].", + valObj, + fieldName + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to float.", + valObj + ); + } + throw new ParseException((String) valObj, message); + } + return ret; + } else if (valObj instanceof List) { + final String message; + if (fieldName != null) { + message = StringUtils.nonStrictFormat( + "Could not ingest value [%s] as float for dimension [%s]. A float column cannot have multiple values in the same row.", + valObj, + fieldName + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not ingest value [%s] as float. 
A float column cannot have multiple values in the same row.", + valObj + ); + } + throw new ParseException( + valObj.getClass().toString(), + message + ); + } else { + final String message; + if (fieldName != null) { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to float for dimension [%s]. Invalid type: [%s]", + valObj, + fieldName, + valObj.getClass() + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to float. Invalid type: [%s]", + valObj, + valObj.getClass() + ); + } + throw new ParseException( + valObj.getClass().toString(), + message + ); + } + } + } + + @Nullable + public static Object convertObjectToType( + @Nullable final Object obj, + final TypeSignature type, + final boolean reportParseExceptions, + @Nullable final String fieldName + ) + { + Preconditions.checkNotNull(type, "type"); + + switch (type.getType()) { + case LONG: + return convertObjectToLong(obj, reportParseExceptions, fieldName); + case FLOAT: + return convertObjectToFloat(obj, reportParseExceptions, fieldName); + case DOUBLE: + return convertObjectToDouble(obj, reportParseExceptions, fieldName); + case STRING: + return convertObjectToString(obj); + case ARRAY: + return coerceToObjectArrayWithElementCoercionFunction( + obj, + x -> DimensionHandlerUtils.convertObjectToType(x, type.getElementType(), reportParseExceptions, fieldName) + ); + case COMPLEX: + // Can't coerce complex objects, and we shouldn't need to. If in future selectors behave weirdly, or we need to + // cast them (for some unknown reason), we can have that casting knowledge in the type strategy + return obj; + default: + throw DruidException.defensive("Type[%s] is not supported for dimensions!", type); } } @@ -395,29 +536,7 @@ public final class DimensionHandlerUtils final boolean reportParseExceptions ) { - Preconditions.checkNotNull(type, "type"); - - switch (type.getType()) { - case LONG: - return convertObjectToLong(obj, reportParseExceptions); - case FLOAT: - return convertObjectToFloat(obj, reportParseExceptions); - case DOUBLE: - return convertObjectToDouble(obj, reportParseExceptions); - case STRING: - return convertObjectToString(obj); - case ARRAY: - return coerceToObjectArrayWithElementCoercionFunction( - obj, - x -> DimensionHandlerUtils.convertObjectToType(x, type.getElementType()) - ); - case COMPLEX: - // Can't coerce complex objects, and we shouldn't need to. 
If in future selectors behave weirdly, or we need to - // cast them (for some unknown reason), we can have that casting knowledge in the type strategy - return obj; - default: - throw DruidException.defensive("Type[%s] is not supported for dimensions!", type); - } + return convertObjectToType(obj, type, reportParseExceptions, null); } @Nullable @@ -506,6 +625,18 @@ public final class DimensionHandlerUtils @Nullable public static Double convertObjectToDouble(@Nullable Object valObj, boolean reportParseExceptions) + { + return convertObjectToDouble(valObj, reportParseExceptions, null); + } + + @Nullable + public static Double convertObjectToDouble(@Nullable Object valObj, @Nullable String fieldName) + { + return convertObjectToDouble(valObj, false, fieldName); + } + + @Nullable + public static Double convertObjectToDouble(@Nullable Object valObj, boolean reportParseExceptions, @Nullable String fieldName) { if (valObj == null) { return null; @@ -518,21 +649,59 @@ public final class DimensionHandlerUtils } else if (valObj instanceof String) { Double ret = Doubles.tryParse((String) valObj); if (reportParseExceptions && ret == null) { - throw new ParseException((String) valObj, "could not convert value [%s] to double", valObj); + final String message; + if (fieldName != null) { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to double for dimension [%s].", + valObj, + fieldName + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to double.", + valObj + ); + } + throw new ParseException((String) valObj, message); } return ret; } else if (valObj instanceof List) { + final String message; + if (fieldName != null) { + message = StringUtils.nonStrictFormat( + "Could not ingest value [%s] as double for dimension [%s]. A double column cannot have multiple values in the same row.", + valObj, + fieldName + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not ingest value [%s] as double. A double column cannot have multiple values in the same row.", + valObj + ); + } + throw new ParseException( valObj.getClass().toString(), - "Could not ingest value %s as double. A double column cannot have multiple values in the same row.", - valObj + message ); } else { + final String message; + if (fieldName != null) { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to double for dimension [%s]. Invalid type: [%s]", + valObj, + fieldName, + valObj.getClass() + ); + } else { + message = StringUtils.nonStrictFormat( + "Could not convert value [%s] to double. Invalid type: [%s]", + valObj, valObj.getClass() + ); + } throw new ParseException( valObj.getClass().toString(), - "Could not convert value [%s] to double. 
Invalid type: [%s]", - valObj, - valObj.getClass() + message ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionHandler.java index 9fbc9436be6..e8166708922 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionHandler.java @@ -72,7 +72,7 @@ public class DoubleDimensionHandler implements DimensionHandler makeIndexer(boolean useMaxMemoryEstimates) { - return new DoubleDimensionIndexer(); + return new DoubleDimensionIndexer(dimensionName); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index f4e13cca565..6f1e5184747 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -39,13 +39,21 @@ import java.util.Objects; public class DoubleDimensionIndexer implements DimensionIndexer { public static final Comparator DOUBLE_COMPARATOR = Comparators.naturalNullsFirst(); - + private final String dimensionName; private volatile boolean hasNulls = false; - @Override - public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) + public DoubleDimensionIndexer(String dimensionName) { - Double d = DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions); + this.dimensionName = dimensionName; + } + + @Override + public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent( + @Nullable Object dimValues, + boolean reportParseExceptions + ) + { + Double d = DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions, dimensionName); if (d == null) { hasNulls = NullHandling.sqlCompatible(); } diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionHandler.java index 4763b0e8be1..8d3471a4d92 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionHandler.java @@ -72,7 +72,7 @@ public class FloatDimensionHandler implements DimensionHandler makeIndexer(boolean useMaxMemoryEstimates) { - return new FloatDimensionIndexer(); + return new FloatDimensionIndexer(dimensionName); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index be5e86b7bb6..16b3c9f7052 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -39,13 +39,21 @@ import java.util.Objects; public class FloatDimensionIndexer implements DimensionIndexer { public static final Comparator FLOAT_COMPARATOR = Comparators.naturalNullsFirst(); - + private final String dimensionName; private volatile boolean hasNulls = false; - @Override - public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) + public FloatDimensionIndexer(String dimensionName) { - Float f = DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions); + this.dimensionName = 
dimensionName; + } + + @Override + public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent( + @Nullable Object dimValues, + boolean reportParseExceptions + ) + { + Float f = DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions, dimensionName); if (f == null) { hasNulls = NullHandling.sqlCompatible(); } diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionHandler.java index 51e13396840..64a9f98cd44 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionHandler.java @@ -72,7 +72,7 @@ public class LongDimensionHandler implements DimensionHandler @Override public DimensionIndexer makeIndexer(boolean useMaxMemoryEstimates) { - return new LongDimensionIndexer(); + return new LongDimensionIndexer(dimensionName); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index 85ed29b9c28..3b273bb7f02 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -39,13 +39,21 @@ import java.util.Objects; public class LongDimensionIndexer implements DimensionIndexer { public static final Comparator LONG_COMPARATOR = Comparators.naturalNullsFirst(); - + private final String dimensionName; private volatile boolean hasNulls = false; - @Override - public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) + public LongDimensionIndexer(String dimensionName) { - Long l = DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions); + this.dimensionName = dimensionName; + } + + @Override + public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent( + @Nullable Object dimValues, + boolean reportParseExceptions + ) + { + Long l = DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions, dimensionName); if (l == null) { hasNulls = NullHandling.sqlCompatible(); } diff --git a/processing/src/main/java/org/apache/druid/segment/Metadata.java b/processing/src/main/java/org/apache/druid/segment/Metadata.java index e8aa5e646de..e6b5b1f6588 100644 --- a/processing/src/main/java/org/apache/druid/segment/Metadata.java +++ b/processing/src/main/java/org/apache/druid/segment/Metadata.java @@ -108,11 +108,6 @@ public class Metadata return this; } - public Object get(String key) - { - return container.get(key); - } - // arbitrary key-value pairs from the metadata just follow the semantics of last one wins if same // key exists in multiple input Metadata containers // for others e.g. 
Aggregators, appropriate merging is done diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 98f0d0949c8..4d4aeaf7046 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -19,6 +19,7 @@ package org.apache.druid.segment; +import com.google.common.base.Preconditions; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; @@ -27,6 +28,7 @@ import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.IndexedInts; +import org.checkerframework.checker.nullness.qual.MonotonicNonNull; import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -69,10 +71,12 @@ public class UnnestDimensionCursor implements Cursor private final String outputName; private final ColumnSelectorFactory baseColumnSelectorFactory; private int index; - @Nullable + @MonotonicNonNull private IndexedInts indexedIntsForCurrentRow; private boolean needInitialization; private SingleIndexInts indexIntsForRow; + private final int nullId; + private final int idOffset; public UnnestDimensionCursor( Cursor cursor, @@ -91,11 +95,22 @@ public class UnnestDimensionCursor implements Cursor this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; + // this shouldn't happen, but just in case... + final IdLookup lookup = Preconditions.checkNotNull(dimSelector.idLookup()); + final int nullId = lookup.lookupId(null); + if (nullId < 0) { + this.idOffset = 1; + this.nullId = 0; + } else { + this.idOffset = 0; + this.nullId = nullId; + } } @Override public ColumnSelectorFactory getColumnSelectorFactory() { + return new ColumnSelectorFactory() { @Override @@ -110,15 +125,13 @@ public class UnnestDimensionCursor implements Cursor @Override public IndexedInts getRow() { - // This object reference has been created - // during the call to initialize and referenced henceforth return indexIntsForRow; } @Override public ValueMatcher makeValueMatcher(@Nullable String value) { - final int idForLookup = idLookup().lookupId(value); + final int idForLookup = dimSelector.idLookup().lookupId(value); if (idForLookup < 0) { return new ValueMatcher() { @@ -131,7 +144,7 @@ public class UnnestDimensionCursor implements Cursor return true; } final int rowId = indexedIntsForCurrentRow.get(index); - return lookupName(rowId) == null; + return dimSelector.lookupName(rowId) == null; } return false; } @@ -156,7 +169,7 @@ public class UnnestDimensionCursor implements Cursor return includeUnknown; } final int rowId = indexedIntsForCurrentRow.get(index); - return (includeUnknown && lookupName(rowId) == null) || idForLookup == rowId; + return (includeUnknown && dimSelector.lookupName(rowId) == null) || idForLookup == rowId; } @Override @@ -183,10 +196,10 @@ public class UnnestDimensionCursor implements Cursor @Override public Object getObject() { - if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) { + if (indexedIntsForCurrentRow.size() == 0) { return null; } - return lookupName(indexedIntsForCurrentRow.get(index)); + return dimSelector.lookupName(indexedIntsForCurrentRow.get(index)); } @Override @@ -198,14 
+211,14 @@ public class UnnestDimensionCursor implements Cursor @Override public int getValueCardinality() { - return dimSelector.getValueCardinality(); + return dimSelector.getValueCardinality() + idOffset; } @Nullable @Override public String lookupName(int id) { - return dimSelector.lookupName(id); + return dimSelector.lookupName(id - idOffset); } @Override @@ -218,21 +231,19 @@ public class UnnestDimensionCursor implements Cursor @Override public IdLookup idLookup() { - return dimSelector.idLookup(); + return name -> name == null ? nullId : dimSelector.idLookup().lookupId(name) + idOffset; } }; } - /* - This ideally should not be called. If called delegate using the makeDimensionSelector - */ @Override public ColumnValueSelector makeColumnValueSelector(String columnName) { - if (!outputName.equals(columnName)) { - return baseColumnSelectorFactory.makeColumnValueSelector(columnName); + if (outputName.equals(columnName)) { + return makeDimensionSelector(DefaultDimensionSpec.of(columnName)); } - return makeDimensionSelector(DefaultDimensionSpec.of(columnName)); + + return baseColumnSelectorFactory.makeColumnValueSelector(columnName); } @Nullable @@ -304,11 +315,7 @@ public class UnnestDimensionCursor implements Cursor { index = 0; this.indexIntsForRow = new SingleIndexInts(); - - if (dimSelector.getObject() != null) { - this.indexedIntsForCurrentRow = dimSelector.getRow(); - } - + this.indexedIntsForCurrentRow = dimSelector.getRow(); needInitialization = false; } @@ -320,30 +327,19 @@ public class UnnestDimensionCursor implements Cursor */ private void advanceAndUpdate() { - if (indexedIntsForCurrentRow == null) { - index = 0; + if (index >= indexedIntsForCurrentRow.size() - 1) { if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); - if (!baseCursor.isDone()) { - indexedIntsForCurrentRow = dimSelector.getRow(); - } } + if (!baseCursor.isDone()) { + indexedIntsForCurrentRow = dimSelector.getRow(); + } + index = 0; } else { - if (index >= indexedIntsForCurrentRow.size() - 1) { - if (!baseCursor.isDone()) { - baseCursor.advanceUninterruptibly(); - } - if (!baseCursor.isDone()) { - indexedIntsForCurrentRow = dimSelector.getRow(); - } - index = 0; - } else { - ++index; - } + ++index; } } - // Helper class to help in returning // getRow from the dimensionSelector // This is set in the initialize method @@ -366,12 +362,11 @@ public class UnnestDimensionCursor implements Cursor @Override public int get(int idx) { - // need to get value from the indexed ints - // only if it is non null and has at least 1 value - if (indexedIntsForCurrentRow != null && indexedIntsForCurrentRow.size() > 0) { - return indexedIntsForCurrentRow.get(index); + // everything that calls get also checks size + if (indexedIntsForCurrentRow.size() == 0) { + return nullId; } - return 0; + return idOffset + indexedIntsForCurrentRow.get(index); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 8735844c946..ff4994210e1 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -582,10 +582,12 @@ public class UnnestStorageAdapter implements StorageAdapter final TypeSignature outputType = capabilities.isArray() ? 
capabilities.getElementType() : capabilities.toColumnType(); + final boolean useDimensionCursor = useDimensionCursor(capabilities); return ColumnCapabilitiesImpl.createDefault() .setType(outputType) .setHasMultipleValues(false) - .setDictionaryEncoded(useDimensionCursor(capabilities)); + .setDictionaryEncoded(useDimensionCursor) + .setDictionaryValuesUnique(useDimensionCursor); } } diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index 3698a8a731b..ca9408d14e4 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -24,6 +24,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.java.util.common.Cacheable; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.ColumnIndexSelector; +import org.apache.druid.query.groupby.DeferExpressionDimensions; +import org.apache.druid.query.groupby.epinephelinae.vector.GroupByVectorColumnSelector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.data.ReadableOffset; @@ -240,6 +242,26 @@ public interface VirtualColumn extends Cacheable return null; } + /** + * Returns a group-by selector. Allows virtual columns to control their own grouping behavior. + * + * @param columnName column name + * @param factory column selector factory + * @param deferExpressionDimensions active value of {@link org.apache.druid.query.groupby.GroupByQueryConfig#CTX_KEY_DEFER_EXPRESSION_DIMENSIONS} + * + * @return selector, or null if this virtual column does not have a specialized one + */ + @SuppressWarnings("unused") + @Nullable + default GroupByVectorColumnSelector makeGroupByVectorColumnSelector( + String columnName, + VectorColumnSelectorFactory factory, + DeferExpressionDimensions deferExpressionDimensions + ) + { + return null; + } + /** * This method is deprecated in favor of {@link #capabilities(ColumnInspector, String)}, which should be used whenever * possible and can support virtual column implementations that need to inspect other columns as inputs. @@ -265,8 +287,9 @@ public interface VirtualColumn extends Cacheable * Examples of this include the {@link ExpressionVirtualColumn}, which takes input from other columns and uses the * {@link ColumnInspector} to infer the output type of expressions based on the types of the inputs. 
* - * @param inspector column inspector to provide additional information of other available columns + * @param inspector column inspector to provide additional information of other available columns * @param columnName the name this virtual column was referenced with + * * @return capabilities, must not be null */ @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 583f0425c2a..a32a85d16c7 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -48,13 +48,12 @@ import org.apache.druid.segment.virtual.VirtualizedColumnInspector; import org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory; import javax.annotation.Nullable; - +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; /** * Class allowing lookup and usage of virtual columns. @@ -86,11 +85,21 @@ public class VirtualColumns implements Cacheable } @JsonCreator - public static VirtualColumns create(List virtualColumns) + public static VirtualColumns create(@Nullable List virtualColumns) { if (virtualColumns == null || virtualColumns.isEmpty()) { return EMPTY; } + return fromIterable(virtualColumns); + } + + public static VirtualColumns create(VirtualColumn... virtualColumns) + { + return create(Arrays.asList(virtualColumns)); + } + + public static VirtualColumns fromIterable(Iterable virtualColumns) + { Map withDotSupport = new HashMap<>(); Map withoutDotSupport = new HashMap<>(); for (VirtualColumn vc : virtualColumns) { @@ -115,11 +124,6 @@ public class VirtualColumns implements Cacheable return new VirtualColumns(ImmutableList.copyOf(virtualColumns), withDotSupport, withoutDotSupport); } - public static VirtualColumns create(VirtualColumn... virtualColumns) - { - return create(Arrays.asList(virtualColumns)); - } - public static VirtualColumns nullToEmpty(@Nullable VirtualColumns virtualColumns) { return virtualColumns == null ? EMPTY : virtualColumns; @@ -134,6 +138,14 @@ public class VirtualColumns implements Cacheable } } + // For equals, hashCode, toString, and serialization: + private final List virtualColumns; + private final List virtualColumnNames; + + // For getVirtualColumn: + private final Map withDotSupport; + private final Map withoutDotSupport; + private VirtualColumns( List virtualColumns, Map withDotSupport, @@ -143,19 +155,14 @@ public class VirtualColumns implements Cacheable this.virtualColumns = virtualColumns; this.withDotSupport = withDotSupport; this.withoutDotSupport = withoutDotSupport; + this.virtualColumnNames = new ArrayList<>(virtualColumns.size()); for (VirtualColumn virtualColumn : virtualColumns) { detectCycles(virtualColumn, null); + virtualColumnNames.add(virtualColumn.getOutputName()); } } - // For equals, hashCode, toString, and serialization: - private final List virtualColumns; - - // For getVirtualColumn: - private final Map withDotSupport; - private final Map withoutDotSupport; - /** * Returns true if a virtual column exists with a particular columnName. 
* @@ -259,7 +266,12 @@ public class VirtualColumns implements Cacheable public boolean canVectorize(ColumnInspector columnInspector) { final ColumnInspector inspector = wrapInspector(columnInspector); - return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(inspector)); + for (VirtualColumn virtualColumn : virtualColumns) { + if (!virtualColumn.canVectorize(inspector)) { + return false; + } + } + return true; } /** @@ -468,6 +480,16 @@ public class VirtualColumns implements Cacheable return new CacheKeyBuilder((byte) 0).appendCacheablesIgnoringOrder(virtualColumns).build(); } + public boolean isEmpty() + { + return virtualColumns.isEmpty(); + } + + public List getColumnNames() + { + return virtualColumnNames; + } + private VirtualColumn getVirtualColumnForSelector(String columnName) { VirtualColumn virtualColumn = getVirtualColumn(columnName); @@ -538,14 +560,4 @@ public class VirtualColumns implements Cacheable ((VirtualColumns) obj).virtualColumns.isEmpty(); } } - - public boolean isEmpty() - { - return virtualColumns.isEmpty(); - } - - public List getColumnNames() - { - return virtualColumns.stream().map(v -> v.getOutputName()).collect(Collectors.toList()); - } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java index 7e2b21a25d0..73a235ce8ef 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnIndexSupplier.java @@ -19,6 +19,8 @@ package org.apache.druid.segment.column; +import org.apache.druid.query.filter.ColumnIndexSelector; + import javax.annotation.Nullable; /** @@ -41,6 +43,37 @@ public interface ColumnIndexSupplier * which can greatly reduce the total number of rows which need to be scanned and processed. * * Objects returned by this method are not thread-safe. + * + * There are several built-in index classes which can be passed as an argument to this method when used from + * {@link org.apache.druid.query.filter.Filter#getBitmapColumnIndex(ColumnIndexSelector)}. Implementors of this + * interface should provide as many of them as possible to participate fully in as many + * {@link org.apache.druid.query.filter.Filter} as possible, as different filters require different index types, + * and may prefer some over others. 
+ * + * Indexes for matching a row to a specific value: + * @see org.apache.druid.segment.index.semantic.NullValueIndex + * @see org.apache.druid.segment.index.semantic.ValueIndexes + * + * Indexes for matching a row to any of a set of values: + * @see org.apache.druid.segment.index.semantic.ValueSetIndexes + * @see org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes + * + * Indexes for matching a row to a range of values: + * @see org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes + * @see org.apache.druid.segment.index.semantic.NumericRangeIndexes + * + * Indexes for matching an array element of a row to a specific value: + * @see org.apache.druid.segment.index.semantic.ArrayElementIndexes + * + * Indexes for matching a row using a {@link org.apache.druid.query.filter.DruidPredicateFactory}: + * @see org.apache.druid.segment.index.semantic.DruidPredicateIndexes + * + * Speciality indexes: + * @see org.apache.druid.segment.index.semantic.SpatialIndex + * + * Low level access to implementation specific index stuff not particularly suitable for use in filtering: + * @see org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex + * @see org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex */ @Nullable T as(Class clazz); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java b/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java index b274e55282e..f80a1cdcf8d 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ObjectStrategyComplexTypeStrategy.java @@ -123,7 +123,7 @@ public class ObjectStrategyComplexTypeStrategy implements TypeStrategy public int hashCode(T o) { if (hashStrategy == null) { - throw DruidException.defensive("hashStrategy not provided"); + throw DruidException.defensive("Type [%s] is not groupable", typeSignature.asTypeString()); } return hashStrategy.hashCode(o); } @@ -132,7 +132,7 @@ public class ObjectStrategyComplexTypeStrategy implements TypeStrategy public boolean equals(T a, T b) { if (hashStrategy == null) { - throw DruidException.defensive("hashStrategy not provided"); + throw DruidException.defensive("Type [%s] is not groupable", typeSignature.asTypeString()); } return hashStrategy.equals(a, b); } @@ -141,7 +141,7 @@ public class ObjectStrategyComplexTypeStrategy implements TypeStrategy public Class getClazz() { if (clazz == null) { - throw DruidException.defensive("hashStrategy not provided"); + throw DruidException.defensive("Type [%s] is not groupable", typeSignature.asTypeString()); } return clazz; } diff --git a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java index bae29179b4d..7ac8def99ec 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java +++ b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategies.java @@ -299,6 +299,12 @@ public class TypeStrategies { return a.equals(b); } + + @Override + public Class getClazz() + { + return Long.class; + } } /** @@ -368,6 +374,12 @@ public class TypeStrategies { return a.equals(b); } + + @Override + public Class getClazz() + { + return Float.class; + } } /** @@ -438,6 +450,12 @@ public class TypeStrategies { return a.equals(b); } + + @Override + public Class getClazz() + { + return Double.class; 
+ } } /** @@ -519,6 +537,12 @@ public class TypeStrategies { return a.equals(b); } + + @Override + public Class getClazz() + { + return String.class; + } } /** @@ -664,5 +688,11 @@ public class TypeStrategies return false; } } + + @Override + public Class getClazz() + { + return Object[].class; + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java index c5cff1a0b2f..075fceca473 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java +++ b/processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java @@ -225,6 +225,6 @@ public interface TypeStrategy extends Comparator, Hash.Strategy */ default Class getClazz() { - throw DruidException.defensive("Not implemented. It is only implemented for complex dimensions which are groupable()"); + throw DruidException.defensive("Not implemented. Check groupable() first"); } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAddResult.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAddResult.java index 077f162f9da..5f84138495c 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAddResult.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAddResult.java @@ -97,10 +97,4 @@ public class IncrementalIndexAddResult { return reasonOfNotAdded == null && parseException == null; } - - @Nullable - public String getReasonOfNotAdded() - { - return reasonOfNotAdded; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/RowIngestionMeters.java b/processing/src/main/java/org/apache/druid/segment/incremental/RowIngestionMeters.java index 3085376b822..25fc3bae481 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/RowIngestionMeters.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/RowIngestionMeters.java @@ -26,8 +26,8 @@ import java.util.Map; /** * A collection of meters for row ingestion stats, with support for moving average calculations. - * This can eventually replace FireDepartmentMetrics, but moving averages for other stats collected by - * FireDepartmentMetrics are not currently supported, so we continue to use FireDepartmentMetrics alongside + * This can eventually replace SegmentGenerationMetrics, but moving averages for other stats collected by + * SegmentGenerationMetrics are not currently supported, so we continue to use SegmentGenerationMetrics alongside * RowIngestionMeters to avoid unnecessary overhead from maintaining these moving averages. */ @ExtensionPoint diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java index 65395d148b2..6015088d558 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedUtf8ValueIndexes.java @@ -230,7 +230,7 @@ public final class IndexedUtf8ValueIndexes= 0 ? 
position : -(position + 1), baseSet.size()); diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index c7126448112..fd9c07a253f 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -19,7 +19,10 @@ package org.apache.druid.segment.join; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; @@ -30,7 +33,6 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.Metadata; import org.apache.druid.segment.StorageAdapter; -import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.Indexed; @@ -46,13 +48,10 @@ import org.joda.time.Interval; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; -import java.util.Set; public class HashJoinSegmentStorageAdapter implements StorageAdapter { @@ -292,43 +291,40 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter ); final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); - final JoinFilterSplit joinFilterSplit; - + final JoinFilterPreAnalysis preAnalysis; if (keyIn.equals(keyCached)) { // Common case: key used during filter pre-analysis (keyCached) matches key implied by makeCursors call (keyIn). - joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter); + preAnalysis = joinFilterPreAnalysis; } else { // Less common case: key differs. Re-analyze the filter. This case can happen when an unnest datasource is // layered on top of a join datasource. - joinFilterSplit = JoinFilterAnalyzer.splitFilter( - JoinFilterAnalyzer.computeJoinFilterPreAnalysis(keyIn), - baseFilter - ); + preAnalysis = JoinFilterAnalyzer.computeJoinFilterPreAnalysis(keyIn); } - final List preJoinVirtualColumns = new ArrayList<>(); - final List postJoinVirtualColumns = new ArrayList<>(); - determineBaseColumnsWithPreAndPostJoinVirtualColumns( - virtualColumns, - preJoinVirtualColumns, - postJoinVirtualColumns + final JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter( + preAnalysis, + baseFilter ); - // We merge the filter on base table specified by the user and filter on the base table that is pushed from - // the join - preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns()); - final Sequence baseCursorSequence = baseAdapter.makeCursors( joinFilterSplit.getBaseTableFilter().isPresent() ? 
joinFilterSplit.getBaseTableFilter().get() : null, interval, - VirtualColumns.create(preJoinVirtualColumns), + VirtualColumns.fromIterable( + Iterables.concat( + Sets.difference( + ImmutableSet.copyOf(virtualColumns.getVirtualColumns()), + joinFilterPreAnalysis.getPostJoinVirtualColumns() + ), + joinFilterSplit.getPushDownVirtualColumns() + ) + ), gran, descending, queryMetrics ); - Closer joinablesCloser = Closer.create(); + final Closer joinablesCloser = Closer.create(); return Sequences.map( baseCursorSequence, cursor -> { @@ -341,7 +337,7 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter return PostJoinCursor.wrap( retVal, - VirtualColumns.create(postJoinVirtualColumns), + VirtualColumns.fromIterable(preAnalysis.getPostJoinVirtualColumns()), joinFilterSplit.getJoinTableFilter().orElse(null) ); } @@ -357,47 +353,6 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter return !getClauseForColumn(column).isPresent(); } - /** - * Return a String set containing the name of columns that belong to the base table (including any pre-join virtual - * columns as well). - * - * Additionally, if the preJoinVirtualColumns and/or postJoinVirtualColumns arguments are provided, this method - * will add each VirtualColumn in the provided virtualColumns to either preJoinVirtualColumns or - * postJoinVirtualColumns based on whether the virtual column is pre-join or post-join. - * - * @param virtualColumns List of virtual columns from the query - * @param preJoinVirtualColumns If provided, virtual columns determined to be pre-join will be added to this list - * @param postJoinVirtualColumns If provided, virtual columns determined to be post-join will be added to this list - * - * @return The set of base column names, including any pre-join virtual columns. - */ - public Set determineBaseColumnsWithPreAndPostJoinVirtualColumns( - VirtualColumns virtualColumns, - @Nullable List preJoinVirtualColumns, - @Nullable List postJoinVirtualColumns - ) - { - final Set baseColumns = new HashSet<>(baseAdapter.getRowSignature().getColumnNames()); - - for (VirtualColumn virtualColumn : virtualColumns.getVirtualColumns()) { - // Virtual columns cannot depend on each other, so we don't need to check transitive dependencies. - if (baseColumns.containsAll(virtualColumn.requiredColumns())) { - // Since pre-join virtual columns can be computed using only base columns, we include them in the - // base column set. - baseColumns.add(virtualColumn.getOutputName()); - if (preJoinVirtualColumns != null) { - preJoinVirtualColumns.add(virtualColumn); - } - } else { - if (postJoinVirtualColumns != null) { - postJoinVirtualColumns.add(virtualColumn); - } - } - } - - return baseColumns; - } - /** * Returns the JoinableClause corresponding to a particular column, based on the clauses' prefixes. * diff --git a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java index a4c06e79826..d1641770fa8 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterAnalyzer.java @@ -69,8 +69,8 @@ import java.util.Set; * * The result of this pre-analysis method should be passed into the next step of join filter analysis, described below. 
* - * The {@link #splitFilter(JoinFilterPreAnalysis)} method takes the pre-analysis result and optionally applies the - * filter rewrite and push down operations on a per-segment level. + * The {@link #splitFilter(JoinFilterPreAnalysis, Filter)} method takes the pre-analysis result and optionally applies + * the filter rewrite and push down operations on a per-segment level. */ public class JoinFilterAnalyzer { @@ -90,11 +90,10 @@ public class JoinFilterAnalyzer */ public static JoinFilterPreAnalysis computeJoinFilterPreAnalysis(final JoinFilterPreAnalysisKey key) { - final List preJoinVirtualColumns = new ArrayList<>(); - final List postJoinVirtualColumns = new ArrayList<>(); - final JoinableClauses joinableClauses = JoinableClauses.fromList(key.getJoinableClauses()); - joinableClauses.splitVirtualColumns(key.getVirtualColumns(), preJoinVirtualColumns, postJoinVirtualColumns); + final Set postJoinVirtualColumns = joinableClauses.getPostJoinVirtualColumns( + key.getVirtualColumns() + ); final JoinFilterPreAnalysis.Builder preAnalysisBuilder = new JoinFilterPreAnalysis.Builder(key, postJoinVirtualColumns); @@ -159,13 +158,6 @@ public class JoinFilterAnalyzer return preAnalysisBuilder.withCorrelations(correlations).build(); } - public static JoinFilterSplit splitFilter( - JoinFilterPreAnalysis joinFilterPreAnalysis - ) - { - return splitFilter(joinFilterPreAnalysis, null); - } - /** * @param joinFilterPreAnalysis The pre-analysis computed by {@link #computeJoinFilterPreAnalysis)} * @param baseFilter - Filter on base table that was specified in the query itself @@ -210,7 +202,8 @@ public class JoinFilterAnalyzer ); if (joinFilterAnalysis.isCanPushDown()) { //noinspection OptionalGetWithoutIsPresent isCanPushDown checks isPresent - leftFilters.add(joinFilterAnalysis.getPushDownFilter().get()); + final Filter pushDown = joinFilterAnalysis.getPushDownFilter().get(); + leftFilters.add(pushDown); } if (joinFilterAnalysis.isRetainAfterJoin()) { rightFilters.add(joinFilterAnalysis.getOriginalFilter()); @@ -519,7 +512,7 @@ public class JoinFilterAnalyzer } private static boolean isColumnFromPostJoinVirtualColumns( - List postJoinVirtualColumns, + Set postJoinVirtualColumns, String column ) { @@ -532,7 +525,7 @@ public class JoinFilterAnalyzer } private static boolean areSomeColumnsFromPostJoinVirtualColumns( - List postJoinVirtualColumns, + Set postJoinVirtualColumns, Collection columns ) { diff --git a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysis.java b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysis.java index b8b2e7c7830..b63b72c9f96 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysis.java +++ b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysis.java @@ -50,12 +50,12 @@ public class JoinFilterPreAnalysis private final List normalizedBaseTableClauses; private final List normalizedJoinTableClauses; private final JoinFilterCorrelations correlations; - private final List postJoinVirtualColumns; + private final Set postJoinVirtualColumns; private final Equiconditions equiconditions; private JoinFilterPreAnalysis( final JoinFilterPreAnalysisKey key, - final List postJoinVirtualColumns, + final Set postJoinVirtualColumns, final List normalizedBaseTableClauses, final List normalizedJoinTableClauses, final JoinFilterCorrelations correlations, @@ -86,7 +86,7 @@ public class JoinFilterPreAnalysis return key.getFilter(); } - public List 
getPostJoinVirtualColumns() + public Set getPostJoinVirtualColumns() { return postJoinVirtualColumns; } @@ -140,13 +140,13 @@ public class JoinFilterPreAnalysis @Nullable private JoinFilterCorrelations correlations; @Nonnull - private final List postJoinVirtualColumns; + private final Set postJoinVirtualColumns; @Nonnull private Equiconditions equiconditions = new Equiconditions(Collections.emptyMap()); public Builder( @Nonnull JoinFilterPreAnalysisKey key, - @Nonnull List postJoinVirtualColumns + @Nonnull Set postJoinVirtualColumns ) { this.key = key; diff --git a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysisKey.java b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysisKey.java index fe863b0fd5b..48c00421b54 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysisKey.java +++ b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinFilterPreAnalysisKey.java @@ -68,6 +68,7 @@ public class JoinFilterPreAnalysisKey return virtualColumns; } + @Nullable public Filter getFilter() { return filter; diff --git a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinableClauses.java b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinableClauses.java index 254d4edc50a..fbf457c55a0 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/filter/JoinableClauses.java +++ b/processing/src/main/java/org/apache/druid/segment/join/filter/JoinableClauses.java @@ -32,8 +32,10 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; public class JoinableClauses @@ -88,19 +90,20 @@ public class JoinableClauses return joinableClauses; } - public void splitVirtualColumns( - final VirtualColumns virtualColumns, - final List preJoinVirtualColumns, - final List postJoinVirtualColumns + /** + * Retrieve subset of virtual columns which require inputs which are only present on the join table + */ + public Set getPostJoinVirtualColumns( + final VirtualColumns virtualColumns ) { + final Set postJoinVirtualColumns = new HashSet<>(); for (VirtualColumn virtualColumn : virtualColumns.getVirtualColumns()) { if (areSomeColumnsFromJoin(virtualColumn.requiredColumns())) { postJoinVirtualColumns.add(virtualColumn); - } else { - preJoinVirtualColumns.add(virtualColumn); } } + return postJoinVirtualColumns; } public boolean areSomeColumnsFromJoin( diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index d3869bd9ef5..7faf837db1c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; import com.google.common.primitives.Doubles; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RE; @@ -915,13 +916,11 @@ public abstract class CompressedNestedDataComplexColumn> 
localDictionarySupplier = FixedIndexed.read( dataBuffer, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java index 59c7da7fd21..68e1da96756 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java @@ -123,6 +123,6 @@ public abstract class NestedCommonFormatColumnSerializer implements GenericColum */ public static String getInternalFileName(String fileNameBase, String field) { - return StringUtils.format("%s.%s", fileNameBase, field); + return fileNameBase + "." + field; } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java index efa31a13a3f..3d39b31bbbc 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java @@ -27,7 +27,6 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RE; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; @@ -182,7 +181,7 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer subSelectors; + private final int exprKeyBytes; + + /** + * Used internally by {@link #writeKeyToResultRow(MemoryPointer, int, ResultRow, int)} to populate inputs + * for the expression. + */ + private final ResultRow tmpResultRow; + + /** + * Used internally by {@link #writeKeyToResultRow(MemoryPointer, int, ResultRow, int)} to evaluate the expression + * on {@link #tmpResultRow}. 
+ */ + private final Expr.ObjectBinding tmpResultRowBindings; + + ExpressionDeferredGroupByVectorColumnSelector( + final Expr expr, + final RowSignature exprInputSignature, + final List subSelectors + ) + { + this.expr = expr; + this.subSelectors = subSelectors; + this.tmpResultRow = ResultRow.create(subSelectors.size()); + + int exprKeyBytesTmp = 0; + final Map> tmpResultRowSuppliers = new HashMap<>(); + for (int i = 0; i < exprInputSignature.size(); i++) { + final int columnPosition = i; + exprKeyBytesTmp += subSelectors.get(i).getGroupingKeySize(); + tmpResultRowSuppliers.put( + exprInputSignature.getColumnName(i), + InputBindings.inputSupplier( + ExpressionType.fromColumnType(exprInputSignature.getColumnType(columnPosition).orElse(null)), + () -> tmpResultRow.getArray()[columnPosition] + ) + ); + } + this.exprKeyBytes = exprKeyBytesTmp; + this.tmpResultRowBindings = InputBindings.forInputSuppliers(tmpResultRowSuppliers); + } + + @Override + public int getGroupingKeySize() + { + return exprKeyBytes; + } + + @Override + public int writeKeys(WritableMemory keySpace, int keySize, int keyOffset, int startRow, int endRow) + { + int retVal = 0; + for (final GroupByVectorColumnSelector subSelector : subSelectors) { + retVal += subSelector.writeKeys(keySpace, keySize, keyOffset, startRow, endRow); + keyOffset += subSelector.getGroupingKeySize(); + } + return retVal; + } + + @Override + public void writeKeyToResultRow(MemoryPointer keyMemory, int keyOffset, ResultRow resultRow, int resultRowPosition) + { + for (int i = 0; i < subSelectors.size(); i++) { + final GroupByVectorColumnSelector subSelector = subSelectors.get(i); + subSelector.writeKeyToResultRow(keyMemory, keyOffset, tmpResultRow, i); + keyOffset += subSelector.getGroupingKeySize(); + } + + resultRow.getArray()[resultRowPosition] = expr.eval(tmpResultRowBindings).valueOrDefault(); + } + + @Override + public void reset() + { + for (final GroupByVectorColumnSelector subSelector : subSelectors) { + subSelector.reset(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java index 654a734e375..5c022cbf305 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVectorSelectors.java @@ -28,8 +28,11 @@ import org.apache.druid.math.expr.vector.CastToTypeVectorProcessor; import org.apache.druid.math.expr.vector.ExprVectorProcessor; import org.apache.druid.math.expr.vector.VectorProcessors; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.groupby.DeferExpressionDimensions; +import org.apache.druid.query.groupby.epinephelinae.vector.GroupByVectorColumnSelector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.vector.ConstantVectorSelectors; import org.apache.druid.segment.vector.ReadableVectorInspector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; @@ -37,6 +40,8 @@ import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; +import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.List; public class 
ExpressionVectorSelectors @@ -107,6 +112,51 @@ public class ExpressionVectorSelectors return new ExpressionVectorObjectSelector(processor, bindings); } + /** + * Creates a {@link ExpressionDeferredGroupByVectorColumnSelector} for the provided expression, if the + * provided {@link DeferExpressionDimensions} says we should. + * + * @param factory column selector factory + * @param expression expression + * @param deferExpressionDimensions active value of {@link org.apache.druid.query.groupby.GroupByQueryConfig#CTX_KEY_DEFER_EXPRESSION_DIMENSIONS} + * + * @return selector, or null if the {@link DeferExpressionDimensions} determines we should not defer the expression + */ + @Nullable + public static GroupByVectorColumnSelector makeGroupByVectorColumnSelector( + VectorColumnSelectorFactory factory, + Expr expression, + DeferExpressionDimensions deferExpressionDimensions + ) + { + final ExpressionPlan plan = ExpressionPlanner.plan(factory, expression); + Preconditions.checkArgument(plan.is(ExpressionPlan.Trait.VECTORIZABLE)); + + final List requiredBindings = plan.getAnalysis().getRequiredBindingsList(); + + if (!deferExpressionDimensions.useDeferredGroupBySelector(plan, requiredBindings, factory)) { + return null; + } + + final RowSignature.Builder requiredBindingsSignatureBuilder = RowSignature.builder(); + final List subSelectors = new ArrayList<>(); + + for (final String columnName : requiredBindings) { + final ColumnCapabilities capabilities = factory.getColumnCapabilities(columnName); + final ColumnType columnType = capabilities != null ? capabilities.toColumnType() : ColumnType.STRING; + final GroupByVectorColumnSelector subSelector = + factory.makeGroupByVectorColumnSelector(columnName, deferExpressionDimensions); + requiredBindingsSignatureBuilder.add(columnName, columnType); + subSelectors.add(subSelector); + } + + return new ExpressionDeferredGroupByVectorColumnSelector( + expression.asSingleThreaded(factory), + requiredBindingsSignatureBuilder.build(), + subSelectors + ); + } + public static VectorObjectSelector castValueSelectorToObject( ReadableVectorInspector inspector, String columnName, diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 58952061239..42a723907b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -35,6 +35,8 @@ import org.apache.druid.math.expr.Parser; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.ColumnIndexSelector; +import org.apache.druid.query.groupby.DeferExpressionDimensions; +import org.apache.druid.query.groupby.epinephelinae.vector.GroupByVectorColumnSelector; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; @@ -242,6 +244,28 @@ public class ExpressionVirtualColumn implements VirtualColumn return ExpressionVectorSelectors.makeVectorObjectSelector(factory, parsedExpression.get()); } + @Nullable + @Override + public GroupByVectorColumnSelector makeGroupByVectorColumnSelector( + String columnName, + VectorColumnSelectorFactory factory, + DeferExpressionDimensions deferExpressionDimensions + ) + { + if (isDirectAccess(factory)) { + return 
factory.makeGroupByVectorColumnSelector( + parsedExpression.get().getBindingIfIdentifier(), + deferExpressionDimensions + ); + } + + return ExpressionVectorSelectors.makeGroupByVectorColumnSelector( + factory, + parsedExpression.get(), + deferExpressionDimensions + ); + } + @Nullable @Override public ColumnIndexSupplier getIndexSupplier( diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java index 02d4d44cbfc..6aba25ddf22 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java @@ -59,7 +59,7 @@ public class StringArrayFieldWriterTest extends InitializedNullHandlingTest public void setUp() { memory = WritableMemory.allocate(1000); - fieldWriter = new StringArrayFieldWriter(selector); + fieldWriter = new StringArrayFieldWriter(selector, false); } @After diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java index 04296cb78c3..b0f589ed480 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java @@ -69,7 +69,7 @@ public class StringFieldReaderTest extends InitializedNullHandlingTest public void setUp() { memory = WritableMemory.allocate(1000); - fieldWriter = new StringArrayFieldWriter(writeSelector); + fieldWriter = new StringArrayFieldWriter(writeSelector, false); } @After diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java index f44b69e2810..0108e772d94 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java @@ -65,8 +65,8 @@ public class StringFieldWriterTest extends InitializedNullHandlingTest public void setUp() { memory = WritableMemory.allocate(1000); - fieldWriter = new StringFieldWriter(selector); - fieldWriterUtf8 = new StringFieldWriter(selectorUtf8); + fieldWriter = new StringFieldWriter(selector, false); + fieldWriterUtf8 = new StringFieldWriter(selectorUtf8, false); } @After diff --git a/processing/src/test/java/org/apache/druid/frame/key/KeyTestUtils.java b/processing/src/test/java/org/apache/druid/frame/key/KeyTestUtils.java index acdbcc48f96..e5ab3c203f4 100644 --- a/processing/src/test/java/org/apache/druid/frame/key/KeyTestUtils.java +++ b/processing/src/test/java/org/apache/druid/frame/key/KeyTestUtils.java @@ -22,7 +22,6 @@ package org.apache.druid.frame.key; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.HeapMemoryAllocator; import org.apache.druid.frame.allocation.SingleMemoryAllocatorFactory; import org.apache.druid.frame.write.FrameWriter; @@ -93,11 +92,11 @@ public class KeyTestUtils false ); - final FrameWriterFactory writerFactory = FrameWriters.makeFrameWriterFactory( - FrameType.ROW_BASED, + final FrameWriterFactory writerFactory = FrameWriters.makeRowBasedFrameWriterFactory( new SingleMemoryAllocatorFactory(HeapMemoryAllocator.unlimited()), 
keySignature, - Collections.emptyList() + Collections.emptyList(), + false ); try (final FrameWriter writer = writerFactory.newFrameWriter(columnSelectorFactory)) { diff --git a/processing/src/test/java/org/apache/druid/frame/processor/SuperSorterTest.java b/processing/src/test/java/org/apache/druid/frame/processor/SuperSorterTest.java index 5450bd98fd2..3412eec678c 100644 --- a/processing/src/test/java/org/apache/druid/frame/processor/SuperSorterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/processor/SuperSorterTest.java @@ -131,7 +131,8 @@ public class SuperSorterTest 2, -1, null, - superSorterProgressTracker + superSorterProgressTracker, + false ); superSorter.setNoWorkRunnable(() -> outputPartitionsFuture.set(ClusterByPartitions.oneUniversalPartition())); @@ -306,7 +307,8 @@ public class SuperSorterTest maxChannelsPerProcessor, -1, null, - superSorterProgressTracker + superSorterProgressTracker, + false ); superSorter.setNoWorkRunnable(() -> clusterByPartitionsFuture.set(clusterByPartitions)); diff --git a/processing/src/test/java/org/apache/druid/frame/testutil/FrameSequenceBuilder.java b/processing/src/test/java/org/apache/druid/frame/testutil/FrameSequenceBuilder.java index e28cda0c2fb..573e5c52123 100644 --- a/processing/src/test/java/org/apache/druid/frame/testutil/FrameSequenceBuilder.java +++ b/processing/src/test/java/org/apache/druid/frame/testutil/FrameSequenceBuilder.java @@ -19,6 +19,7 @@ package org.apache.druid.frame.testutil; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.HeapMemoryAllocator; @@ -119,13 +120,23 @@ public class FrameSequenceBuilder public Sequence frames() { - final FrameWriterFactory frameWriterFactory = - FrameWriters.makeFrameWriterFactory( - frameType, - new SingleMemoryAllocatorFactory(allocator), - signature(), - keyColumns - ); + final FrameWriterFactory frameWriterFactory; + if (FrameType.ROW_BASED.equals(frameType)) { + frameWriterFactory = FrameWriters.makeRowBasedFrameWriterFactory( + new SingleMemoryAllocatorFactory(allocator), + signature(), + keyColumns, + false + ); + } else if (FrameType.COLUMNAR.equals(frameType)) { + frameWriterFactory = FrameWriters.makeColumnBasedFrameWriterFactory( + new SingleMemoryAllocatorFactory(allocator), + signature(), + keyColumns + ); + } else { + throw DruidException.defensive("Unrecognized frame type"); + } final Sequence cursors = FrameTestUtil.makeCursorsForAdapter(adapter, populateRowNumber); diff --git a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java index 770d79beb76..cde617a395c 100644 --- a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java @@ -582,12 +582,21 @@ public class FrameWriterTest extends InitializedNullHandlingTest null, (retVal, cursor) -> { int numRows = 0; - final FrameWriterFactory frameWriterFactory = FrameWriters.makeFrameWriterFactory( - outputFrameType, - new SingleMemoryAllocatorFactory(allocator), - signature, - keyColumns - ); + final FrameWriterFactory frameWriterFactory; + if (FrameType.ROW_BASED.equals(outputFrameType)) { + frameWriterFactory = FrameWriters.makeRowBasedFrameWriterFactory( + new SingleMemoryAllocatorFactory(allocator), + signature, + keyColumns, + false + ); + } else { + frameWriterFactory = 
FrameWriters.makeColumnBasedFrameWriterFactory( + new SingleMemoryAllocatorFactory(allocator), + signature, + keyColumns + ); + } ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); diff --git a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterUtilsTest.java b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterUtilsTest.java new file mode 100644 index 00000000000..992247a5ac5 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterUtilsTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write; + +import org.apache.datasketches.memory.WritableMemory; +import org.junit.Assert; +import org.junit.Test; + +import java.nio.ByteBuffer; + +public class FrameWriterUtilsTest +{ + + private static final byte[] INPUT_BYTE_ARRAY = new byte[]{0x0A, (byte) 0xA4, 0x00, 0x53}; + private static final ByteBuffer INPUT_BYTE_BUFFER = ByteBuffer.wrap(INPUT_BYTE_ARRAY); + private static final WritableMemory WRITABLE_MEMORY = WritableMemory.allocate(10); + + @Test + public void test_copyByteBufferToMemory_withAllowNullBytesOnArrayBackedBuffer() + { + int originalPosition = INPUT_BYTE_BUFFER.position(); + FrameWriterUtils.copyByteBufferToMemoryAllowingNullBytes(INPUT_BYTE_BUFFER, WRITABLE_MEMORY, 0, 4); + byte[] outputArray = new byte[4]; + WRITABLE_MEMORY.getByteArray(0, outputArray, 0, 4); + Assert.assertArrayEquals(INPUT_BYTE_ARRAY, outputArray); + Assert.assertEquals(originalPosition, INPUT_BYTE_BUFFER.position()); + } + + @Test + public void test_copyByteBufferToMemory_withAllowNullBytes() + { + int originalPosition = INPUT_BYTE_BUFFER.position(); + ByteBuffer inputBuffer = ByteBuffer.allocateDirect(10); + inputBuffer.put(INPUT_BYTE_ARRAY, 0, 4); + inputBuffer.rewind(); + FrameWriterUtils.copyByteBufferToMemoryAllowingNullBytes(inputBuffer, WRITABLE_MEMORY, 0, 4); + byte[] outputArray = new byte[4]; + WRITABLE_MEMORY.getByteArray(0, outputArray, 0, 4); + Assert.assertArrayEquals(INPUT_BYTE_ARRAY, outputArray); + Assert.assertEquals(originalPosition, INPUT_BYTE_BUFFER.position()); + } + + @Test + public void test_copyByteBufferToMemory_withRemoveNullBytes() + { + int originalPosition = INPUT_BYTE_BUFFER.position(); + FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes(INPUT_BYTE_BUFFER, WRITABLE_MEMORY, 0, 4, true); + byte[] outputArray = new byte[3]; + WRITABLE_MEMORY.getByteArray(0, outputArray, 0, 3); + Assert.assertArrayEquals(new byte[]{0x0A, (byte) 0xA4, 0x53}, outputArray); + Assert.assertEquals(originalPosition, INPUT_BYTE_BUFFER.position()); + } + + @Test + public void test_copyByteBufferToMemory_withDisallowedNullBytes() + { + Assert.assertThrows( + InvalidNullByteException.class, + () -> 
FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes( + INPUT_BYTE_BUFFER, + WRITABLE_MEMORY, + 0, + 4, + false + ) + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/frame/write/FrameWritersTest.java b/processing/src/test/java/org/apache/druid/frame/write/FrameWritersTest.java index d43ba034c2a..98d9fc058c2 100644 --- a/processing/src/test/java/org/apache/druid/frame/write/FrameWritersTest.java +++ b/processing/src/test/java/org/apache/druid/frame/write/FrameWritersTest.java @@ -19,7 +19,6 @@ package org.apache.druid.frame.write; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; import org.apache.druid.frame.key.KeyColumn; import org.apache.druid.frame.key.KeyOrder; @@ -37,8 +36,8 @@ import org.junit.internal.matchers.ThrowableMessageMatcher; import java.util.Collections; /** - * Tests {@link FrameWriters#makeFrameWriterFactory} ability to create factories. Largely doesn't test actual - * frame generation via the factories, since that is exercised well enough in other test suites. + * Tests {@link FrameWriters#makeRowBasedFrameWriterFactory} and {@link FrameWriters#makeColumnBasedFrameWriterFactory} ability to create factories. + * Largely doesn't test actual frame generation via the factories, since that is exercised well enough in other test suites. */ public class FrameWritersTest extends InitializedNullHandlingTest { @@ -47,11 +46,11 @@ public class FrameWritersTest extends InitializedNullHandlingTest @Test public void test_rowBased() { - final FrameWriterFactory factory = FrameWriters.makeFrameWriterFactory( - FrameType.ROW_BASED, + final FrameWriterFactory factory = FrameWriters.makeRowBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(ALLOCATOR_CAPACITY), RowSignature.builder().add("x", ColumnType.LONG).build(), - Collections.singletonList(new KeyColumn("x", KeyOrder.ASCENDING)) + Collections.singletonList(new KeyColumn("x", KeyOrder.ASCENDING)), + false ); MatcherAssert.assertThat(factory, CoreMatchers.instanceOf(RowBasedFrameWriterFactory.class)); @@ -61,8 +60,7 @@ public class FrameWritersTest extends InitializedNullHandlingTest @Test public void test_columnar() { - final FrameWriterFactory factory = FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + final FrameWriterFactory factory = FrameWriters.makeColumnBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(ALLOCATOR_CAPACITY), RowSignature.builder() .add("a", ColumnType.LONG) @@ -84,8 +82,7 @@ public class FrameWritersTest extends InitializedNullHandlingTest @Test public void test_columnar_unsupportedColumnType() { - final FrameWriterFactory factory = FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + final FrameWriterFactory factory = FrameWriters.makeColumnBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(ALLOCATOR_CAPACITY), RowSignature.builder().add("x", ColumnType.ofArray(ColumnType.LONG_ARRAY)).build(), Collections.emptyList() @@ -106,11 +103,11 @@ public class FrameWritersTest extends InitializedNullHandlingTest final IllegalArgumentException e = Assert.assertThrows( IllegalArgumentException.class, () -> - FrameWriters.makeFrameWriterFactory( - FrameType.ROW_BASED, + FrameWriters.makeRowBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(ALLOCATOR_CAPACITY), RowSignature.builder().add("x", ColumnType.LONG).add("y", ColumnType.LONG).build(), - Collections.singletonList(new KeyColumn("y", KeyOrder.ASCENDING)) + Collections.singletonList(new KeyColumn("y", KeyOrder.ASCENDING)), + false ) ); @@ 
-128,8 +125,7 @@ public class FrameWritersTest extends InitializedNullHandlingTest final IllegalArgumentException e = Assert.assertThrows( IllegalArgumentException.class, () -> - FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + FrameWriters.makeColumnBasedFrameWriterFactory( new ArenaMemoryAllocatorFactory(ALLOCATOR_CAPACITY), RowSignature.builder().add("x", ColumnType.LONG).build(), Collections.singletonList(new KeyColumn("x", KeyOrder.ASCENDING)) diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/IntervalStartVersioningPolicyTest.java b/processing/src/test/java/org/apache/druid/indexer/CompactionEngineTest.java similarity index 58% rename from server/src/test/java/org/apache/druid/segment/realtime/plumber/IntervalStartVersioningPolicyTest.java rename to processing/src/test/java/org/apache/druid/indexer/CompactionEngineTest.java index 15c05fd9c56..b97a6c6a5ea 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/IntervalStartVersioningPolicyTest.java +++ b/processing/src/test/java/org/apache/druid/indexer/CompactionEngineTest.java @@ -17,21 +17,27 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.indexer; -import org.apache.druid.java.util.common.Intervals; import org.junit.Assert; import org.junit.Test; -/** - */ -public class IntervalStartVersioningPolicyTest +public class CompactionEngineTest { + @Test - public void testGetVersion() + public void testFromString() { - IntervalStartVersioningPolicy policy = new IntervalStartVersioningPolicy(); - String version = policy.getVersion(Intervals.of("2013-01-01/2013-01-02")); - Assert.assertEquals("2013-01-01T00:00:00.000Z", version); + Assert.assertEquals(CompactionEngine.NATIVE, CompactionEngine.fromString("native")); + Assert.assertEquals(CompactionEngine.MSQ, CompactionEngine.fromString("msq")); + Assert.assertNull(CompactionEngine.fromString(null)); + Assert.assertThrows(IllegalArgumentException.class, () -> CompactionEngine.fromString("random")); + } + + @Test + public void testToString() + { + Assert.assertEquals("native", CompactionEngine.NATIVE.toString()); + Assert.assertEquals("msq", CompactionEngine.MSQ.toString()); } } diff --git a/processing/src/test/java/org/apache/druid/java/util/metrics/StubServiceEmitter.java b/processing/src/test/java/org/apache/druid/java/util/metrics/StubServiceEmitter.java index 2ddba7c6cd8..e4a8b9403dd 100644 --- a/processing/src/test/java/org/apache/druid/java/util/metrics/StubServiceEmitter.java +++ b/processing/src/test/java/org/apache/druid/java/util/metrics/StubServiceEmitter.java @@ -26,9 +26,9 @@ import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; /** * Test implementation of {@link ServiceEmitter} that collects emitted metrics @@ -38,7 +38,7 @@ public class StubServiceEmitter extends ServiceEmitter implements MetricsVerifie { private final List events = new ArrayList<>(); private final List alertEvents = new ArrayList<>(); - private final Map> metricEvents = new HashMap<>(); + private final ConcurrentHashMap> metricEvents = new ConcurrentHashMap<>(); public StubServiceEmitter() { diff --git a/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java index d6143fd1fa1..56b3583031f 100644 --- 
a/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -677,6 +677,7 @@ public class FunctionTest extends InitializedNullHandlingTest assertExpr("greatest()", null); assertExpr("greatest(null, null)", null); assertExpr("greatest(1, null, 'A')", "A"); + assertExpr("greatest(1.0, 1, null)", 1.0); } @Test @@ -703,6 +704,7 @@ public class FunctionTest extends InitializedNullHandlingTest assertExpr("least()", null); assertExpr("least(null, null)", null); assertExpr("least(1, null, 'A')", "1"); + assertExpr("least(1.0, 1, null)", 1.0); } @Test diff --git a/processing/src/test/java/org/apache/druid/query/FrameBasedInlineDataSourceSerializerTest.java b/processing/src/test/java/org/apache/druid/query/FrameBasedInlineDataSourceSerializerTest.java index 7899a6aed84..238997c2e53 100644 --- a/processing/src/test/java/org/apache/druid/query/FrameBasedInlineDataSourceSerializerTest.java +++ b/processing/src/test/java/org/apache/druid/query/FrameBasedInlineDataSourceSerializerTest.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import org.apache.druid.common.config.NullHandling; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.HeapMemoryAllocator; import org.apache.druid.frame.allocation.SingleMemoryAllocatorFactory; import org.apache.druid.frame.segment.FrameCursorUtils; @@ -134,11 +133,11 @@ public class FrameBasedInlineDataSourceSerializerTest RowSignature modifiedRowSignature = FrameWriterUtils.replaceUnknownTypesWithNestedColumns(rowSignature); Sequence frames = FrameCursorUtils.cursorToFramesSequence( cursor, - FrameWriters.makeFrameWriterFactory( - FrameType.ROW_BASED, + FrameWriters.makeRowBasedFrameWriterFactory( new SingleMemoryAllocatorFactory(HeapMemoryAllocator.unlimited()), modifiedRowSignature, - new ArrayList<>() + new ArrayList<>(), + false ) ); return new FrameBasedInlineDataSource( diff --git a/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java b/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java index 2fa414188a1..554ca7b8f4a 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java +++ b/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java @@ -151,15 +151,6 @@ public class QueryContextsTest ); } - @Test - public void testDefaultWindowingStrictValidation() - { - Assert.assertEquals( - QueryContexts.DEFAULT_WINDOWING_STRICT_VALIDATION, - QueryContext.empty().isWindowingStrictValidation() - ); - } - @Test public void testCatalogValidationEnabled() { diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index f4699639af1..e562f72955e 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -524,8 +524,7 @@ public class QueryRunnerTestHelper final DataSource base = query.getDataSource(); final SegmentReference segmentReference = base.createSegmentMapFunction(query, new AtomicLong()) - .apply(ReferenceCountingSegment.wrapRootGenerationSegment( - adapter)); + .apply(ReferenceCountingSegment.wrapRootGenerationSegment(adapter)); return makeQueryRunner(factory, segmentReference, runnerName); } diff --git 
a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java index 526a62c813f..2ad9f90148a 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java @@ -766,7 +766,7 @@ public class AggregationTestHelper implements Closeable String resultStr = mapper.writer().writeValueAsString(yielder); List resultRows = Lists.transform( - readQueryResultArrayFromString(resultStr), + readQueryResultArrayFromString(resultStr, queryPlus.getQuery()), toolChest.makePreComputeManipulatorFn( queryPlus.getQuery(), MetricManipulatorFns.deserializing() @@ -798,11 +798,13 @@ public class AggregationTestHelper implements Closeable }; } - private List readQueryResultArrayFromString(String str) throws Exception + private List readQueryResultArrayFromString(String str, Query query) throws Exception { List result = new ArrayList(); - JsonParser jp = mapper.getFactory().createParser(str); + ObjectMapper decoratedMapper = toolChest.decorateObjectMapper(mapper, query); + + JsonParser jp = decoratedMapper.getFactory().createParser(str); if (jp.nextToken() != JsonToken.START_ARRAY) { throw new IAE("not an array [%s]", str); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/ComplexDimensionGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/ComplexDimensionGroupByQueryTest.java new file mode 100644 index 00000000000..bc1ecbb0ddc --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/groupby/ComplexDimensionGroupByQueryTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.groupby; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.SerializablePairLongString; +import org.apache.druid.query.aggregation.SerializablePairLongStringComplexMetricSerde; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.segment.RowBasedSegment; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.timeline.SegmentId; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class ComplexDimensionGroupByQueryTest +{ + private final QueryContexts.Vectorize vectorize; + private final AggregationTestHelper helper; + private final List segments; + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + public ComplexDimensionGroupByQueryTest(GroupByQueryConfig config, String vectorize) + { + this.vectorize = QueryContexts.Vectorize.fromString(vectorize); + this.helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + Collections.emptyList(), + config, + tempFolder + ); + Sequence rows = Sequences.simple( + ImmutableList.of( + new Object[]{new SerializablePairLongString(1L, "abc")}, + new Object[]{new SerializablePairLongString(1L, "abc")}, + new Object[]{new SerializablePairLongString(1L, "def")}, + new Object[]{new SerializablePairLongString(1L, "abc")}, + new Object[]{new SerializablePairLongString(1L, "ghi")}, + new Object[]{new SerializablePairLongString(1L, "def")}, + new Object[]{new SerializablePairLongString(1L, "abc")}, + new Object[]{new SerializablePairLongString(1L, "pqr")}, + new Object[]{new SerializablePairLongString(1L, "xyz")}, + new Object[]{new SerializablePairLongString(1L, "foo")}, + new Object[]{new SerializablePairLongString(1L, "bar")} + ) + ); + RowSignature rowSignature = RowSignature.builder() + .add( + "pair", + ColumnType.ofComplex(SerializablePairLongStringComplexMetricSerde.TYPE_NAME) + ) + .build(); + + this.segments = Collections.singletonList( + new RowBasedSegment<>( + SegmentId.dummy("dummy"), + rows, + columnName -> { + final int columnNumber = rowSignature.indexOf(columnName); + return row -> columnNumber >= 0 ? 
row[columnNumber] : null; + }, + rowSignature + ) + ); + } + + @Parameterized.Parameters(name = "config = {0}, vectorize = {1}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + for (String vectorize : new String[]{"false", "force"}) { + constructors.add(new Object[]{config, vectorize}); + } + } + return constructors; + } + + public Map getContext() + { + return ImmutableMap.of( + QueryContexts.VECTORIZE_KEY, vectorize.toString(), + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, "true" + ); + } + + @Test + public void testGroupByOnPairClass() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(new DefaultDimensionSpec( + "pair", + "pair", + ColumnType.ofComplex(SerializablePairLongStringComplexMetricSerde.TYPE_NAME) + )) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + if (vectorize == QueryContexts.Vectorize.FORCE) { + // Cannot vectorize group by on complex dimension + Assert.assertThrows( + RuntimeException.class, + () -> helper.runQueryOnSegmentsObjs(segments, groupQuery).toList() + ); + } else { + List resultRows = helper.runQueryOnSegmentsObjs(segments, groupQuery).toList(); + + Assert.assertArrayEquals( + new ResultRow[]{ + ResultRow.of(new SerializablePairLongString(1L, "abc"), 4L), + ResultRow.of(new SerializablePairLongString(1L, "bar"), 1L), + ResultRow.of(new SerializablePairLongString(1L, "def"), 2L), + ResultRow.of(new SerializablePairLongString(1L, "foo"), 1L), + ResultRow.of(new SerializablePairLongString(1L, "ghi"), 1L), + ResultRow.of(new SerializablePairLongString(1L, "pqr"), 1L), + ResultRow.of(new SerializablePairLongString(1L, "xyz"), 1L) + }, + resultRows.toArray() + ); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryConfigTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryConfigTest.java index 34ed99cda21..49b0b035f37 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryConfigTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryConfigTest.java @@ -75,6 +75,7 @@ public class GroupByQueryConfigTest Assert.assertEquals(5, config2.getConfiguredMaxSelectorDictionarySize()); Assert.assertEquals(6_000_000, config2.getConfiguredMaxMergingDictionarySize()); Assert.assertEquals(7.0, config2.getBufferGrouperMaxLoadFactor(), 0.0); + Assert.assertEquals(DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC, config2.getDeferExpressionDimensions()); Assert.assertFalse(config2.isApplyLimitPushDownToSegment()); } @@ -94,6 +95,10 @@ public class GroupByQueryConfigTest .put("maxSelectorDictionarySize", 3) .put("maxMergingDictionarySize", 4) .put("applyLimitPushDownToSegment", true) + .put( + GroupByQueryConfig.CTX_KEY_DEFER_EXPRESSION_DIMENSIONS, + DeferExpressionDimensions.ALWAYS.toString() + ) .build() ) .build() @@ -105,6 +110,7 @@ public class GroupByQueryConfigTest Assert.assertEquals(3, config2.getConfiguredMaxSelectorDictionarySize()); Assert.assertEquals(4, config2.getConfiguredMaxMergingDictionarySize()); Assert.assertEquals(7.0, config2.getBufferGrouperMaxLoadFactor(), 0.0); + Assert.assertEquals(DeferExpressionDimensions.ALWAYS, config2.getDeferExpressionDimensions()); Assert.assertTrue(config2.isApplyLimitPushDownToSegment()); } diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChestTest.java index f43bbce9d97..7279ca938bd 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChestTest.java @@ -33,6 +33,7 @@ import org.apache.druid.collections.SerializablePair; import org.apache.druid.collections.StupidPool; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.Row; +import org.apache.druid.jackson.AggregatorsModule; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; @@ -97,6 +98,7 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest public static void setUpClass() { NullHandling.initializeForTests(); + AggregatorsModule.registerComplexMetricsAndSerde(); } @Test @@ -130,11 +132,13 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setGranularity(QueryRunnerTestHelper.DAY_GRAN) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); + final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertTrue(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -190,11 +194,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest ) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertTrue(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -252,11 +257,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setHavingSpec(new GreaterThanHavingSpec(QueryRunnerTestHelper.UNIQUE_METRIC, 10)) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertTrue(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -336,11 +342,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setHavingSpec(andHavingSpec2) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new 
GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertTrue(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -427,11 +434,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setHavingSpec(havingSpec2) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertTrue(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -490,11 +498,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest )) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertTrue(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -512,6 +521,48 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest doTestCacheStrategy(ColumnType.LONG, 2L); } + @Test + public void testComplexDimensionCacheStrategy() throws IOException + { + final GroupByQuery query1 = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) + .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) + .setDimensions(ImmutableList.of( + new DefaultDimensionSpec( + "test", + "test", + ColumnType.ofComplex(SerializablePairLongStringComplexMetricSerde.TYPE_NAME) + ) + )) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.DAY_GRAN) + .build(); + + ObjectMapper objectMapper = TestHelper.makeJsonMapper(); + + CacheStrategy strategy = + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, objectMapper); + + // test timestamps that result in integer size millis + final ResultRow result1 = ResultRow.of( + 123L, + new SerializablePairLongString(123L, "abc"), + 1 + ); + + Object preparedValue = strategy.prepareForSegmentLevelCache().apply(result1); + + Object fromCacheValue = objectMapper.readValue( + objectMapper.writeValueAsBytes(preparedValue), + strategy.getCacheObjectClazz() + ); + + ResultRow fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue); + + Assert.assertEquals(result1, fromCacheResult); + } + @Test public void testMultiColumnCacheStrategy() throws Exception { @@ -538,8 +589,9 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setGranularity(QueryRunnerTestHelper.DAY_GRAN) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); CacheStrategy strategy = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new 
GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); // test timestamps that result in integer size millis final ResultRow result1 = ResultRow.of( @@ -1054,8 +1106,9 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setGranularity(QueryRunnerTestHelper.DAY_GRAN) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); CacheStrategy strategy = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); // test timestamps that result in integer size millis final ResultRow result1 = ResultRow.of( @@ -1147,11 +1200,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .setGranularity(QueryRunnerTestHelper.DAY_GRAN) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertFalse(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertFalse(Arrays.equals( @@ -1183,11 +1237,12 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest .overrideContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_APPLY_LIMIT_PUSH_DOWN, "false")) .build(); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); final CacheStrategy strategy1 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query1, mapper); final CacheStrategy strategy2 = - new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2); + new GroupByQueryQueryToolChest(null, null).getCacheStrategy(query2, mapper); Assert.assertFalse(Arrays.equals(strategy1.computeCacheKey(query1), strategy2.computeCacheKey(query2))); Assert.assertTrue( @@ -1245,7 +1300,8 @@ public class GroupByQueryQueryToolChestTest extends InitializedNullHandlingTest QueryRunnerTestHelper.NOOP_QUERYWATCHER ); final GroupByQueryQueryToolChest queryToolChest = new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool); - CacheStrategy cacheStrategy = queryToolChest.getCacheStrategy(query); + final ObjectMapper mapper = TestHelper.makeJsonMapper(); + CacheStrategy cacheStrategy = queryToolChest.getCacheStrategy(query, mapper); Assert.assertTrue( "result level cache on broker server for GroupByStrategyV2 should be enabled", cacheStrategy.isCacheable(query, false, false) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 3613246fef6..a5dbb49bca5 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -33,6 +33,7 @@ import com.google.common.collect.Sets; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.Row; import org.apache.druid.data.input.Rows; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.HumanReadableBytes; import 
org.apache.druid.java.util.common.IAE; @@ -9846,6 +9847,40 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest TestHelper.assertExpectedObjects(expectedResults, results, "cardinality-agg"); } + @Test + public void testGroupByDimensionOnMultiStringExpression() + { + GroupByQuery query = makeQueryBuilder() + .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) + .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) + .setVirtualColumns( + new ExpressionVirtualColumn("v0", "concat(quality,market)", ColumnType.STRING, TestExprMacroTable.INSTANCE) + ) + .setDimensions(new DefaultDimensionSpec("v0", "d0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + List expectedResults = ImmutableList.of( + makeRow(query, "2011-04-01", "d0", "automotivespot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "businessspot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "entertainmentspot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "healthspot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "mezzaninespot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "mezzaninetotal_market", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "mezzanineupfront", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "newsspot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "premiumspot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "premiumtotal_market", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "premiumupfront", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "technologyspot", "rows", 2L), + makeRow(query, "2011-04-01", "d0", "travelspot", "rows", 2L) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, "cardinality-agg"); + } + @Test public void testGroupByCardinalityAggOnHyperUnique() { @@ -9931,7 +9966,6 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest @Test public void testGroupByComplexColumn() { - cannotVectorize(); GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) @@ -9945,7 +9979,8 @@ public class GroupByQueryRunnerTest extends InitializedNullHandlingTest .setGranularity(QueryRunnerTestHelper.ALL_GRAN) .build(); - expectedException.expect(RuntimeException.class); + expectedException.expect(DruidException.class); + expectedException.expectMessage("Type [COMPLEX] is not groupable"); GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index 13a33191e8e..3976a20bd2d 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -25,7 +25,10 @@ import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.ListBasedInputRow; +import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.DataSource; import 
org.apache.druid.query.DirectQueryProcessingPool; @@ -43,21 +46,30 @@ import org.apache.druid.query.extraction.StringFormatExtractionFn; import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; +import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.IncrementalIndexSegment; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.joda.time.DateTime; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -78,6 +90,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest @Rule public ExpectedException expectedException = ExpectedException.none(); + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + public UnnestGroupByQueryRunnerTest( GroupByQueryConfig config, GroupByQueryRunnerFactory factory, @@ -214,6 +229,11 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp, vals); } + private static ResultRow makeRow(final GroupByQuery query, final DateTime timestamp, final Object... 
vals) + { + return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp, vals); + } + @Test public void testGroupBy() { @@ -423,6 +443,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy"); } @Test @@ -462,6 +485,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "missing-column"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "missing-column"); } @Test @@ -538,6 +564,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-column"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-column"); } @Test @@ -627,6 +656,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); } @Test @@ -678,35 +710,335 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-columns"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); } - /** - * Use this method instead of makeQueryBuilder() to make sure the context is set properly. Also, avoid - * setContext in tests. Only use overrideContext. 
- */ - private GroupByQuery.Builder makeQueryBuilder() + @Test + public void testGroupByOnUnnestedStringColumnWithNullStuff() throws IOException { - return GroupByQuery.builder().overrideContext(makeContext()); - } + cannotVectorize(); - private Iterable runQuery(final GroupByQuery query, final IncrementalIndex index) - { - final QueryRunner queryRunner = factory.mergeRunners( - DirectQueryProcessingPool.INSTANCE, - Collections.singletonList( - QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - factory, - new IncrementalIndexSegment( - index, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ) - ) + final String dim = "mvd"; + final DateTime timestamp = DateTimes.nowUtc(); + final RowSignature signature = RowSignature.builder() + .add(dim, ColumnType.STRING) + .build(); + List dims = Collections.singletonList(dim); + IndexBuilder bob = + IndexBuilder.create() + .schema( + IncrementalIndexSchema.builder() + .withRollup(false) + .build() + ) + .rows( + ImmutableList.of( + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of(ImmutableList.of("a", "b", "c"))), + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of()), + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of(ImmutableList.of())), + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of("")) + ) + ); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + "v0", + "mvd", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), + null ); - return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + .setDimensions(new DefaultDimensionSpec("v0", "v0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + List expectedResults = NullHandling.sqlCompatible() ? 
Arrays.asList( + makeRow(query, timestamp, "v0", null, "rows", 2L), + makeRow(query, timestamp, "v0", "", "rows", 1L), + makeRow(query, timestamp, "v0", "a", "rows", 1L), + makeRow(query, timestamp, "v0", "b", "rows", 1L), + makeRow(query, timestamp, "v0", "c", "rows", 1L) + ) : Arrays.asList( + makeRow(query, timestamp, "v0", null, "rows", 3L), + makeRow(query, timestamp, "v0", "a", "rows", 1L), + makeRow(query, timestamp, "v0", "b", "rows", 1L), + makeRow(query, timestamp, "v0", "c", "rows", 1L) + ); + + Iterable results = runQuery(query, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls"); + + results = runQuery(query, bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls"); + } + + @Test + public void testGroupByOnUnnestedStringColumnWithMoreNullStuff() throws IOException + { + cannotVectorize(); + + final String dim = "mvd"; + final DateTime timestamp = DateTimes.nowUtc(); + final RowSignature signature = RowSignature.builder() + .add(dim, ColumnType.STRING) + .build(); + List dims = Collections.singletonList(dim); + IndexBuilder bob = + IndexBuilder.create() + .schema( + IncrementalIndexSchema.builder() + .withRollup(false) + .build() + ) + .rows( + ImmutableList.of( + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(Arrays.asList("a", "b", "c"))), + new ListBasedInputRow(signature, timestamp, dims, Collections.emptyList()), + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(null)), + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(Collections.emptyList())), + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(Arrays.asList(null, null))), + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(Collections.singletonList(null))), + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(Collections.singletonList(""))) + ) + ); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + "v0", + "mvd", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + .setDimensions(new DefaultDimensionSpec("v0", "v0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + // make sure results are consistent with grouping directly on the column with implicit unnest + GroupByQuery regularQuery = makeQueryBuilder() + .setDataSource(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE)) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + .setDimensions(new DefaultDimensionSpec("mvd", "v0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + List expectedResults = NullHandling.sqlCompatible() ? 
Arrays.asList( + makeRow(query, timestamp, "v0", null, "rows", 6L), + makeRow(query, timestamp, "v0", "", "rows", 1L), + makeRow(query, timestamp, "v0", "a", "rows", 1L), + makeRow(query, timestamp, "v0", "b", "rows", 1L), + makeRow(query, timestamp, "v0", "c", "rows", 1L) + ) : Arrays.asList( + makeRow(query, timestamp, "v0", null, "rows", 7L), + makeRow(query, timestamp, "v0", "a", "rows", 1L), + makeRow(query, timestamp, "v0", "b", "rows", 1L), + makeRow(query, timestamp, "v0", "c", "rows", 1L) + ); + + Iterable results = runQuery(query, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls"); + + results = runQuery(regularQuery, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls"); + + results = runQuery(query, bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls"); + } + + @Test + public void testGroupByOnUnnestEmptyTable() + { + cannotVectorize(); + IndexBuilder bob = + IndexBuilder.create() + .rows(ImmutableList.of()); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + "v0", + "mvd", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + .setDimensions(new DefaultDimensionSpec("v0", "v0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + List expectedResults = Collections.emptyList(); + + Iterable results = runQuery(query, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-empty"); + + // can only test realtime since empty cannot be persisted + } + + @Test + public void testGroupByOnUnnestEmptyRows() + { + cannotVectorize(); + final String dim = "mvd"; + final DateTime timestamp = DateTimes.nowUtc(); + final RowSignature signature = RowSignature.builder() + .add(dim, ColumnType.STRING) + .build(); + List dims = Collections.singletonList(dim); + IndexBuilder bob = + IndexBuilder.create() + .schema( + IncrementalIndexSchema.builder() + .withRollup(false) + .build() + ) + .rows( + ImmutableList.of( + new ListBasedInputRow(signature, timestamp, dims, Collections.singletonList(Collections.emptyList())) + ) + ); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + "v0", + "mvd", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + .setDimensions(new DefaultDimensionSpec("v0", "v0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + // make sure results are consistent with grouping directly on the column with implicit unnest + GroupByQuery regularQuery = makeQueryBuilder() + .setDataSource(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE)) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + 
.setDimensions(new DefaultDimensionSpec("mvd", "v0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + List expectedResults = Collections.singletonList( + makeRow(query, timestamp, "v0", null, "rows", 1L) + ); + + Iterable results = runQuery(query, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-empty"); + + results = runQuery(regularQuery, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-empty"); + + // can only test realtime since empty cannot be persisted + } + + @Test + public void testGroupByOnUnnestedStringColumnDoubleUnnest() throws IOException + { + // not really a sane query to write, but it shouldn't behave differently than a single unnest + // the issue is that the dimension selector handles null differently than if arrays are used from a column value + // selector. the dimension selector cursor puts nulls in the output to be compatible with implicit unnest used by + // group-by, while the column selector cursor does not + cannotVectorize(); + + final String dim = "mvd"; + final DateTime timestamp = DateTimes.nowUtc(); + final RowSignature signature = RowSignature.builder() + .add(dim, ColumnType.STRING) + .build(); + List dims = Collections.singletonList(dim); + IndexBuilder bob = + IndexBuilder.create() + .schema( + IncrementalIndexSchema.builder() + .withRollup(false) + .build() + ) + .rows( + ImmutableList.of( + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of(ImmutableList.of("a", "b", "c"))), + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of()), + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of(ImmutableList.of())), + new ListBasedInputRow(signature, timestamp, dims, ImmutableList.of("")) + ) + ); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + "v0", + "mvd", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), + null + ); + final DataSource extraUnnested = UnnestDataSource.create( + unnestDataSource, + new ExpressionVirtualColumn( + "v1", + "v0", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(extraUnnested) + .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY))) + .setDimensions(new DefaultDimensionSpec("v1", "v1")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .build(); + + List expectedResults = NullHandling.sqlCompatible() ? 
Arrays.asList( + makeRow(query, timestamp, "v1", null, "rows", 2L), + makeRow(query, timestamp, "v1", "", "rows", 1L), + makeRow(query, timestamp, "v1", "a", "rows", 1L), + makeRow(query, timestamp, "v1", "b", "rows", 1L), + makeRow(query, timestamp, "v1", "c", "rows", 1L) + ) : Arrays.asList( + makeRow(query, timestamp, "v1", null, "rows", 3L), + makeRow(query, timestamp, "v1", "a", "rows", 1L), + makeRow(query, timestamp, "v1", "b", "rows", 1L), + makeRow(query, timestamp, "v1", "c", "rows", 1L) + ); + + Iterable results = runQuery(query, bob.buildIncrementalIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls-double-unnest"); + + results = runQuery(query, bob.tmpDir(tempFolder.newFolder()).buildMMappedIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "group-by-unnested-string-nulls-double-unnest"); } @Test @@ -751,6 +1083,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); } @Test @@ -837,6 +1172,9 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); + + results = runQuery(query, TestIndex.getMMappedTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); } @Test @@ -931,6 +1269,56 @@ public class UnnestGroupByQueryRunnerTest extends InitializedNullHandlingTest TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); } + + /** + * Use this method instead of makeQueryBuilder() to make sure the context is set properly. Also, avoid + * setContext in tests. Only use overrideContext. 
+ */ + private GroupByQuery.Builder makeQueryBuilder() + { + return GroupByQuery.builder().overrideContext(makeContext()); + } + + private Iterable runQuery(final GroupByQuery query, final IncrementalIndex index) + { + final QueryRunner queryRunner = factory.mergeRunners( + DirectQueryProcessingPool.INSTANCE, + Collections.singletonList( + QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( + factory, + new IncrementalIndexSegment( + index, + QueryRunnerTestHelper.SEGMENT_ID + ), + query, + "rtIndexvc" + ) + ) + ); + + return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + } + + private Iterable runQuery(final GroupByQuery query, QueryableIndex index) + { + final QueryRunner queryRunner = factory.mergeRunners( + DirectQueryProcessingPool.INSTANCE, + Collections.singletonList( + QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( + factory, + new QueryableIndexSegment( + index, + QueryRunnerTestHelper.SEGMENT_ID + ), + query, + "mmapIndexvc" + ) + ) + ); + + return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + } + private Map makeContext() { return ImmutableMap.builder() diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumnsTest.java index dd492ae407f..b709678ec17 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/ArrayListRowsAndColumnsTest.java @@ -19,12 +19,18 @@ package org.apache.druid.query.rowsandcols; +import com.google.common.collect.ImmutableMap; import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.rowsandcols.column.ColumnAccessor; +import org.apache.druid.query.rowsandcols.column.IntArrayColumn; import org.apache.druid.segment.column.RowSignature; +import org.junit.Assert; +import org.junit.Test; import javax.annotation.Nonnull; import java.util.ArrayList; +import java.util.Collections; +import java.util.Objects; import java.util.function.Function; public class ArrayListRowsAndColumnsTest extends RowsAndColumnsTestBase @@ -35,7 +41,10 @@ public class ArrayListRowsAndColumnsTest extends RowsAndColumnsTestBase } @Nonnull - public static Function> MAKER = input -> { + public static Function> MAKER = input -> buildRAC(input); + + public static ArrayListRowsAndColumns buildRAC(MapOfColumnsRowsAndColumns input) + { ArrayList rows = new ArrayList<>(input.numRows()); ArrayList cols = new ArrayList<>(input.getColumnNames()); @@ -47,7 +56,7 @@ public class ArrayListRowsAndColumnsTest extends RowsAndColumnsTestBase for (int colIndex = 0; colIndex < cols.size(); ++colIndex) { String col = cols.get(colIndex); - final ColumnAccessor column = input.findColumn(col).toAccessor(); + final ColumnAccessor column = Objects.requireNonNull(input.findColumn(col)).toAccessor(); sigBob.add(col, column.getType()); for (int i = 0; i < column.numRows(); ++i) { @@ -66,5 +75,29 @@ public class ArrayListRowsAndColumnsTest extends RowsAndColumnsTestBase }, sigBob.build() ); - }; + } + + @Test + public void testChildRAC() + { + MapOfColumnsRowsAndColumns input = MapOfColumnsRowsAndColumns.fromMap( + ImmutableMap.of( + "colA", new IntArrayColumn(new int[]{1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + "colB", new IntArrayColumn(new int[]{3, 3, 4, 4, 5, 5, 5, 6, 6, 7}) + ) + ); + + ArrayListRowsAndColumns rac = ArrayListRowsAndColumnsTest.buildRAC(input); + ArrayList childRACs = 
rac.toClusteredGroupPartitioner() + .partitionOnBoundaries(Collections.singletonList("colA")); + + Assert.assertEquals(2, childRACs.size()); + ArrayListRowsAndColumns childRAC = (ArrayListRowsAndColumns) childRACs.get(1); + ArrayListRowsAndColumns curChildRAC = (ArrayListRowsAndColumns) childRAC.toClusteredGroupPartitioner() + .partitionOnBoundaries(Collections.singletonList( + "colB")) + .get(0); + + Assert.assertEquals(5, curChildRAC.findColumn("colB").toAccessor().getInt(0)); + } } diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java index 281527b3d1e..56be3d50f20 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/RowsAndColumnsTestBase.java @@ -28,7 +28,6 @@ import org.junit.Assert; import org.junit.Test; import javax.annotation.Nonnull; - import java.util.ArrayList; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java index 3cb3c5772b3..cd1bb1b81ec 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/concrete/ColumnBasedFrameRowsAndColumnsTest.java @@ -23,6 +23,7 @@ import org.apache.druid.query.operator.OffsetLimit; import org.apache.druid.query.rowsandcols.LazilyDecoratedRowsAndColumns; import org.apache.druid.query.rowsandcols.MapOfColumnsRowsAndColumns; import org.apache.druid.query.rowsandcols.RowsAndColumnsTestBase; + import java.util.function.Function; public class ColumnBasedFrameRowsAndColumnsTest extends RowsAndColumnsTestBase diff --git a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/FramedOnHeapAggregatableTest.java b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/FramedOnHeapAggregatableTest.java index d00e12b3b66..d5b11f7a612 100644 --- a/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/FramedOnHeapAggregatableTest.java +++ b/processing/src/test/java/org/apache/druid/query/rowsandcols/semantic/FramedOnHeapAggregatableTest.java @@ -91,7 +91,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 1, false, 2, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -1, false, 2, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -143,7 +143,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 2, false, 0, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -2, false, 0, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -169,7 +169,7 @@ public class FramedOnHeapAggregatableTest extends 
SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 5, false, 7, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -5, false, 7, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -197,7 +197,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 5, false, 1, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -5, false, 1, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -225,7 +225,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 5, false, 0, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -5, false, 0, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -253,7 +253,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 1, false, 7, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -1, false, 7, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -337,7 +337,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase FramedOnHeapAggregatable agger = FramedOnHeapAggregatable.fromRAC(rac); final RowsAndColumns results = agger.aggregateAll( - new WindowFrame(WindowFrame.PeerType.ROWS, false, 5, false, 0, null), + new WindowFrame(WindowFrame.PeerType.ROWS, false, -5, false, 0, null), new AggregatorFactory[]{ new LongSumAggregatorFactory("sumFromLong", "intCol"), new DoubleMaxAggregatorFactory("maxFromInt", "intCol"), @@ -479,7 +479,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase WindowFrame frame = new WindowFrame( PeerType.RANGE, false, - 1, + -1, false, 0, Collections.singletonList(ColumnWithDirection.ascending("c1")) @@ -517,7 +517,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase WindowFrame frame = new WindowFrame( PeerType.RANGE, false, - 1, + -1, false, 1, Collections.singletonList(ColumnWithDirection.ascending("c1")) @@ -537,7 +537,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase WindowFrame frame = new WindowFrame( PeerType.RANGE, false, - 1, + -1, false, 1, Collections.singletonList(ColumnWithDirection.ascending("c1")) @@ -556,7 +556,7 @@ public class FramedOnHeapAggregatableTest extends SemanticTestBase WindowFrame frame = new WindowFrame( PeerType.RANGE, false, - 1, + -1, false, 2, Collections.singletonList(ColumnWithDirection.ascending("c1")) diff --git a/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueFramesIterableTest.java b/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueFramesIterableTest.java index bdd64c1c8bd..8ffaa45de79 100644 --- 
a/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueFramesIterableTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/ScanResultValueFramesIterableTest.java @@ -21,7 +21,9 @@ package org.apache.druid.query.scan; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.allocation.ArenaMemoryAllocatorFactory; +import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.FrameBasedInlineDataSource; import org.apache.druid.query.FrameSignaturePair; @@ -55,6 +57,18 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe .add("col2", ColumnType.LONG) .build(); + private static final RowSignature SIGNATURE3 = RowSignature.builder() + .add("col1", ColumnType.DOUBLE) + .add("col2", ColumnType.LONG) + .add("col3", null) + .build(); + + private static final RowSignature SIGNATURE4 = RowSignature.builder() + .add("col1", ColumnType.DOUBLE) + .add("col3", null) + .add("col2", ColumnType.LONG) + .build(); + @Test public void testEmptySequence() @@ -97,10 +111,10 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe Assert.assertEquals(1, frames.size()); QueryToolChestTestHelper.assertArrayResultsEquals( ImmutableList.of( - new Object[]{1L, 1.0D}, - new Object[]{2L, 2.0D}, - new Object[]{1L, 1.0D}, - new Object[]{2L, 2.0D} + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D}, + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D} ), new FrameBasedInlineDataSource(frames, SIGNATURE1).getRowsAsSequence() ); @@ -153,10 +167,10 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe Assert.assertEquals(1, frames.size()); QueryToolChestTestHelper.assertArrayResultsEquals( ImmutableList.of( - new Object[]{1L, 1.0D}, - new Object[]{2L, 2.0D}, - new Object[]{1L, 1.0D}, - new Object[]{2L, 2.0D} + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D}, + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D} ), new FrameBasedInlineDataSource(frames, SIGNATURE1).getRowsAsSequence() ); @@ -177,17 +191,43 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe Assert.assertEquals(2, frames.size()); QueryToolChestTestHelper.assertArrayResultsEquals( ImmutableList.of( - new Object[]{1L, 1.0D}, - new Object[]{2L, 2.0D} + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D} ), - new FrameBasedInlineDataSource(Collections.singletonList(frames.get(0)), SIGNATURE1).getRowsAsSequence() + new FrameBasedInlineDataSource(frames.subList(0, 1), SIGNATURE1).getRowsAsSequence() ); QueryToolChestTestHelper.assertArrayResultsEquals( ImmutableList.of( - new Object[]{3.0D, 3L}, - new Object[]{4.0D, 4L} + new Object[]{2000.0D, 2100L}, + new Object[]{2001.0D, 2101L} ), - new FrameBasedInlineDataSource(Collections.singletonList(frames.get(1)), SIGNATURE2).getRowsAsSequence() + new FrameBasedInlineDataSource(frames.subList(1, 2), SIGNATURE2).getRowsAsSequence() + ); + } + + @Test + public void testBatchingWithHeterogenousScanResultValuesAndNullTypes() + { + List frames = Lists.newArrayList( + createIterable( + scanResultValue1(2), + scanResultValue3(2) + ) + ); + Assert.assertEquals(2, frames.size()); + QueryToolChestTestHelper.assertArrayResultsEquals( + ImmutableList.of( + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D} + ), + new 
FrameBasedInlineDataSource(frames.subList(0, 1), SIGNATURE1).getRowsAsSequence() + ); + QueryToolChestTestHelper.assertArrayResultsEquals( + ImmutableList.of( + new Object[]{3000.0D, 3100L}, + new Object[]{3001.0D, 3101L} + ), + new FrameBasedInlineDataSource(frames.subList(1, 2), SIGNATURE2).getRowsAsSequence() ); } @@ -208,20 +248,85 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe Assert.assertEquals(2, frames.size()); QueryToolChestTestHelper.assertArrayResultsEquals( ImmutableList.of( - new Object[]{1L, 1.0D}, - new Object[]{2L, 2.0D} + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D} ), - new FrameBasedInlineDataSource(Collections.singletonList(frames.get(0)), SIGNATURE1).getRowsAsSequence() + new FrameBasedInlineDataSource(frames.subList(0, 1), SIGNATURE1).getRowsAsSequence() ); QueryToolChestTestHelper.assertArrayResultsEquals( ImmutableList.of( - new Object[]{3.0D, 3L}, - new Object[]{4.0D, 4L} + new Object[]{2000.0D, 2100L}, + new Object[]{2001.0D, 2101L} ), - new FrameBasedInlineDataSource(Collections.singletonList(frames.get(1)), SIGNATURE2).getRowsAsSequence() + new FrameBasedInlineDataSource(frames.subList(1, 2), SIGNATURE2).getRowsAsSequence() ); } + @Test + public void testBatchingWithHeterogenousAndEmptyScanResultValuesAndNullTypes() + { + List frames = Lists.newArrayList( + createIterable( + scanResultValue1(0), + scanResultValue2(0), + scanResultValue1(2), + scanResultValue1(0), + scanResultValue2(2), + scanResultValue2(0), + scanResultValue2(0) + ) + ); + Assert.assertEquals(2, frames.size()); + QueryToolChestTestHelper.assertArrayResultsEquals( + ImmutableList.of( + new Object[]{1000L, 1100.0D}, + new Object[]{1001L, 1101.0D} + ), + new FrameBasedInlineDataSource(frames.subList(0, 1), SIGNATURE1).getRowsAsSequence() + ); + QueryToolChestTestHelper.assertArrayResultsEquals( + ImmutableList.of( + new Object[]{2000.0D, 2100L}, + new Object[]{2001.0D, 2101L} + ), + new FrameBasedInlineDataSource(frames.subList(1, 2), SIGNATURE2).getRowsAsSequence() + ); + } + + @Test + public void testBatchingWithDifferentRowSignaturesButSameTrimmedRowSignature() + { + List frames = Lists.newArrayList( + createIterable( + scanResultValue3(0), + scanResultValue4(0), + scanResultValue3(2), + scanResultValue3(0), + scanResultValue4(2), + scanResultValue4(0), + scanResultValue3(0) + ) + ); + Assert.assertEquals(1, frames.size()); + QueryToolChestTestHelper.assertArrayResultsEquals( + ImmutableList.of( + new Object[]{3000.0D, 3100L}, + new Object[]{3001.0D, 3101L}, + new Object[]{4000.0D, 4100L}, + new Object[]{4001.0D, 4101L} + ), + new FrameBasedInlineDataSource(frames, SIGNATURE2).getRowsAsSequence() + ); + } + + @Test + public void testExceptionThrownWithMissingType() + { + Sequence frames = Sequences.simple(createIterable(incompleteTypeScanResultValue(1))); + Assert.assertThrows(DruidException.class, frames::toList); + } + + @Test public void testSplitting() { @@ -252,7 +357,9 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe return new ScanResultValue( "dummy", ImmutableList.of("col1", "col2"), - IntStream.range(1, 1 + numRows).mapToObj(i -> new Object[]{i, (double) i}).collect(Collectors.toList()), + IntStream.range(1000, 1000 + numRows) + .mapToObj(i -> new Object[]{i, (double) i + 100}) + .collect(Collectors.toList()), SIGNATURE1 ); } @@ -263,8 +370,49 @@ public class ScanResultValueFramesIterableTest extends InitializedNullHandlingTe return new ScanResultValue( "dummy", ImmutableList.of("col1", "col2"), - 
IntStream.range(3, 3 + numRows).mapToObj(i -> new Object[]{(double) i, i}).collect(Collectors.toList()), + IntStream.range(2000, 2000 + numRows) + .mapToObj(i -> new Object[]{(double) i, i + 100}) + .collect(Collectors.toList()), SIGNATURE2 ); } + + // Signature: col1: DOUBLE, col2: LONG, col3: null + private static ScanResultValue scanResultValue3(int numRows) + { + return new ScanResultValue( + "dummy", + ImmutableList.of("col1", "col2", "col3"), + IntStream.range(3000, 3000 + numRows) + .mapToObj(i -> new Object[]{(double) i, i + 100, null}) + .collect(Collectors.toList()), + SIGNATURE3 + ); + } + + // Signature: col1: DOUBLE, col3: null, col2: LONG + private static ScanResultValue scanResultValue4(int numRows) + { + return new ScanResultValue( + "dummy", + ImmutableList.of("col1", "col3", "col2"), + IntStream.range(4000, 4000 + numRows) + .mapToObj(i -> new Object[]{(double) i, null, i + 100}) + .collect(Collectors.toList()), + SIGNATURE4 + ); + } + + // Contains ScanResultValue with incomplete type, and non-null row + private static ScanResultValue incompleteTypeScanResultValue(int numRows) + { + return new ScanResultValue( + "dummy", + ImmutableList.of("col1", "col3", "col2"), + IntStream.range(5000, 5000 + numRows) + .mapToObj(i -> new Object[]{(double) i, i + 100, i + 200}) + .collect(Collectors.toList()), + SIGNATURE4 + ); + } } diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 8dd81a94ce9..b467bc6c938 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -299,11 +299,12 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest } /* each row has 8 entries. 
- unnest 2 rows -> 16 entries also the value cardinality + unnest 2 rows -> 16 entries, which is also the value cardinality, but null is not present in the dictionary and so is + fabricated, so the cardinality is 17 unnest of unnest -> 16*8 = 128 rows */ Assert.assertEquals(count, 128); - Assert.assertEquals(dimSelector.getValueCardinality(), 16); + Assert.assertEquals(dimSelector.getValueCardinality(), 17); return null; }); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java index 3d35817531f..fd87969a042 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java @@ -108,6 +108,12 @@ public class EqualityFilterTests NotDimFilter.of(new EqualityFilter("dim0", ColumnType.LONG, 1L, null)), ImmutableList.of("0", "2", "3", "4", "5") ); + + assertFilterMatches(new EqualityFilter("dim0", ColumnType.DOUBLE, 1, null), ImmutableList.of("1")); + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("dim0", ColumnType.DOUBLE, 1, null)), + ImmutableList.of("0", "2", "3", "4", "5") + ); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java index fd8c79096c4..6f5c4b72eb1 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTests.java @@ -138,6 +138,29 @@ public class InFilterTests NotDimFilter.of(inFilter("dim0", ColumnType.STRING, Arrays.asList("e", "x"))), ImmutableList.of("a", "b", "c", "d", "f") ); + + if (NullHandling.sqlCompatible()) { + assertTypedFilterMatches( + inFilter("dim1", ColumnType.LONG, Arrays.asList(2L, 10L)), + ImmutableList.of("b", "c") + ); + + assertTypedFilterMatches( + inFilter("dim1", ColumnType.DOUBLE, Arrays.asList(2.0, 10.0)), + ImmutableList.of("b", "c") + ); + } else { + // in default value mode, we actually end up using a classic InDimFilter, which does not match numbers well + assertTypedFilterMatches( + inFilter("dim1", ColumnType.LONG, Arrays.asList(2L, 10L)), + ImmutableList.of("b", "c") + ); + + assertTypedFilterMatches( + inFilter("dim1", ColumnType.DOUBLE, Arrays.asList(2.0, 10.0)), + ImmutableList.of() + ); + } } @Test public void testSingleValueStringColumnWithNulls() diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java index c83c4f0da4c..067c078f5bc 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java @@ -197,7 +197,7 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest result.getParseException().getInput() ); Assert.assertEquals( - "Found unparseable columns in row: [{string=A, float=19.0, long=asdj, double=21.0}], exceptions: [could not convert value [asdj] to long]", + "Found unparseable columns in row: [{string=A, float=19.0, long=asdj, double=21.0}], exceptions: [Could not convert value [asdj] to long for dimension [long].]", result.getParseException().getMessage() ); @@ -219,7 +219,7 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest result.getParseException().getInput() );
Assert.assertEquals( - "Found unparseable columns in row: [{string=A, float=aaa, long=20, double=21.0}], exceptions: [could not convert value [aaa] to float]", + "Found unparseable columns in row: [{string=A, float=aaa, long=20, double=21.0}], exceptions: [Could not convert value [aaa] to float for dimension [float].]", result.getParseException().getMessage() ); @@ -241,7 +241,7 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest result.getParseException().getInput() ); Assert.assertEquals( - "Found unparseable columns in row: [{string=A, float=19.0, long=20, double=}], exceptions: [could not convert value [] to double]", + "Found unparseable columns in row: [{string=A, float=19.0, long=20, double=}], exceptions: [Could not convert value [] to double for dimension [double].]", result.getParseException().getMessage() ); } @@ -270,7 +270,7 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest result.getParseException().getInput() ); Assert.assertEquals( - "Found unparseable columns in row: [{string=A, float=19.0, long=[10, 5], double=21.0}], exceptions: [Could not ingest value [10, 5] as long. A long column cannot have multiple values in the same row.]", + "Found unparseable columns in row: [{string=A, float=19.0, long=[10, 5], double=21.0}], exceptions: [Could not ingest value [[10, 5]] as long for dimension [long]. A long column cannot have multiple values in the same row.]", result.getParseException().getMessage() ); @@ -292,7 +292,7 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest result.getParseException().getInput() ); Assert.assertEquals( - "Found unparseable columns in row: [{string=A, float=[10.0, 5.0], long=20, double=21.0}], exceptions: [Could not ingest value [10.0, 5.0] as float. A float column cannot have multiple values in the same row.]", + "Found unparseable columns in row: [{string=A, float=[10.0, 5.0], long=20, double=21.0}], exceptions: [Could not ingest value [[10.0, 5.0]] as float for dimension [float]. A float column cannot have multiple values in the same row.]", result.getParseException().getMessage() ); @@ -314,7 +314,7 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest result.getParseException().getInput() ); Assert.assertEquals( - "Found unparseable columns in row: [{string=A, float=19.0, long=20, double=[10.0, 5.0]}], exceptions: [Could not ingest value [10.0, 5.0] as double. A double column cannot have multiple values in the same row.]", + "Found unparseable columns in row: [{string=A, float=19.0, long=20, double=[10.0, 5.0]}], exceptions: [Could not ingest value [[10.0, 5.0]] as double for dimension [double]. 
A double column cannot have multiple values in the same row.]", result.getParseException().getMessage() ); } diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index 20d032aba38..5931bff8359 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -33,7 +33,6 @@ import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; -import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; @@ -45,7 +44,6 @@ import org.apache.druid.segment.join.table.IndexedTableJoinable; import org.junit.Assert; import org.junit.Test; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -2294,42 +2292,6 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag ); } - @Test - public void test_determineBaseColumnsWithPreAndPostJoinVirtualColumns() - { - List joinableClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT)); - JoinFilterPreAnalysis analysis = makeDefaultConfigPreAnalysis(null, joinableClauses, VirtualColumns.EMPTY); - HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter( - factSegment.asStorageAdapter(), - joinableClauses, - analysis - ); - List expectedPreJoin = ImmutableList.of( - makeExpressionVirtualColumn("concat(countryIsoCode,'L')", "v0"), - makeExpressionVirtualColumn("concat(countryIsoCode, countryNumber)", "v1"), - makeExpressionVirtualColumn("channel_uniques - 1", "v2"), - makeExpressionVirtualColumn("channel_uniques - __time", "v3") - ); - - List expectedPostJoin = ImmutableList.of( - makeExpressionVirtualColumn("concat(countryIsoCode, dummyColumn)", "v4"), - makeExpressionVirtualColumn("dummyMetric - __time", "v5") - ); - List actualPreJoin = new ArrayList<>(); - List actualPostJoin = new ArrayList<>(); - List allVirtualColumns = new ArrayList<>(); - allVirtualColumns.addAll(expectedPreJoin); - allVirtualColumns.addAll(expectedPostJoin); - adapter.determineBaseColumnsWithPreAndPostJoinVirtualColumns( - VirtualColumns.create(allVirtualColumns), - actualPreJoin, - actualPostJoin - ); - - Assert.assertEquals(expectedPreJoin, actualPreJoin); - Assert.assertEquals(expectedPostJoin, actualPostJoin); - } - @Test public void test_hasBuiltInFiltersForSingleJoinableClauseWithVariousJoinTypes() { diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java index 1b7f250f847..68a77f38884 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinFilterAnalyzerTest.java @@ -119,7 +119,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes null, ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, 
actualFilterSplit); } @@ -183,7 +183,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes new SelectorFilter("rtc.countryName", "United States"), ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -249,7 +249,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -320,7 +320,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -382,7 +382,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -461,7 +461,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -521,7 +521,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -685,7 +685,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -758,7 +758,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); ExpressionVirtualColumn expectedVirtualColumn = new ExpressionVirtualColumn( "JOIN-FILTER-PUSHDOWN-VIRTUAL-COLUMN-0", @@ -933,7 +933,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1013,7 +1013,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ) ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals( 
expectedFilterSplit.getBaseTableFilter(), actualFilterSplit.getBaseTableFilter() @@ -1103,7 +1103,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ) ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals( expectedFilterSplit.getBaseTableFilter(), actualFilterSplit.getBaseTableFilter() @@ -1171,7 +1171,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1228,7 +1228,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1284,7 +1284,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1341,7 +1341,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1396,7 +1396,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1460,7 +1460,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1523,7 +1523,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1578,7 +1578,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1632,7 +1632,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); 
Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1690,7 +1690,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1747,7 +1747,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1803,7 +1803,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1858,7 +1858,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1924,7 +1924,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -1995,7 +1995,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -2074,7 +2074,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -2133,7 +2133,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -2241,7 +2241,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -2494,7 +2494,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes expectedVirtualColumns ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ 
-2556,7 +2556,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -2630,7 +2630,7 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis); + JoinFilterSplit actualFilterSplit = split(joinFilterPreAnalysis); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } @@ -2674,7 +2674,16 @@ public class JoinFilterAnalyzerTest extends BaseHashJoinSegmentStorageAdapterTes null, ImmutableSet.of() ); - JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseTableFilter); + JoinFilterSplit actualFilterSplit = JoinFilterAnalyzer.splitFilter( + joinFilterPreAnalysis, + baseTableFilter + ); Assert.assertEquals(expectedFilterSplit, actualFilterSplit); } + + + private JoinFilterSplit split(JoinFilterPreAnalysis preAnalysis) + { + return JoinFilterAnalyzer.splitFilter(preAnalysis, null); + } } diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/FrameBasedIndexedTableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/FrameBasedIndexedTableTest.java index 8b093184d57..64c815f55d4 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/FrameBasedIndexedTableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/FrameBasedIndexedTableTest.java @@ -26,7 +26,6 @@ import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator; import it.unimi.dsi.fastutil.ints.IntSortedSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.frame.Frame; -import org.apache.druid.frame.FrameType; import org.apache.druid.frame.allocation.HeapMemoryAllocator; import org.apache.druid.frame.allocation.SingleMemoryAllocatorFactory; import org.apache.druid.frame.segment.FrameCursorUtils; @@ -217,8 +216,7 @@ public class FrameBasedIndexedTableTest extends InitializedNullHandlingTest { cursorCloseablePair = IterableRowsCursorHelper.getCursorFromIterable(DATASOURCE_ROWS, ROW_SIGNATURE); Cursor cursor = cursorCloseablePair.lhs; - FrameWriterFactory frameWriterFactory = FrameWriters.makeFrameWriterFactory( - FrameType.COLUMNAR, + FrameWriterFactory frameWriterFactory = FrameWriters.makeColumnBasedFrameWriterFactory( new SingleMemoryAllocatorFactory(HeapMemoryAllocator.unlimited()), ROW_SIGNATURE, new ArrayList<>() diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java index 9b4d1b84af2..fcae823b626 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java @@ -29,6 +29,7 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.Parser; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.groupby.DeferExpressionDimensions; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; @@ -234,6 +235,28 @@ 
public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertNull(thePlan.getOutputType()); Assert.assertNull(thePlan.inferColumnCapabilities(null)); // no we cannot + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -269,6 +292,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -348,6 +393,27 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -387,6 +453,30 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertTrue(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // innately deferrable + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + + // multiple input columns thePlan = plan("concat(scalar_dictionary_string, scalar_dictionary_string_nonunique)"); Assert.assertTrue( @@ -430,6 +520,29 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertFalse(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + 
DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + + // array output of dictionary encoded string are not considered single scalar/mappable, nor vectorizable thePlan = plan("array(scalar_dictionary_string)"); Assert.assertTrue( @@ -448,6 +561,27 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ExpressionPlan.Trait.VECTORIZABLE ) ); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -481,6 +615,29 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertTrue(inferred.hasBitmapIndexes()); Assert.assertFalse(inferred.hasSpatialIndexes()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + + thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( @@ -510,6 +667,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertEquals(ValueType.STRING, inferred.getType()); Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( thePlan.is( @@ -541,6 +720,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertEquals(ValueType.STRING, inferred.getType()); Assert.assertTrue(inferred.hasMultipleValues().isTrue()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + 
SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + thePlan = plan("array_append(multi_dictionary_string, 'foo')"); Assert.assertTrue( thePlan.is( @@ -556,6 +757,27 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ExpressionPlan.Trait.VECTORIZABLE ) ); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -582,6 +804,27 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ) ); Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); thePlan = plan("concat(multi_dictionary_string, multi_dictionary_string_nonunique)"); Assert.assertTrue( @@ -631,6 +874,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest // incomplete and unknown skip output type since we don't reliably know Assert.assertNull(thePlan.getOutputType()); Assert.assertNull(thePlan.inferColumnCapabilities(null)); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -667,14 +932,78 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest Assert.assertEquals("array_append(\"scalar_string\", 'x')", thePlan.getAppliedFoldExpression("__acc").stringify()); Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + 
Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // multi-valued are cool too thePlan = plan("array_append(multi_dictionary_string, 'x')"); assertArrayInAndOut(thePlan); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); // what about incomplete inputs with arrays? they are not reported as incomplete because they are treated as arrays thePlan = plan("array_append(string_unknown, 'x')"); assertArrayInAndOut(thePlan); Assert.assertEquals(ExpressionType.STRING_ARRAY, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); // what about if it is the scalar argument? there it is thePlan = plan("array_append(multi_dictionary_string, string_unknown)"); @@ -696,13 +1025,76 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ); // incomplete and unknown skip output type since we don't reliably know Assert.assertNull(thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); // array types are cool too thePlan = plan("array_append(string_array_1, 'x')"); assertArrayInAndOut(thePlan); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); thePlan = plan("array_append(string_array_1, 'x')"); assertArrayInAndOut(thePlan); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + 
Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @@ -732,6 +1124,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ); Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType()); + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // what about a multi-valued input thePlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')"); Assert.assertTrue( @@ -761,6 +1175,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ); // why is this null Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType()); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -864,6 +1300,29 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ColumnType.NESTED_DATA.getComplexTypeName(), inferred.getComplexTypeName() ); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + // all numeric inputs so these are true + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertTrue( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } @Test @@ -895,6 +1354,28 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest inferred.getType() ); Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue()); + + Assert.assertFalse( + DeferExpressionDimensions.SINGLE_STRING.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH_NON_NUMERIC.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); + Assert.assertFalse( + DeferExpressionDimensions.FIXED_WIDTH.useDeferredGroupBySelector( + thePlan, + thePlan.getAnalysis().getRequiredBindingsList(), + SYNTHETIC_INSPECTOR + ) + ); } private 
static ExpressionPlan plan(String expression) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java index cd8b78ed1f9..6d27439f8a6 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVectorSelectorsTest.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.virtual; import com.google.common.collect.ImmutableList; +import org.apache.datasketches.memory.WritableMemory; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; @@ -29,6 +30,10 @@ import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.Parser; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.groupby.DeferExpressionDimensions; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer; +import org.apache.druid.query.groupby.epinephelinae.vector.GroupByVectorColumnSelector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DeprecatedQueryableIndexColumnSelector; @@ -260,14 +265,17 @@ public class ExpressionVectorSelectorsTest extends InitializedNullHandlingTest } else { objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v"); } + GroupByVectorColumnSelector groupBySelector = + cursor.getColumnSelectorFactory().makeGroupByVectorColumnSelector("v", DeferExpressionDimensions.ALWAYS); while (!cursor.isDone()) { + final List resultsVector = new ArrayList<>(); boolean[] nulls; switch (outputType.getType()) { case LONG: nulls = selector.getNullVector(); long[] longs = selector.getLongVector(); for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { - results.add(nulls != null && nulls[i] ? null : longs[i]); + resultsVector.add(nulls != null && nulls[i] ? null : longs[i]); } break; case DOUBLE: @@ -276,24 +284,26 @@ public class ExpressionVectorSelectorsTest extends InitializedNullHandlingTest nulls = selector.getNullVector(); float[] floats = selector.getFloatVector(); for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { - results.add(nulls != null && nulls[i] ? null : (double) floats[i]); + resultsVector.add(nulls != null && nulls[i] ? null : (double) floats[i]); } } else { nulls = selector.getNullVector(); double[] doubles = selector.getDoubleVector(); for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) { - results.add(nulls != null && nulls[i] ? null : doubles[i]); + resultsVector.add(nulls != null && nulls[i] ? 
null : doubles[i]); } } break; case STRING: Object[] objects = objectSelector.getObjectVector(); for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) { - results.add(objects[i]); + resultsVector.add(objects[i]); } break; } + verifyGroupBySelector(groupBySelector, resultsVector); + results.addAll(resultsVector); cursor.advance(); } } @@ -328,4 +338,24 @@ public class ExpressionVectorSelectorsTest extends InitializedNullHandlingTest Assert.assertTrue(rowCountCursor > 0); Assert.assertEquals(rowCountCursor, rowCount); } + + private static void verifyGroupBySelector( + final GroupByVectorColumnSelector groupBySelector, + final List expectedResults + ) + { + final int keyOffset = 1; + final int keySize = groupBySelector.getGroupingKeySize() + keyOffset + 1; // 1 byte before, 1 byte after + final WritableMemory keySpace = + WritableMemory.allocate(keySize * expectedResults.size()); + + final int writeKeysRetVal = groupBySelector.writeKeys(keySpace, keySize, keyOffset, 0, expectedResults.size()); + Assert.assertEquals(0, writeKeysRetVal); + + for (int i = 0; i < expectedResults.size(); i++) { + final ResultRow resultRow = ResultRow.create(1); + groupBySelector.writeKeyToResultRow(new MemoryPointer(keySpace, (long) keySize * i), keyOffset, resultRow, 0); + Assert.assertEquals("row #" + i, expectedResults.get(i), resultRow.getArray()[0]); + } + } } diff --git a/server/src/main/java/org/apache/druid/catalog/model/CatalogUtils.java b/server/src/main/java/org/apache/druid/catalog/model/CatalogUtils.java index 841d592062b..d0ac6c31e76 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/CatalogUtils.java +++ b/server/src/main/java/org/apache/druid/catalog/model/CatalogUtils.java @@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.granularity.GranularityType; import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.joda.time.Period; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.net.URI; @@ -63,17 +64,25 @@ public class CatalogUtils * For the odd interval, the interval name is also accepted (for the other * intervals, the interval name is the descriptive string). 
*/ - public static Granularity asDruidGranularity(String value) + public static Granularity asDruidGranularity(@Nonnull String value) { - if (Strings.isNullOrEmpty(value) || value.equalsIgnoreCase(DatasourceDefn.ALL_GRANULARITY)) { + if (value.equalsIgnoreCase(DatasourceDefn.ALL_GRANULARITY)) { return Granularities.ALL; } + Granularity granularity; try { - return new PeriodGranularity(new Period(value), null, null); + granularity = Granularity.fromString(value); } catch (IllegalArgumentException e) { - throw new IAE(StringUtils.format("'%s' is an invalid period string", value)); + try { + granularity = new PeriodGranularity(new Period(value), null, null); + } + catch (IllegalArgumentException e2) { + throw new IAE("[%s] is an invalid granularity string.", value); + } } + + return granularity; } /** @@ -275,18 +284,12 @@ public class CatalogUtils return merged; } - public static void validateGranularity(String value) + public static void validateGranularity(final String value) { if (value == null) { return; } - Granularity granularity; - try { - granularity = new PeriodGranularity(new Period(value), null, null); - } - catch (IllegalArgumentException e) { - throw new IAE(StringUtils.format("[%s] is an invalid granularity string", value)); - } + final Granularity granularity = asDruidGranularity(value); if (!GranularityType.isStandard(granularity)) { throw new IAE( "Unsupported segment graularity. " diff --git a/server/src/main/java/org/apache/druid/catalog/model/facade/DatasourceFacade.java b/server/src/main/java/org/apache/druid/catalog/model/facade/DatasourceFacade.java index 7ac00d9b608..9e4c2d9df6a 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/facade/DatasourceFacade.java +++ b/server/src/main/java/org/apache/druid/catalog/model/facade/DatasourceFacade.java @@ -30,6 +30,8 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.column.ColumnType; +import javax.annotation.Nullable; + import java.util.Collections; import java.util.List; import java.util.Map; @@ -122,6 +124,7 @@ public class DatasourceFacade extends TableFacade return stringProperty(DatasourceDefn.SEGMENT_GRANULARITY_PROPERTY); } + @Nullable public Granularity segmentGranularity() { String definedGranularity = segmentGranularityString(); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/NoopSegmentPublisher.java b/server/src/main/java/org/apache/druid/client/BootstrapSegmentsResponse.java similarity index 68% rename from server/src/main/java/org/apache/druid/segment/realtime/NoopSegmentPublisher.java rename to server/src/main/java/org/apache/druid/client/BootstrapSegmentsResponse.java index ef1283843a4..7d156926242 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/NoopSegmentPublisher.java +++ b/server/src/main/java/org/apache/druid/client/BootstrapSegmentsResponse.java @@ -17,17 +17,23 @@ * under the License. 
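The reworked asDruidGranularity above accepts either a standard granularity name or an ISO-8601 period, trying Granularity.fromString first and only then falling back to PeriodGranularity. A minimal sketch of that behaviour, assuming a scratch class name and that DatasourceDefn.ALL_GRANULARITY is the string "ALL":

// Illustrative sketch only; exercises the name-first, period-second parsing order of the
// updated CatalogUtils.asDruidGranularity. Not part of this patch.
import org.apache.druid.catalog.model.CatalogUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;

public class GranularityParsingSketch
{
  public static void main(String[] args)
  {
    // Named granularity: resolved via Granularity.fromString().
    Granularity day = CatalogUtils.asDruidGranularity("DAY");
    // ISO-8601 period: fromString() fails, so it falls through to PeriodGranularity.
    Granularity fifteenMinutes = CatalogUtils.asDruidGranularity("PT15M");
    // "ALL" is special-cased before either parser runs (assuming ALL_GRANULARITY is "ALL").
    Granularity all = CatalogUtils.asDruidGranularity("ALL");

    System.out.println(day + " / " + fifteenMinutes + " / " + (all == Granularities.ALL));
    // An unparseable value such as "every-tuesday" throws an IAE with the
    // "[...] is an invalid granularity string." message added in this patch.
  }
}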
*/ -package org.apache.druid.segment.realtime; +package org.apache.druid.client; +import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.timeline.DataSegment; -/** - */ -public class NoopSegmentPublisher implements SegmentPublisher +public class BootstrapSegmentsResponse { - @Override - public void publishSegment(DataSegment segment) + private final CloseableIterator iterator; + + public BootstrapSegmentsResponse(final CloseableIterator iterator) { - // do nothing + this.iterator = iterator; } + + public CloseableIterator getIterator() + { + return iterator; + } + } diff --git a/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java b/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java index 7bcb4c2ce03..5fa34d6699d 100644 --- a/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java +++ b/server/src/main/java/org/apache/druid/client/CachingClusteredClient.java @@ -275,7 +275,7 @@ public class CachingClusteredClient implements QuerySegmentWalker this.responseContext = responseContext; this.query = queryPlus.getQuery(); this.toolChest = warehouse.getToolChest(query); - this.strategy = toolChest.getCacheStrategy(query); + this.strategy = toolChest.getCacheStrategy(query, objectMapper); this.dataSourceAnalysis = query.getDataSource().getAnalysis(); this.useCache = CacheUtil.isUseSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER); diff --git a/server/src/main/java/org/apache/druid/client/CachingQueryRunner.java b/server/src/main/java/org/apache/druid/client/CachingQueryRunner.java index 9bb9f474dd9..41d4bb4ea63 100644 --- a/server/src/main/java/org/apache/druid/client/CachingQueryRunner.java +++ b/server/src/main/java/org/apache/druid/client/CachingQueryRunner.java @@ -86,7 +86,7 @@ public class CachingQueryRunner implements QueryRunner public Sequence run(QueryPlus queryPlus, ResponseContext responseContext) { Query query = queryPlus.getQuery(); - final CacheStrategy strategy = toolChest.getCacheStrategy(query); + final CacheStrategy strategy = toolChest.getCacheStrategy(query, mapper); final boolean populateCache = canPopulateCache(query, strategy); final boolean useCache = canUseCache(query, strategy); diff --git a/server/src/main/java/org/apache/druid/client/JsonParserIterator.java b/server/src/main/java/org/apache/druid/client/JsonParserIterator.java index 00dea3dff0e..7aa88774397 100644 --- a/server/src/main/java/org/apache/druid/client/JsonParserIterator.java +++ b/server/src/main/java/org/apache/druid/client/JsonParserIterator.java @@ -86,6 +86,16 @@ public class JsonParserIterator implements CloseableIterator this.hasTimeout = timeoutAt > -1; } + /** + * Bypasses Jackson serialization to prevent materialization of results from the {@code future} in memory at once. + * A shortened version of {@link #JsonParserIterator(JavaType, Future, String, Query, String, ObjectMapper)} + * where the URL and host parameters, used solely for logging/errors, are not known. 
+ */ + public JsonParserIterator(JavaType typeRef, Future future, ObjectMapper objectMapper) + { + this(typeRef, future, "", null, "", objectMapper); + } + @Override public boolean hasNext() { diff --git a/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClient.java b/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClient.java index aeccee8043b..edeb16665ba 100644 --- a/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClient.java +++ b/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClient.java @@ -20,6 +20,7 @@ package org.apache.druid.client.coordinator; import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.client.BootstrapSegmentsResponse; import org.apache.druid.client.ImmutableSegmentLoadInfo; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.rpc.ServiceRetryPolicy; @@ -58,8 +59,19 @@ public interface CoordinatorClient */ ListenableFuture> fetchDataSourceInformation(Set datasources); + /** + * Fetch bootstrap segments from the coordinator. The results must be streamed back to the caller as the + * result set can be large. + */ + ListenableFuture fetchBootstrapSegments(); + /** * Returns a new instance backed by a ServiceClient which follows the provided retryPolicy */ CoordinatorClient withRetryPolicy(ServiceRetryPolicy retryPolicy); + + /** + * Retrieves list of datasources with used segments. + */ + ListenableFuture> fetchDataSourcesWithUsedSegments(); } diff --git a/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClientImpl.java b/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClientImpl.java index 93c22bbdbff..fc3deee12ed 100644 --- a/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClientImpl.java +++ b/server/src/main/java/org/apache/druid/client/coordinator/CoordinatorClientImpl.java @@ -21,17 +21,22 @@ package org.apache.druid.client.coordinator; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.client.BootstrapSegmentsResponse; import org.apache.druid.client.ImmutableSegmentLoadInfo; +import org.apache.druid.client.JsonParserIterator; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.jackson.JacksonUtils; import org.apache.druid.java.util.http.client.response.BytesFullResponseHandler; +import org.apache.druid.java.util.http.client.response.InputStreamResponseHandler; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.rpc.RequestBuilder; import org.apache.druid.rpc.ServiceClient; import org.apache.druid.rpc.ServiceRetryPolicy; import org.apache.druid.segment.metadata.DataSourceInformation; +import org.apache.druid.server.coordination.LoadableDataSegment; import org.apache.druid.timeline.DataSegment; import org.jboss.netty.handler.codec.http.HttpMethod; import org.joda.time.Interval; @@ -156,9 +161,44 @@ public class CoordinatorClientImpl implements CoordinatorClient ); } + @Override + public ListenableFuture fetchBootstrapSegments() + { + final String path = "/druid/coordinator/v1/metadata/bootstrapSegments"; + return FutureUtils.transform( + client.asyncRequest( + new RequestBuilder(HttpMethod.POST, path), + new InputStreamResponseHandler() + ), + in -> new BootstrapSegmentsResponse( + 
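The shortened JsonParserIterator constructor and CoordinatorClient.fetchBootstrapSegments are both aimed at streaming a potentially large set of bootstrap segments instead of materializing it in memory. A hedged sketch of a caller draining that stream, assuming an already-constructed CoordinatorClient and that the iterator's element type is DataSegment (generic parameters are elided elsewhere in this diff):

// Hedged sketch of a consumer of CoordinatorClient#fetchBootstrapSegments(); the `client`
// parameter and the counting logic are illustrative, not part of this patch.
import org.apache.druid.client.BootstrapSegmentsResponse;
import org.apache.druid.client.coordinator.CoordinatorClient;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.timeline.DataSegment;

public class BootstrapSegmentConsumerSketch
{
  public static int countBootstrapSegments(CoordinatorClient client) throws Exception
  {
    // Block on the future; production code would likely add a retry policy via
    // client.withRetryPolicy(...) and a timeout.
    final BootstrapSegmentsResponse response = client.fetchBootstrapSegments().get();

    int count = 0;
    // The iterator streams segments off the wire, so close it even on failure.
    try (CloseableIterator<DataSegment> it = response.getIterator()) {
      while (it.hasNext()) {
        it.next();
        count++;
      }
    }
    return count;
  }
}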
new JsonParserIterator<>( + // Some servers, like the Broker, may have PruneLoadSpec set to true for optimization reasons. + // We specifically use LoadableDataSegment here instead of DataSegment so the callers can still correctly + // load the bootstrap segments, as the load specs are guaranteed not to be pruned. + jsonMapper.getTypeFactory().constructType(LoadableDataSegment.class), + Futures.immediateFuture(in), + jsonMapper + ) + ) + ); + } + @Override public CoordinatorClientImpl withRetryPolicy(ServiceRetryPolicy retryPolicy) { return new CoordinatorClientImpl(client.withRetryPolicy(retryPolicy), jsonMapper); } + + @Override + public ListenableFuture> fetchDataSourcesWithUsedSegments() + { + final String path = "/druid/coordinator/v1/metadata/datasources"; + return FutureUtils.transform( + client.asyncRequest( + new RequestBuilder(HttpMethod.GET, path), + new BytesFullResponseHandler() + ), + holder -> JacksonUtils.readValue(jsonMapper, holder.getContent(), new TypeReference>() {}) + ); + } } diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionRunnerInfo.java b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionRunnerInfo.java new file mode 100644 index 00000000000..ed9e22dfaa2 --- /dev/null +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionRunnerInfo.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.client.indexing; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.indexer.CompactionEngine; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.query.QueryContext; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.server.coordinator.CompactionConfigValidationResult; +import org.apache.druid.server.coordinator.DataSourceCompactionConfig; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; + + +/** + * This class is just used to pass the strategy type via the "type" parameter for deserilization to appropriate + * {@link org.apache.druid.indexing.common.task.CompactionRunner} subtype at the overlod. 
+ */ +public class ClientCompactionRunnerInfo +{ + private final CompactionEngine type; + + @JsonCreator + public ClientCompactionRunnerInfo(@JsonProperty("type") CompactionEngine type) + { + this.type = type; + } + + @JsonProperty + public CompactionEngine getType() + { + return type; + } + + @Override + public String toString() + { + return "ClientCompactionRunnerInfo{" + + "type=" + type + + '}'; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ClientCompactionRunnerInfo that = (ClientCompactionRunnerInfo) o; + return type == that.type; + } + + @Override + public int hashCode() + { + return Objects.hash(type); + } + + public static CompactionConfigValidationResult validateCompactionConfig( + DataSourceCompactionConfig newConfig, + CompactionEngine defaultCompactionEngine + ) + { + CompactionEngine compactionEngine = newConfig.getEngine() == null ? defaultCompactionEngine : newConfig.getEngine(); + if (compactionEngine == CompactionEngine.NATIVE) { + return new CompactionConfigValidationResult(true, null); + } else { + return compactionConfigSupportedByMSQEngine(newConfig); + } + } + + /** + * Checks if the provided compaction config is supported by MSQ. The following configs aren't supported: + *
* <ul>
+ * <li>partitionsSpec of type HashedPartitionsSpec.</li>
+ * <li>maxTotalRows in DynamicPartitionsSpec.</li>
+ * <li>rollup set to false in granularitySpec when metricsSpec is specified. Null is treated as true.</li>
+ * <li>queryGranularity set to ALL in granularitySpec.</li>
+ * <li>Any metric whose output column name differs from its input column name.</li>
+ * </ul>
+ */ + private static CompactionConfigValidationResult compactionConfigSupportedByMSQEngine(DataSourceCompactionConfig newConfig) + { + List validationResults = new ArrayList<>(); + if (newConfig.getTuningConfig() != null) { + validationResults.add(validatePartitionsSpecForMSQ(newConfig.getTuningConfig().getPartitionsSpec())); + } + if (newConfig.getGranularitySpec() != null) { + validationResults.add(validateRollupForMSQ( + newConfig.getMetricsSpec(), + newConfig.getGranularitySpec().isRollup() + )); + } + validationResults.add(validateMaxNumTasksForMSQ(newConfig.getTaskContext())); + validationResults.add(validateMetricsSpecForMSQ(newConfig.getMetricsSpec())); + return validationResults.stream() + .filter(result -> !result.isValid()) + .findFirst() + .orElse(new CompactionConfigValidationResult(true, null)); + } + + /** + * Validate that partitionSpec is either 'dynamic` or 'range', and if 'dynamic', ensure 'maxTotalRows' is null. + */ + public static CompactionConfigValidationResult validatePartitionsSpecForMSQ(PartitionsSpec partitionsSpec) + { + if (!(partitionsSpec instanceof DimensionRangePartitionsSpec + || partitionsSpec instanceof DynamicPartitionsSpec)) { + return new CompactionConfigValidationResult( + false, + "Invalid partitionsSpec type[%s] for MSQ engine. Type must be either 'dynamic' or 'range'.", + partitionsSpec.getClass().getSimpleName() + + ); + } + if (partitionsSpec instanceof DynamicPartitionsSpec + && ((DynamicPartitionsSpec) partitionsSpec).getMaxTotalRows() != null) { + return new CompactionConfigValidationResult( + false, + "maxTotalRows[%d] in DynamicPartitionsSpec not supported for MSQ engine.", + ((DynamicPartitionsSpec) partitionsSpec).getMaxTotalRows() + ); + } + return new CompactionConfigValidationResult(true, null); + } + + /** + * Validate rollup is set to false in granularitySpec when metricsSpec is specified. + */ + public static CompactionConfigValidationResult validateRollupForMSQ( + AggregatorFactory[] metricsSpec, + @Nullable Boolean isRollup + ) + { + if (metricsSpec != null && isRollup != null && !isRollup) { + return new CompactionConfigValidationResult( + false, + "rollup in granularitySpec must be set to True if metricsSpec is specifed for MSQ engine." + ); + } + return new CompactionConfigValidationResult(true, null); + } + + /** + * Validate maxNumTasks >= 2 in context. + */ + public static CompactionConfigValidationResult validateMaxNumTasksForMSQ(Map context) + { + if (context != null) { + int maxNumTasks = QueryContext.of(context) + .getInt(ClientMSQContext.CTX_MAX_NUM_TASKS, ClientMSQContext.DEFAULT_MAX_NUM_TASKS); + if (maxNumTasks < 2) { + return new CompactionConfigValidationResult(false, + "MSQ context maxNumTasks [%,d] cannot be less than 2, " + + "since at least 1 controller and 1 worker is necessary.", + maxNumTasks + ); + } + } + return new CompactionConfigValidationResult(true, null); + } + + /** + * Validate each metric has output column name same as the input name. 
+ */ + public static CompactionConfigValidationResult validateMetricsSpecForMSQ(AggregatorFactory[] metricsSpec) + { + if (metricsSpec == null) { + return new CompactionConfigValidationResult(true, null); + } + return Arrays.stream(metricsSpec) + .filter(aggregatorFactory -> + !(aggregatorFactory.requiredFields().isEmpty() + || aggregatorFactory.requiredFields().size() == 1 + && aggregatorFactory.requiredFields() + .get(0) + .equals(aggregatorFactory.getName()))) + .findFirst() + .map(aggregatorFactory -> + new CompactionConfigValidationResult( + false, + "Different name[%s] and fieldName(s)[%s] for aggregator unsupported for MSQ engine.", + aggregatorFactory.getName(), + aggregatorFactory.requiredFields() + )).orElse(new CompactionConfigValidationResult(true, null)); + } +} diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskGranularitySpec.java b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskGranularitySpec.java index 3ba732cfbf7..27d5b21f43d 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskGranularitySpec.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskGranularitySpec.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import javax.annotation.Nullable; import java.util.Objects; /** @@ -44,9 +45,9 @@ public class ClientCompactionTaskGranularitySpec @JsonCreator public ClientCompactionTaskGranularitySpec( - @JsonProperty("segmentGranularity") Granularity segmentGranularity, - @JsonProperty("queryGranularity") Granularity queryGranularity, - @JsonProperty("rollup") Boolean rollup + @JsonProperty("segmentGranularity") @Nullable Granularity segmentGranularity, + @JsonProperty("queryGranularity") @Nullable Granularity queryGranularity, + @JsonProperty("rollup") @Nullable Boolean rollup ) { this.queryGranularity = queryGranularity; @@ -55,18 +56,21 @@ public class ClientCompactionTaskGranularitySpec } @JsonProperty + @Nullable public Granularity getSegmentGranularity() { return segmentGranularity; } @JsonProperty + @Nullable public Granularity getQueryGranularity() { return queryGranularity; } @JsonProperty + @Nullable public Boolean isRollup() { return rollup; diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQuery.java b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQuery.java index 5873bd229db..d8efe4397ac 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQuery.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQuery.java @@ -46,6 +46,7 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery private final AggregatorFactory[] metricsSpec; private final ClientCompactionTaskTransformSpec transformSpec; private final Map context; + private final ClientCompactionRunnerInfo compactionRunner; @JsonCreator public ClientCompactionTaskQuery( @@ -57,7 +58,8 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery @JsonProperty("dimensionsSpec") ClientCompactionTaskDimensionsSpec dimensionsSpec, @JsonProperty("metricsSpec") AggregatorFactory[] metrics, @JsonProperty("transformSpec") ClientCompactionTaskTransformSpec transformSpec, - @JsonProperty("context") Map context + @JsonProperty("context") Map context, + @JsonProperty("compactionRunner") 
@Nullable ClientCompactionRunnerInfo compactionRunner ) { this.id = Preconditions.checkNotNull(id, "id"); @@ -69,6 +71,7 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery this.metricsSpec = metrics; this.transformSpec = transformSpec; this.context = context; + this.compactionRunner = compactionRunner; } @JsonProperty @@ -135,6 +138,13 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery return context; } + @JsonProperty("compactionRunner") + @Nullable + public ClientCompactionRunnerInfo getCompactionRunner() + { + return compactionRunner; + } + @Override public boolean equals(Object o) { @@ -153,7 +163,8 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery Objects.equals(dimensionsSpec, that.dimensionsSpec) && Arrays.equals(metricsSpec, that.metricsSpec) && Objects.equals(transformSpec, that.transformSpec) && - Objects.equals(context, that.context); + Objects.equals(context, that.context) && + Objects.equals(compactionRunner, that.compactionRunner); } @Override @@ -167,7 +178,8 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery granularitySpec, dimensionsSpec, transformSpec, - context + context, + compactionRunner ); result = 31 * result + Arrays.hashCode(metricsSpec); return result; @@ -186,6 +198,7 @@ public class ClientCompactionTaskQuery implements ClientTaskQuery ", metricsSpec=" + Arrays.toString(metricsSpec) + ", transformSpec=" + transformSpec + ", context=" + context + + ", compactionRunner=" + compactionRunner + '}'; } } diff --git a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQueryTuningConfig.java b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQueryTuningConfig.java index 7b1a7c54682..55fe7d0114f 100644 --- a/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQueryTuningConfig.java +++ b/server/src/main/java/org/apache/druid/client/indexing/ClientCompactionTaskQueryTuningConfig.java @@ -28,6 +28,7 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; +import org.apache.druid.server.coordinator.DataSourceCompactionConfig; import org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig; import org.joda.time.Duration; @@ -79,6 +80,17 @@ public class ClientCompactionTaskQueryTuningConfig @Nullable private final AppendableIndexSpec appendableIndexSpec; + public static ClientCompactionTaskQueryTuningConfig from( + DataSourceCompactionConfig compactionConfig + ) + { + if (compactionConfig == null) { + return from(null, null, null); + } else { + return from(compactionConfig.getTuningConfig(), compactionConfig.getMaxRowsPerSegment(), null); + } + } + public static ClientCompactionTaskQueryTuningConfig from( @Nullable UserCompactionTaskQueryTuningConfig userCompactionTaskQueryTuningConfig, @Nullable Integer maxRowsPerSegment, diff --git a/server/src/main/java/org/apache/druid/metadata/SegmentPublisherProvider.java b/server/src/main/java/org/apache/druid/client/indexing/ClientMSQContext.java similarity index 56% rename from server/src/main/java/org/apache/druid/metadata/SegmentPublisherProvider.java rename to server/src/main/java/org/apache/druid/client/indexing/ClientMSQContext.java index c33c3a826f2..45279bda3ed 100644 --- a/server/src/main/java/org/apache/druid/metadata/SegmentPublisherProvider.java +++ 
b/server/src/main/java/org/apache/druid/client/indexing/ClientMSQContext.java @@ -17,20 +17,19 @@ * under the License. */ -package org.apache.druid.metadata; - -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.google.inject.Provider; -import org.apache.druid.guice.NoopSegmentPublisherProvider; -import org.apache.druid.segment.realtime.SegmentPublisher; +package org.apache.druid.client.indexing; /** + * This class copies over MSQ context parameters from the MSQ extension. This is required to validate the submitted + * compaction config at the coordinator. The values used here should be kept in sync with those in + * {@link org.apache.druid.msq.util.MultiStageQueryContext} */ -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = NoopSegmentPublisherProvider.class) -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "metadata", value = MetadataSegmentPublisherProvider.class) -}) -public interface SegmentPublisherProvider extends Provider +public class ClientMSQContext { + public static final String CTX_MAX_NUM_TASKS = "maxNumTasks"; + public static final int DEFAULT_MAX_NUM_TASKS = 2; + /** + * Limit to ensure that an MSQ compaction task doesn't take up all task slots in a cluster. + */ + public static final int MAX_TASK_SLOTS_FOR_MSQ_COMPACTION_TASK = 5; } diff --git a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java index 7ece79ad40f..7894df83af4 100644 --- a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java +++ b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java @@ -29,8 +29,6 @@ import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.MetadataRuleManager; import org.apache.druid.metadata.MetadataRuleManagerProvider; -import org.apache.druid.metadata.MetadataSegmentPublisher; -import org.apache.druid.metadata.MetadataSegmentPublisherProvider; import org.apache.druid.metadata.MetadataStorageActionHandlerFactory; import org.apache.druid.metadata.MetadataStorageConnector; import org.apache.druid.metadata.MetadataStorageProvider; @@ -38,8 +36,6 @@ import org.apache.druid.metadata.MetadataSupervisorManager; import org.apache.druid.metadata.SQLMetadataConnector; import org.apache.druid.metadata.SQLMetadataRuleManager; import org.apache.druid.metadata.SQLMetadataRuleManagerProvider; -import org.apache.druid.metadata.SQLMetadataSegmentPublisher; -import org.apache.druid.metadata.SQLMetadataSegmentPublisherProvider; import org.apache.druid.metadata.SQLMetadataSupervisorManager; import org.apache.druid.metadata.SegmentsMetadataManager; import org.apache.druid.metadata.SegmentsMetadataManagerProvider; @@ -76,8 +72,6 @@ public class SQLMetadataStorageDruidModule implements Module PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentsMetadataManagerProvider.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManager.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManagerProvider.class), defaultValue); - PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataSegmentPublisher.class), defaultValue); - PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataSegmentPublisherProvider.class), defaultValue); 
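ClientMSQContext above mirrors the MSQ maxNumTasks context key so that the coordinator can validate submitted compaction configs without depending on the MSQ extension. A hedged sketch of how the new static validators in ClientCompactionRunnerInfo behave; the chosen values and the use of LongSumAggregatorFactory are illustrative assumptions rather than part of this patch:

// Hedged sketch: exercising two of the MSQ validation helpers added above.
import com.google.common.collect.ImmutableMap;
import org.apache.druid.client.indexing.ClientCompactionRunnerInfo;
import org.apache.druid.client.indexing.ClientMSQContext;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.server.coordinator.CompactionConfigValidationResult;

public class MsqCompactionValidationSketch
{
  public static void main(String[] args)
  {
    // maxNumTasks below 2 cannot host a controller plus at least one worker.
    CompactionConfigValidationResult tooFewTasks =
        ClientCompactionRunnerInfo.validateMaxNumTasksForMSQ(
            ImmutableMap.<String, Object>of(ClientMSQContext.CTX_MAX_NUM_TASKS, 1)
        );
    System.out.println("maxNumTasks=1 valid? " + tooFewTasks.isValid()); // false

    // An aggregator whose output name differs from its input field is rejected.
    AggregatorFactory[] renamingMetric = {new LongSumAggregatorFactory("sum_added", "added")};
    System.out.println(
        "renaming metric valid? " + ClientCompactionRunnerInfo.validateMetricsSpecForMSQ(renamingMetric).isValid()
    ); // false

    // The same aggregator keeping its input name passes this particular check.
    AggregatorFactory[] sameName = {new LongSumAggregatorFactory("added", "added")};
    System.out.println(
        "same-name metric valid? " + ClientCompactionRunnerInfo.validateMetricsSpecForMSQ(sameName).isValid()
    ); // true
  }
}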
PolyBind.createChoiceWithDefault(binder, prop, Key.get(IndexerMetadataStorageCoordinator.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageActionHandlerFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageUpdaterJobHandler.class), defaultValue); @@ -109,16 +103,6 @@ public class SQLMetadataStorageDruidModule implements Module .to(SQLMetadataRuleManagerProvider.class) .in(LazySingleton.class); - PolyBind.optionBinder(binder, Key.get(MetadataSegmentPublisher.class)) - .addBinding(type) - .to(SQLMetadataSegmentPublisher.class) - .in(LazySingleton.class); - - PolyBind.optionBinder(binder, Key.get(MetadataSegmentPublisherProvider.class)) - .addBinding(type) - .to(SQLMetadataSegmentPublisherProvider.class) - .in(LazySingleton.class); - PolyBind.optionBinder(binder, Key.get(IndexerMetadataStorageCoordinator.class)) .addBinding(type) .to(IndexerSQLMetadataStorageCoordinator.class) diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index da6dd9ffd95..83b4ac7e474 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -39,32 +39,15 @@ import java.util.Map; import java.util.Set; /** + * Handles metadata transactions performed by the Overlord. */ public interface IndexerMetadataStorageCoordinator { /** - * Retrieve all published segments which may include any data in the interval and are marked as used from the - * metadata store. - * - * The order of segments within the returned collection is unspecified, but each segment is guaranteed to appear in - * the collection only once. - * - * @param dataSource The data source to query - * @param interval The interval for which all applicable and used segmented are requested. - * @param visibility Whether only visible or visible as well as overshadowed segments should be returned. The - * visibility is considered within the specified interval: that is, a segment which is visible - * outside of the specified interval, but overshadowed within the specified interval will not be - * returned if {@link Segments#ONLY_VISIBLE} is passed. See more precise description in the doc for - * {@link Segments}. - * @return The DataSegments which include data in the requested interval. These segments may contain data outside the - * requested interval. - * - * @implNote This method doesn't return a {@link Set} because there may be an expectation that {@code Set.contains()} - * is O(1) operation, while it's not the case for the returned collection unless it copies all segments into a new - * {@link java.util.HashSet} or {@link com.google.common.collect.ImmutableSet} which may in turn be unnecessary in - * other use cases. So clients should perform such copy themselves if they need {@link Set} semantics. + * Retrieves all published segments that have partial or complete overlap with + * the given interval and are marked as used. */ - default Collection retrieveUsedSegmentsForInterval( + default Set retrieveUsedSegmentsForInterval( String dataSource, Interval interval, Segments visibility @@ -74,21 +57,16 @@ public interface IndexerMetadataStorageCoordinator } /** - * Retrieve all published used segments in the data source from the metadata store. 
+ * Retrieves all published used segments for the given data source. * - * @param dataSource The data source to query - * - * @return all segments belonging to the given data source - * @see #retrieveUsedSegmentsForInterval(String, Interval, Segments) similar to this method but also accepts data - * interval. + * @see #retrieveUsedSegmentsForInterval(String, Interval, Segments) */ - Collection retrieveAllUsedSegments(String dataSource, Segments visibility); + Set retrieveAllUsedSegments(String dataSource, Segments visibility); /** - * * Retrieve all published segments which are marked as used and the created_date of these segments belonging to the * given data source and list of intervals from the metadata store. - * + *

* Unlike other similar methods in this interface, this method doesn't accept a {@link Segments} "visibility" * parameter. The returned collection may include overshadowed segments and their created_dates, as if {@link * Segments#INCLUDING_OVERSHADOWED} was passed. It's the responsibility of the caller to filter out overshadowed ones @@ -99,32 +77,16 @@ public interface IndexerMetadataStorageCoordinator * * @return The DataSegments and the related created_date of segments */ - Collection> retrieveUsedSegmentsAndCreatedDates(String dataSource, List intervals); + Collection> retrieveUsedSegmentsAndCreatedDates( + String dataSource, + List intervals + ); /** - * Retrieve all published segments which may include any data in the given intervals and are marked as used from the - * metadata store. - *

- * The order of segments within the returned collection is unspecified, but each segment is guaranteed to appear in - * the collection only once. - *

- * - * @param dataSource The data source to query - * @param intervals The intervals for which all applicable and used segments are requested. - * @param visibility Whether only visible or visible as well as overshadowed segments should be returned. The - * visibility is considered within the specified intervals: that is, a segment which is visible - * outside of the specified intervals, but overshadowed on the specified intervals will not be - * returned if {@link Segments#ONLY_VISIBLE} is passed. See more precise description in the doc for - * {@link Segments}. - * @return The DataSegments which include data in the requested intervals. These segments may contain data outside the - * requested intervals. - * - * @implNote This method doesn't return a {@link Set} because there may be an expectation that {@code Set.contains()} - * is O(1) operation, while it's not the case for the returned collection unless it copies all segments into a new - * {@link java.util.HashSet} or {@link com.google.common.collect.ImmutableSet} which may in turn be unnecessary in - * other use cases. So clients should perform such copy themselves if they need {@link Set} semantics. + * Retrieves all published segments that have partial or complete overlap with + * the given intervals and are marked as used. */ - Collection retrieveUsedSegmentsForIntervals( + Set retrieveUsedSegmentsForIntervals( String dataSource, List intervals, Segments visibility @@ -179,6 +141,12 @@ public interface IndexerMetadataStorageCoordinator @Nullable DateTime maxUsedStatusLastUpdatedTime ); + /** + * Retrieves segments for the given IDs, regardless of their visibility + * (visible, overshadowed or unused). + */ + Set retrieveSegmentsById(String dataSource, Set segmentIds); + /** * Mark as unused segments which include ONLY data within the given interval. * @@ -198,7 +166,7 @@ public interface IndexerMetadataStorageCoordinator * * @return set of segments actually added */ - Set commitSegments(Set segments, @Nullable SegmentSchemaMapping segmentSchemaMapping) throws IOException; + Set commitSegments(Set segments, @Nullable SegmentSchemaMapping segmentSchemaMapping); /** * Allocates pending segments for the given requests in the pending segments table. @@ -310,7 +278,7 @@ public interface IndexerMetadataStorageCoordinator @Nullable DataSourceMetadata startMetadata, @Nullable DataSourceMetadata endMetadata, @Nullable SegmentSchemaMapping segmentSchemaMapping - ) throws IOException; + ); /** * Commits segments and corresponding schema created by an APPEND task. 
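With the interface now returning Set rather than Collection for used-segment retrieval, callers no longer need a defensive copy before doing membership checks. A hedged sketch under the assumption of an injected IndexerMetadataStorageCoordinator and of Segments living in the same overlord package:

// Hedged sketch of a caller benefiting from the Set return type introduced above.
import java.util.Set;
import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
import org.apache.druid.indexing.overlord.Segments;
import org.apache.druid.timeline.DataSegment;

public class UsedSegmentLookupSketch
{
  public static boolean isStillUsed(IndexerMetadataStorageCoordinator coordinator, DataSegment candidate)
  {
    final Set<DataSegment> used = coordinator.retrieveUsedSegmentsForInterval(
        candidate.getDataSource(),
        candidate.getInterval(),
        Segments.ONLY_VISIBLE
    );
    // Previously the Collection return type pushed callers toward copying into a HashSet
    // before checking membership; with a Set this is a direct contains().
    return used.contains(candidate);
  }
}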
@@ -505,4 +473,21 @@ public interface IndexerMetadataStorageCoordinator * @return List of pending segment records */ List getPendingSegments(String datasource, Interval interval); + + /** + * Map from a segment ID to the segment ID from which it was upgraded + * There should be no entry in the map for an original non-upgraded segment + * @param dataSource data source + * @param segmentIds ids of segments + */ + Map retrieveUpgradedFromSegmentIds(String dataSource, Set segmentIds); + + /** + * Map from a segment ID to a set containing + * 1) all segment IDs that were upgraded from it AND are still present in the metadata store + * 2) the segment ID itself if and only if it is still present in the metadata store + * @param dataSource data source + * @param segmentIds ids of the first segments which had the corresponding load spec + */ + Map> retrieveUpgradedToSegmentIds(String dataSource, Set segmentIds); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 2b9f328a097..ecfad572e74 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -152,7 +152,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor } @Override - public Collection retrieveUsedSegmentsForIntervals( + public Set retrieveUsedSegmentsForIntervals( final String dataSource, final List intervals, final Segments visibility @@ -165,7 +165,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor } @Override - public Collection retrieveAllUsedSegments(String dataSource, Segments visibility) + public Set retrieveAllUsedSegments(String dataSource, Segments visibility) { return doRetrieveUsedSegments(dataSource, Collections.emptyList(), visibility); } @@ -173,7 +173,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor /** * @param intervals empty list means unrestricted interval. 
*/ - private Collection doRetrieveUsedSegments( + private Set doRetrieveUsedSegments( final String dataSource, final List intervals, final Segments visibility @@ -320,6 +320,18 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor return matchingSegments; } + @Override + public Set retrieveSegmentsById(String dataSource, Set segmentIds) + { + return connector.inReadOnlyTransaction( + (handle, transactionStatus) -> + retrieveSegmentsById(handle, dataSource, segmentIds) + .stream() + .map(DataSegmentPlus::getDataSegment) + .collect(Collectors.toSet()) + ); + } + @Override public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interval) { @@ -419,7 +431,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor } } - private Collection retrieveAllUsedSegmentsForIntervalsWithHandle( + private Set retrieveAllUsedSegmentsForIntervalsWithHandle( final Handle handle, final String dataSource, final List intervals @@ -428,7 +440,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor try (final CloseableIterator iterator = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) .retrieveUsedSegments(dataSource, intervals)) { - final List retVal = new ArrayList<>(); + final Set retVal = new HashSet<>(); iterator.forEachRemaining(retVal::add); return retVal; } @@ -438,7 +450,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor public Set commitSegments( final Set segments, @Nullable final SegmentSchemaMapping segmentSchemaMapping - ) throws IOException + ) { final SegmentPublishResult result = commitSegmentsAndMetadata( @@ -462,7 +474,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor @Nullable final DataSourceMetadata startMetadata, @Nullable final DataSourceMetadata endMetadata, @Nullable final SegmentSchemaMapping segmentSchemaMapping - ) throws IOException + ) { verifySegmentsToCommit(segments); @@ -552,6 +564,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor createNewIdsOfAppendSegmentsAfterReplace(handle, replaceSegments, locksHeldByReplaceTask); Map upgradeSegmentMetadata = new HashMap<>(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); for (DataSegmentPlus dataSegmentPlus : upgradedSegments) { segmentsToInsert.add(dataSegmentPlus.getDataSegment()); if (dataSegmentPlus.getSchemaFingerprint() != null && dataSegmentPlus.getNumRows() != null) { @@ -560,6 +573,12 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor new SegmentMetadata(dataSegmentPlus.getNumRows(), dataSegmentPlus.getSchemaFingerprint()) ); } + if (dataSegmentPlus.getUpgradedFromSegmentId() != null) { + upgradedFromSegmentIdMap.put( + dataSegmentPlus.getDataSegment().getId().toString(), + dataSegmentPlus.getUpgradedFromSegmentId() + ); + } } SegmentPublishResult result = SegmentPublishResult.ok( insertSegments( @@ -567,7 +586,8 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor segmentsToInsert, segmentSchemaMapping, upgradeSegmentMetadata, - Collections.emptyMap() + Collections.emptyMap(), + upgradedFromSegmentIdMap ), upgradePendingSegmentsOverlappingWith(segmentsToInsert) ); @@ -1396,6 +1416,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor final Set allSegmentsToInsert = new HashSet<>(appendSegments); final Map newVersionSegmentToParent = new HashMap<>(); final Map segmentIdMap = new HashMap<>(); 
+ final Map upgradedFromSegmentIdMap = new HashMap<>(); appendSegments.forEach(segment -> segmentIdMap.put(segment.getId().toString(), segment)); segmentIdsForNewVersions.forEach( pendingSegment -> { @@ -1403,6 +1424,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()); final SegmentId newVersionSegmentId = pendingSegment.getId().asSegmentId(); newVersionSegmentToParent.put(newVersionSegmentId, oldSegment.getId()); + upgradedFromSegmentIdMap.put(newVersionSegmentId.toString(), oldSegment.getId().toString()); allSegmentsToInsert.add( new DataSegment( pendingSegment.getId().asSegmentId(), @@ -1461,7 +1483,8 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor allSegmentsToInsert, segmentSchemaMapping, Collections.emptyMap(), - newVersionSegmentToParent + newVersionSegmentToParent, + upgradedFromSegmentIdMap ) ); }, @@ -2080,7 +2103,8 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor .bind("version", segment.getVersion()) .bind("used", usedSegments.contains(segment)) .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", now); + .bind("used_status_last_updated", now) + .bind("upgraded_from_segment_id", (String) null); if (schemaPersistEnabled) { Long numRows = null; @@ -2205,6 +2229,11 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor .shardSpec(shardSpec) .build(); + // When the segment already has an upgraded_from_segment_id, reuse it for its children + final String upgradedFromSegmentId = oldSegmentMetadata.getUpgradedFromSegmentId() == null + ? oldSegmentMetadata.getDataSegment().getId().toString() + : oldSegmentMetadata.getUpgradedFromSegmentId(); + upgradedSegments.add( new DataSegmentPlus( dataSegment, @@ -2212,7 +2241,9 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor null, null, oldSegmentMetadata.getSchemaFingerprint(), - oldSegmentMetadata.getNumRows()) + oldSegmentMetadata.getNumRows(), + upgradedFromSegmentId + ) ); } @@ -2254,7 +2285,8 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor Set segments, @Nullable SegmentSchemaMapping segmentSchemaMapping, Map upgradeSegmentMetadata, - Map newVersionForAppendToParent + Map newVersionForAppendToParent, + Map upgradedFromSegmentIdMap ) throws IOException { boolean shouldPersistSchema = shouldPersistSchema(segmentSchemaMapping); @@ -2290,7 +2322,8 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor .bind("version", segment.getVersion()) .bind("used", true) .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", now); + .bind("used_status_last_updated", now) + .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId().toString())); if (schemaPersistEnabled) { SegmentMetadata segmentMetadata = @@ -2437,9 +2470,9 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor { String insertStatement = "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s," - + " partitioned, version, used, payload, used_status_last_updated %3$s) " + + " partitioned, version, used, payload, used_status_last_updated, upgraded_from_segment_id %3$s) " + "VALUES (:id, :dataSource, :created_date, :start, :end," - + " :partitioned, :version, :used, :payload, :used_status_last_updated %4$s)"; + + " :partitioned, :version, 
:used, :payload, :used_status_last_updated, :upgraded_from_segment_id %4$s)"; if (schemaPersistEnabled) { return StringUtils.format( @@ -2552,8 +2585,6 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor * oldCommitMetadata when this function is called (based on T.equals). This method is idempotent in that if * the metadata already equals newCommitMetadata, it will return true. * - * @param handle database handle - * @param dataSource druid dataSource * @param startMetadata dataSource metadata pre-insert must match this startMetadata according to * {@link DataSourceMetadata#matches(DataSourceMetadata)} * @param endMetadata dataSource metadata post-insert will have this endMetadata merged in with @@ -2615,15 +2646,16 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor if (startMetadataGreaterThanExisting && !startMetadataMatchesExisting) { // Offsets stored in startMetadata is greater than the last commited metadata. - return new DataStoreMetadataUpdateResult(true, false, - "The new start metadata state[%s] is ahead of the last commited" - + " end state[%s]. Try resetting the supervisor.", startMetadata, oldCommitMetadataFromDb + return DataStoreMetadataUpdateResult.failure( + "The new start metadata state[%s] is ahead of the last committed" + + " end state[%s]. Try resetting the supervisor.", + startMetadata, oldCommitMetadataFromDb ); } if (!startMetadataMatchesExisting) { // Not in the desired start state. - return new DataStoreMetadataUpdateResult(true, false, + return DataStoreMetadataUpdateResult.failure( "Inconsistency between stored metadata state[%s] and target state[%s]. Try resetting the supervisor.", oldCommitMetadataFromDb, startMetadata ); @@ -2656,11 +2688,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor retVal = numRows == 1 ? DataStoreMetadataUpdateResult.SUCCESS - : new DataStoreMetadataUpdateResult( - true, - true, - "Failed to insert metadata for datasource [%s]", - dataSource); + : DataStoreMetadataUpdateResult.retryableFailure("Failed to insert metadata for datasource[%s]", dataSource); } else { // Expecting a particular old metadata; use the SHA1 in a compare-and-swap UPDATE final int numRows = handle.createStatement( @@ -2680,11 +2708,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor retVal = numRows == 1 ? 
DataStoreMetadataUpdateResult.SUCCESS - : new DataStoreMetadataUpdateResult( - true, - true, - "Failed to update metadata for datasource [%s]", - dataSource); + : DataStoreMetadataUpdateResult.retryableFailure("Failed to update metadata for datasource[%s]", dataSource); } if (retVal.isSuccess()) { @@ -2700,19 +2724,12 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor public boolean deleteDataSourceMetadata(final String dataSource) { return connector.retryWithHandle( - new HandleCallback() - { - @Override - public Boolean withHandle(Handle handle) - { - int rows = handle.createStatement( - StringUtils.format("DELETE from %s WHERE dataSource = :dataSource", dbTables.getDataSourceTable()) - ) - .bind("dataSource", dataSource) - .execute(); + handle -> { + int rows = handle.createStatement( + StringUtils.format("DELETE from %s WHERE dataSource = :dataSource", dbTables.getDataSourceTable()) + ).bind("dataSource", dataSource).execute(); - return rows > 0; - } + return rows > 0; } ); } @@ -2755,17 +2772,12 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor public void updateSegmentMetadata(final Set segments) { connector.getDBI().inTransaction( - new TransactionCallback() - { - @Override - public Void inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception - { - for (final DataSegment segment : segments) { - updatePayload(handle, segment); - } - - return null; + (handle, transactionStatus) -> { + for (final DataSegment segment : segments) { + updatePayload(handle, segment); } + + return 0; } ); } @@ -2932,6 +2944,90 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor ); } + @Override + public Map retrieveUpgradedFromSegmentIds( + final String dataSource, + final Set segmentIds + ) + { + if (segmentIds.isEmpty()) { + return Collections.emptyMap(); + } + + final Map upgradedFromSegmentIds = new HashMap<>(); + final List> partitions = Lists.partition(ImmutableList.copyOf(segmentIds), 100); + for (List partition : partitions) { + final String sql = StringUtils.format( + "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", + dbTables.getSegmentsTable(), + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", partition) + ); + connector.retryWithHandle( + handle -> { + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", partition, query); + return query.map((index, r, ctx) -> { + final String id = r.getString(1); + final String upgradedFromSegmentId = r.getString(2); + if (upgradedFromSegmentId != null) { + upgradedFromSegmentIds.put(id, upgradedFromSegmentId); + } + return null; + }).list(); + } + ); + } + return upgradedFromSegmentIds; + } + + @Override + public Map> retrieveUpgradedToSegmentIds( + final String dataSource, + final Set segmentIds + ) + { + if (segmentIds.isEmpty()) { + return Collections.emptyMap(); + } + + final Map> upgradedToSegmentIds = new HashMap<>(); + retrieveSegmentsById(dataSource, segmentIds) + .stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .forEach(id -> upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()).add(id)); + + final List> partitions = Lists.partition(ImmutableList.copyOf(segmentIds), 100); + for (List partition : partitions) { + final String sql = StringUtils.format( + "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", + 
dbTables.getSegmentsTable(), + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("upgraded_from_segment_id", partition) + ); + + connector.retryWithHandle( + handle -> { + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition( + "upgraded_from_segment_id", + partition, + query + ); + return query.map((index, r, ctx) -> { + final String upgradedToId = r.getString(1); + final String id = r.getString(2); + upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()) + .add(upgradedToId); + return null; + }).list(); + } + ); + } + return upgradedToSegmentIds; + } + private static class PendingSegmentsRecord { private final String sequenceName; @@ -2978,10 +3074,21 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor { private final boolean failed; private final boolean canRetry; - @Nullable private final String errorMsg; + @Nullable + private final String errorMsg; public static final DataStoreMetadataUpdateResult SUCCESS = new DataStoreMetadataUpdateResult(false, false, null); + public static DataStoreMetadataUpdateResult failure(String errorMsgFormat, Object... messageArgs) + { + return new DataStoreMetadataUpdateResult(true, false, errorMsgFormat, messageArgs); + } + + public static DataStoreMetadataUpdateResult retryableFailure(String errorMsgFormat, Object... messageArgs) + { + return new DataStoreMetadataUpdateResult(true, true, errorMsgFormat, messageArgs); + } + DataStoreMetadataUpdateResult(boolean failed, boolean canRetry, @Nullable String errorMsg, Object... errorFormatArgs) { this.failed = failed; @@ -3010,34 +3117,5 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor { return errorMsg; } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - DataStoreMetadataUpdateResult that = (DataStoreMetadataUpdateResult) o; - return failed == that.failed && canRetry == that.canRetry && Objects.equals(errorMsg, that.errorMsg); - } - - @Override - public int hashCode() - { - return Objects.hash(failed, canRetry, errorMsg); - } - - @Override - public String toString() - { - return "DataStoreMetadataUpdateResult{" + - "failed=" + failed + - ", canRetry=" + canRetry + - ", errorMsg='" + errorMsg + '\'' + - '}'; - } } } diff --git a/server/src/main/java/org/apache/druid/metadata/MetadataSegmentPublisherProvider.java b/server/src/main/java/org/apache/druid/metadata/MetadataSegmentPublisherProvider.java deleted file mode 100644 index b493148ec90..00000000000 --- a/server/src/main/java/org/apache/druid/metadata/MetadataSegmentPublisherProvider.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.metadata; - -public interface MetadataSegmentPublisherProvider extends SegmentPublisherProvider -{ - @Override - MetadataSegmentPublisher get(); -} diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index bfbaad18ef1..f117fe7f28b 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -40,7 +40,10 @@ import java.sql.ResultSet; *
 * <li>id -> id (Unique identifier for pending segment)</li>
 * <li>sequence_name -> sequenceName (sequence name used for segment allocation)</li>
 * <li>sequence_prev_id -> sequencePrevId (previous segment id used for segment allocation)</li>
- * <li>upgraded_from_segment_id -> upgradedFromSegmentId (Id of the root segment from which this was upgraded)</li>
+ * <li>upgraded_from_segment_id -> upgradedFromSegmentId
+ * (ID of the segment which was upgraded to create the current segment.
+ * If the former was itself created as a result of an upgrade, then this ID
+ * must refer to the original non-upgraded segment in the hierarchy.)</li>
 * <li>task_allocator_id -> taskAllocatorId (Associates a task / task group / replica group with the pending segment)</li>
  • * */ diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index cd927b2fef8..dc87b9fc2fd 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -318,7 +318,7 @@ public abstract class SQLMetadataConnector implements MetadataStorageConnector public void createSegmentTable(final String tableName) { - List columns = new ArrayList<>(); + final List columns = new ArrayList<>(); columns.add("id VARCHAR(255) NOT NULL"); columns.add("dataSource VARCHAR(255) %4$s NOT NULL"); columns.add("created_date VARCHAR(255) NOT NULL"); @@ -587,6 +587,8 @@ public abstract class SQLMetadataConnector implements MetadataStorageConnector Map columnNameTypes = new HashMap<>(); columnNameTypes.put("used_status_last_updated", "VARCHAR(255)"); + columnNameTypes.put("upgraded_from_segment_id", "VARCHAR(255)"); + if (centralizedDatasourceSchemaConfig.isEnabled()) { columnNameTypes.put("schema_fingerprint", "VARCHAR(255)"); columnNameTypes.put("num_rows", "BIGINT"); @@ -619,6 +621,14 @@ public abstract class SQLMetadataConnector implements MetadataStorageConnector } alterTable(tableName, alterCommands); + + final Set createdIndexSet = getIndexOnTable(tableName); + createIndex( + tableName, + StringUtils.format("idx_%1$s_datasource_upgraded_from_segment_id", tableName), + ImmutableList.of("dataSource", "upgraded_from_segment_id"), + createdIndexSet + ); } @Override diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentPublisher.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentPublisher.java deleted file mode 100644 index 48a92ecba4e..00000000000 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentPublisher.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
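To make the upgraded_from_segment_id hierarchy above concrete, here is a minimal illustrative sketch (hypothetical segment names, not part of this patch): if segment B was created by upgrading A, and C by upgrading B, then both B and C record A, the original non-upgraded segment, as their upgraded_from_segment_id.

```java
import java.util.HashMap;
import java.util.Map;

public class UpgradeHierarchySketch
{
  public static void main(String[] args)
  {
    // upgraded segment id -> root (non-upgraded) segment id
    final Map<String, String> upgradedFrom = new HashMap<>();

    // B was created by upgrading A; A is the root, so B points at A.
    upgradedFrom.put("B", "A");

    // C was created by upgrading B; B is itself an upgrade, so C must also
    // point at the original non-upgraded segment A, not at B.
    upgradedFrom.put("C", upgradedFrom.getOrDefault("B", "B"));

    System.out.println(upgradedFrom); // prints {B=A, C=A}
  }
}
```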
- */ - -package org.apache.druid.metadata; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.inject.Inject; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.partition.NoneShardSpec; -import org.skife.jdbi.v2.DBI; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -public class SQLMetadataSegmentPublisher implements MetadataSegmentPublisher -{ - private static final Logger log = new Logger(SQLMetadataSegmentPublisher.class); - - private final ObjectMapper jsonMapper; - private final MetadataStorageTablesConfig config; - private final SQLMetadataConnector connector; - private final String statement; - - @Inject - public SQLMetadataSegmentPublisher( - ObjectMapper jsonMapper, - MetadataStorageTablesConfig config, - SQLMetadataConnector connector - ) - { - this.jsonMapper = jsonMapper; - this.config = config; - this.connector = connector; - this.statement = StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload, used_status_last_updated) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload, :used_status_last_updated)", - config.getSegmentsTable(), connector.getQuoteString() - ); - } - - @Override - public void publishSegment(final DataSegment segment) throws IOException - { - String now = DateTimes.nowUtc().toString(); - publishSegment( - segment.getId().toString(), - segment.getDataSource(), - now, - segment.getInterval().getStart().toString(), - segment.getInterval().getEnd().toString(), - (segment.getShardSpec() instanceof NoneShardSpec) ? 
false : true, - segment.getVersion(), - true, - jsonMapper.writeValueAsBytes(segment), - now - ); - } - - private void publishSegment( - final String segmentId, - final String dataSource, - final String createdDate, - final String start, - final String end, - final boolean partitioned, - final String version, - final boolean used, - final byte[] payload, - final String usedFlagLastUpdated - ) - { - try { - final DBI dbi = connector.getDBI(); - List> exists = dbi.withHandle( - handle -> handle.createQuery( - StringUtils.format("SELECT id FROM %s WHERE id=:id", config.getSegmentsTable()) - ).bind("id", segmentId).list() - ); - - if (!exists.isEmpty()) { - log.info("Skipping publish of segment[%s] as it already exists in the metadata store.", segmentId); - return; - } - - dbi.withHandle( - handle -> - handle.createStatement(statement) - .bind("id", segmentId) - .bind("dataSource", dataSource) - .bind("created_date", createdDate) - .bind("start", start) - .bind("end", end) - .bind("partitioned", partitioned) - .bind("version", version) - .bind("used", used) - .bind("payload", payload) - .bind("used_status_last_updated", usedFlagLastUpdated) - .execute() - ); - } - catch (Exception e) { - log.error(e, "Exception inserting into DB"); - throw new RuntimeException(e); - } - } -} diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentPublisherProvider.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentPublisherProvider.java deleted file mode 100644 index 89ba4bf9bc7..00000000000 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataSegmentPublisherProvider.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.metadata; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.databind.ObjectMapper; - -import javax.validation.constraints.NotNull; - -/** - */ -public class SQLMetadataSegmentPublisherProvider implements MetadataSegmentPublisherProvider -{ - @JacksonInject - @NotNull - private SQLMetadataConnector connector = null; - - @JacksonInject - @NotNull - private MetadataStorageTablesConfig config = null; - - @JacksonInject - @NotNull - private ObjectMapper jsonMapper = null; - - @Override - public MetadataSegmentPublisher get() - { - return new SQLMetadataSegmentPublisher(jsonMapper, config, connector); - } -} diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index 209deb41e7e..2c81603e529 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -1153,7 +1153,6 @@ public class SqlSegmentsMetadataManager implements SegmentsMetadataManager return null; }).list(); - segmentSchemaCache.resetTemporaryPublishedMetadataQueryResultOnDBPoll(); return null; }); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index f14cc995050..fc1c84a7037 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -286,7 +286,7 @@ public class SqlSegmentsMetadataQuery if (includeSchemaInfo) { final Query> query = handle.createQuery( StringUtils.format( - "SELECT payload, used, schema_fingerprint, num_rows FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, schema_fingerprint, num_rows, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); @@ -306,7 +306,8 @@ public class SqlSegmentsMetadataQuery null, r.getBoolean(2), schemaFingerprint, - numRows + numRows, + r.getString(5) ); } ) @@ -314,7 +315,7 @@ public class SqlSegmentsMetadataQuery } else { final Query> query = handle.createQuery( StringUtils.format( - "SELECT payload, used FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); @@ -331,7 +332,8 @@ public class SqlSegmentsMetadataQuery null, r.getBoolean(2), null, - null + null, + r.getString(3) ) ) .iterator(); @@ -864,6 +866,7 @@ public class SqlSegmentsMetadataQuery DateTimes.of(r.getString(3)), null, null, + null, null )) .iterator(); @@ -980,7 +983,7 @@ public class SqlSegmentsMetadataQuery * * @see #getParameterizedInConditionForColumn(String, List) */ - private static void bindColumnValuesToQueryWithInCondition( + static void bindColumnValuesToQueryWithInCondition( final String columnName, final List values, final SQLStatement query diff --git a/server/src/main/java/org/apache/druid/query/ResultLevelCachingQueryRunner.java b/server/src/main/java/org/apache/druid/query/ResultLevelCachingQueryRunner.java index 182faba7a09..0af6ebca3ed 100644 --- a/server/src/main/java/org/apache/druid/query/ResultLevelCachingQueryRunner.java +++ 
b/server/src/main/java/org/apache/druid/query/ResultLevelCachingQueryRunner.java @@ -73,7 +73,7 @@ public class ResultLevelCachingQueryRunner implements QueryRunner this.cache = cache; this.cacheConfig = cacheConfig; this.query = query; - this.strategy = queryToolChest.getCacheStrategy(query); + this.strategy = queryToolChest.getCacheStrategy(query, objectMapper); this.populateResultCache = CacheUtil.isPopulateResultCache( query, strategy, diff --git a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java index 35276aa723d..3e3d86ca5f2 100644 --- a/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java +++ b/server/src/main/java/org/apache/druid/rpc/indexing/OverlordClientImpl.java @@ -323,9 +323,6 @@ public class OverlordClientImpl implements OverlordClient return new JsonParserIterator<>( jsonMapper.getTypeFactory().constructType(clazz), Futures.immediateFuture(in), - "", // We don't know URL at this point, but it's OK to use empty; it's used for logs/errors - null, - "", // We don't know host at this point, but it's OK to use empty; it's used for logs/errors jsonMapper ); } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java b/server/src/main/java/org/apache/druid/segment/handoff/NoopSegmentHandoffNotifierFactory.java similarity index 90% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java rename to server/src/main/java/org/apache/druid/segment/handoff/NoopSegmentHandoffNotifierFactory.java index 9a51143d64b..bd04aa60839 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopSegmentHandoffNotifierFactory.java +++ b/server/src/main/java/org/apache/druid/segment/handoff/NoopSegmentHandoffNotifierFactory.java @@ -17,12 +17,10 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.handoff; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.SegmentDescriptor; -import org.apache.druid.segment.handoff.SegmentHandoffNotifier; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import java.util.concurrent.Executor; diff --git a/server/src/main/java/org/apache/druid/segment/indexing/IOConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/IOConfig.java index b1784806e7e..9a783980a42 100644 --- a/server/src/main/java/org/apache/druid/segment/indexing/IOConfig.java +++ b/server/src/main/java/org/apache/druid/segment/indexing/IOConfig.java @@ -19,15 +19,11 @@ package org.apache.druid.segment.indexing; -import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; /** */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "realtime", value = RealtimeIOConfig.class) -}) public interface IOConfig { } diff --git a/server/src/main/java/org/apache/druid/segment/indexing/RealtimeIOConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/RealtimeIOConfig.java deleted file mode 100644 index ba8eff09d1e..00000000000 --- a/server/src/main/java/org/apache/druid/segment/indexing/RealtimeIOConfig.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.indexing; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.data.input.FirehoseFactory; -import org.apache.druid.segment.realtime.plumber.PlumberSchool; - -/** - */ -public class RealtimeIOConfig implements IOConfig -{ - private final FirehoseFactory firehoseFactory; - private final PlumberSchool plumberSchool; - - @JsonCreator - public RealtimeIOConfig( - @JsonProperty("firehose") FirehoseFactory firehoseFactory, - @JsonProperty("plumber") PlumberSchool plumberSchool - ) - { - this.firehoseFactory = firehoseFactory; - this.plumberSchool = plumberSchool; - } - - @JsonProperty("firehose") - public FirehoseFactory getFirehoseFactory() - { - return firehoseFactory; - } - - public PlumberSchool getPlumberSchool() - { - return plumberSchool; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/indexing/RealtimeTuningConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/RealtimeTuningConfig.java deleted file mode 100644 index a75a79b4dd9..00000000000 --- a/server/src/main/java/org/apache/druid/segment/indexing/RealtimeTuningConfig.java +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.indexing; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import org.apache.druid.indexer.partitions.PartitionsSpec; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.incremental.AppendableIndexSpec; -import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; -import org.apache.druid.segment.realtime.plumber.IntervalStartVersioningPolicy; -import org.apache.druid.segment.realtime.plumber.RejectionPolicyFactory; -import org.apache.druid.segment.realtime.plumber.ServerTimeRejectionPolicyFactory; -import org.apache.druid.segment.realtime.plumber.VersioningPolicy; -import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; -import org.apache.druid.timeline.partition.NumberedShardSpec; -import org.apache.druid.timeline.partition.ShardSpec; -import org.joda.time.Period; - -import javax.annotation.Nullable; -import java.io.File; -import java.time.Duration; - -/** - * - */ -public class RealtimeTuningConfig implements AppenderatorConfig -{ - private static final Period DEFAULT_INTERMEDIATE_PERSIST_PERIOD = new Period("PT10M"); - private static final Period DEFAULT_WINDOW_PERIOD = new Period("PT10M"); - private static final VersioningPolicy DEFAULT_VERSIONING_POLICY = new IntervalStartVersioningPolicy(); - private static final RejectionPolicyFactory DEFAULT_REJECTION_POLICY_FACTORY = new ServerTimeRejectionPolicyFactory(); - private static final int DEFAULT_MAX_PENDING_PERSISTS = 0; - private static final ShardSpec DEFAULT_SHARD_SPEC = new NumberedShardSpec(0, 1); - private static final IndexSpec DEFAULT_INDEX_SPEC = IndexSpec.DEFAULT; - private static final Boolean DEFAULT_REPORT_PARSE_EXCEPTIONS = Boolean.FALSE; - private static final long DEFAULT_HANDOFF_CONDITION_TIMEOUT = Duration.ofMinutes(15).toMillis(); - private static final long DEFAULT_ALERT_TIMEOUT = 0; - private static final String DEFAULT_DEDUP_COLUMN = null; - - // Might make sense for this to be a builder - public static RealtimeTuningConfig makeDefaultTuningConfig(final @Nullable File basePersistDirectory) - { - return new RealtimeTuningConfig( - DEFAULT_APPENDABLE_INDEX, - DEFAULT_MAX_ROWS_IN_MEMORY_REALTIME, - 0L, - DEFAULT_SKIP_BYTES_IN_MEMORY_OVERHEAD_CHECK, - DEFAULT_INTERMEDIATE_PERSIST_PERIOD, - DEFAULT_WINDOW_PERIOD, - basePersistDirectory, - DEFAULT_VERSIONING_POLICY, - DEFAULT_REJECTION_POLICY_FACTORY, - DEFAULT_MAX_PENDING_PERSISTS, - DEFAULT_SHARD_SPEC, - DEFAULT_INDEX_SPEC, - DEFAULT_INDEX_SPEC, - 0, - 0, - DEFAULT_REPORT_PARSE_EXCEPTIONS, - DEFAULT_HANDOFF_CONDITION_TIMEOUT, - DEFAULT_ALERT_TIMEOUT, - null, - DEFAULT_DEDUP_COLUMN, - DEFAULT_NUM_PERSIST_THREADS - ); - } - - private final AppendableIndexSpec appendableIndexSpec; - private final int maxRowsInMemory; - private final long maxBytesInMemory; - private final boolean skipBytesInMemoryOverheadCheck; - private final Period intermediatePersistPeriod; - private final Period windowPeriod; - private final File basePersistDirectory; - private final VersioningPolicy versioningPolicy; - private final RejectionPolicyFactory rejectionPolicyFactory; - private final int maxPendingPersists; - private final ShardSpec shardSpec; - private final IndexSpec indexSpec; - private final IndexSpec indexSpecForIntermediatePersists; - private final int persistThreadPriority; - private final int mergeThreadPriority; - private final boolean reportParseExceptions; - private final long 
handoffConditionTimeout; - private final long alertTimeout; - @Nullable - private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; - @Nullable - private final String dedupColumn; - private final int numPersistThreads; - - public RealtimeTuningConfig( - @Nullable AppendableIndexSpec appendableIndexSpec, - Integer maxRowsInMemory, - Long maxBytesInMemory, - @Nullable Boolean skipBytesInMemoryOverheadCheck, - Period intermediatePersistPeriod, - Period windowPeriod, - File basePersistDirectory, - VersioningPolicy versioningPolicy, - RejectionPolicyFactory rejectionPolicyFactory, - Integer maxPendingPersists, - ShardSpec shardSpec, - IndexSpec indexSpec, - @Nullable IndexSpec indexSpecForIntermediatePersists, - int persistThreadPriority, - int mergeThreadPriority, - Boolean reportParseExceptions, - Long handoffConditionTimeout, - Long alertTimeout, - @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, - @Nullable String dedupColumn, - @Nullable Integer numPersistThreads - ) - { - this.appendableIndexSpec = appendableIndexSpec == null ? DEFAULT_APPENDABLE_INDEX : appendableIndexSpec; - this.maxRowsInMemory = maxRowsInMemory == null ? DEFAULT_MAX_ROWS_IN_MEMORY_REALTIME : maxRowsInMemory; - // initializing this to 0, it will be lazily initialized to a value - // @see #getMaxBytesInMemoryOrDefault() - this.maxBytesInMemory = maxBytesInMemory == null ? 0 : maxBytesInMemory; - this.skipBytesInMemoryOverheadCheck = skipBytesInMemoryOverheadCheck == null ? - DEFAULT_SKIP_BYTES_IN_MEMORY_OVERHEAD_CHECK : skipBytesInMemoryOverheadCheck; - this.intermediatePersistPeriod = intermediatePersistPeriod == null - ? DEFAULT_INTERMEDIATE_PERSIST_PERIOD - : intermediatePersistPeriod; - this.windowPeriod = windowPeriod == null ? DEFAULT_WINDOW_PERIOD : windowPeriod; - this.basePersistDirectory = basePersistDirectory; - this.versioningPolicy = versioningPolicy; - this.rejectionPolicyFactory = rejectionPolicyFactory == null - ? DEFAULT_REJECTION_POLICY_FACTORY - : rejectionPolicyFactory; - this.maxPendingPersists = maxPendingPersists == null ? DEFAULT_MAX_PENDING_PERSISTS : maxPendingPersists; - this.shardSpec = shardSpec == null ? DEFAULT_SHARD_SPEC : shardSpec; - this.indexSpec = indexSpec == null ? DEFAULT_INDEX_SPEC : indexSpec; - this.indexSpecForIntermediatePersists = indexSpecForIntermediatePersists == null ? - this.indexSpec : indexSpecForIntermediatePersists; - this.mergeThreadPriority = mergeThreadPriority; - this.persistThreadPriority = persistThreadPriority; - this.reportParseExceptions = reportParseExceptions == null - ? DEFAULT_REPORT_PARSE_EXCEPTIONS - : reportParseExceptions; - this.handoffConditionTimeout = handoffConditionTimeout == null - ? DEFAULT_HANDOFF_CONDITION_TIMEOUT - : handoffConditionTimeout; - Preconditions.checkArgument(this.handoffConditionTimeout >= 0, "handoffConditionTimeout must be >= 0"); - - this.alertTimeout = alertTimeout == null ? DEFAULT_ALERT_TIMEOUT : alertTimeout; - Preconditions.checkArgument(this.alertTimeout >= 0, "alertTimeout must be >= 0"); - this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; - this.dedupColumn = dedupColumn == null ? DEFAULT_DEDUP_COLUMN : dedupColumn; - this.numPersistThreads = numPersistThreads == null ? 
- DEFAULT_NUM_PERSIST_THREADS : Math.max(numPersistThreads, DEFAULT_NUM_PERSIST_THREADS); - } - - @JsonCreator - private RealtimeTuningConfig( - @JsonProperty("appendableIndexSpec") @Nullable AppendableIndexSpec appendableIndexSpec, - @JsonProperty("maxRowsInMemory") Integer maxRowsInMemory, - @JsonProperty("maxBytesInMemory") Long maxBytesInMemory, - @JsonProperty("skipBytesInMemoryOverheadCheck") @Nullable Boolean skipBytesInMemoryOverheadCheck, - @JsonProperty("intermediatePersistPeriod") Period intermediatePersistPeriod, - @JsonProperty("windowPeriod") Period windowPeriod, - @JsonProperty("rejectionPolicy") RejectionPolicyFactory rejectionPolicyFactory, - @JsonProperty("maxPendingPersists") Integer maxPendingPersists, - @JsonProperty("shardSpec") ShardSpec shardSpec, - @JsonProperty("indexSpec") IndexSpec indexSpec, - @JsonProperty("indexSpecForIntermediatePersists") @Nullable IndexSpec indexSpecForIntermediatePersists, - @JsonProperty("persistThreadPriority") int persistThreadPriority, - @JsonProperty("mergeThreadPriority") int mergeThreadPriority, - @JsonProperty("reportParseExceptions") Boolean reportParseExceptions, - @JsonProperty("handoffConditionTimeout") Long handoffConditionTimeout, - @JsonProperty("alertTimeout") Long alertTimeout, - @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, - @JsonProperty("dedupColumn") @Nullable String dedupColumn, - @JsonProperty("numPersistThreads") @Nullable Integer numPersistThreads - ) - { - this( - appendableIndexSpec, - maxRowsInMemory, - maxBytesInMemory, - skipBytesInMemoryOverheadCheck, - intermediatePersistPeriod, - windowPeriod, - null, - null, - rejectionPolicyFactory, - maxPendingPersists, - shardSpec, - indexSpec, - indexSpecForIntermediatePersists, - persistThreadPriority, - mergeThreadPriority, - reportParseExceptions, - handoffConditionTimeout, - alertTimeout, - segmentWriteOutMediumFactory, - dedupColumn, - numPersistThreads - ); - } - - @Override - @JsonProperty - public AppendableIndexSpec getAppendableIndexSpec() - { - return appendableIndexSpec; - } - - @Override - @JsonProperty - public int getMaxRowsInMemory() - { - return maxRowsInMemory; - } - - @Override - @JsonProperty - public long getMaxBytesInMemory() - { - return maxBytesInMemory; - } - - @JsonProperty - @Override - public boolean isSkipBytesInMemoryOverheadCheck() - { - return skipBytesInMemoryOverheadCheck; - } - - @Override - @JsonProperty - public Period getIntermediatePersistPeriod() - { - return intermediatePersistPeriod; - } - - @JsonProperty - public Period getWindowPeriod() - { - return windowPeriod; - } - - @Override - public File getBasePersistDirectory() - { - return Preconditions.checkNotNull(basePersistDirectory, "basePersistDirectory not set"); - } - - public VersioningPolicy getVersioningPolicy() - { - return Preconditions.checkNotNull(versioningPolicy, "versioningPolicy not set"); - } - - @JsonProperty("rejectionPolicy") - public RejectionPolicyFactory getRejectionPolicyFactory() - { - return rejectionPolicyFactory; - } - - @Override - @JsonProperty - public int getMaxPendingPersists() - { - return maxPendingPersists; - } - - @Override - public PartitionsSpec getPartitionsSpec() - { - throw new UnsupportedOperationException(); - } - - @JsonProperty - public ShardSpec getShardSpec() - { - return shardSpec; - } - - @Override - @JsonProperty - public IndexSpec getIndexSpec() - { - return indexSpec; - } - - @JsonProperty - @Override - public IndexSpec getIndexSpecForIntermediatePersists() 
- { - return indexSpecForIntermediatePersists; - } - - @JsonProperty - public int getPersistThreadPriority() - { - return this.persistThreadPriority; - } - - @JsonProperty - public int getMergeThreadPriority() - { - return this.mergeThreadPriority; - } - - @Override - @JsonProperty - public boolean isReportParseExceptions() - { - return reportParseExceptions; - } - - @JsonProperty - public long getHandoffConditionTimeout() - { - return handoffConditionTimeout; - } - - @JsonProperty - public long getAlertTimeout() - { - return alertTimeout; - } - - @Override - @JsonProperty - @Nullable - public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() - { - return segmentWriteOutMediumFactory; - } - - @JsonProperty - @Nullable - public String getDedupColumn() - { - return dedupColumn; - } - - @Override - @JsonProperty - public int getNumPersistThreads() - { - return numPersistThreads; - } - - public RealtimeTuningConfig withVersioningPolicy(VersioningPolicy policy) - { - return new RealtimeTuningConfig( - appendableIndexSpec, - maxRowsInMemory, - maxBytesInMemory, - skipBytesInMemoryOverheadCheck, - intermediatePersistPeriod, - windowPeriod, - basePersistDirectory, - policy, - rejectionPolicyFactory, - maxPendingPersists, - shardSpec, - indexSpec, - indexSpecForIntermediatePersists, - persistThreadPriority, - mergeThreadPriority, - reportParseExceptions, - handoffConditionTimeout, - alertTimeout, - segmentWriteOutMediumFactory, - dedupColumn, - numPersistThreads - ); - } - - @Override - public RealtimeTuningConfig withBasePersistDirectory(File dir) - { - return new RealtimeTuningConfig( - appendableIndexSpec, - maxRowsInMemory, - maxBytesInMemory, - skipBytesInMemoryOverheadCheck, - intermediatePersistPeriod, - windowPeriod, - dir, - versioningPolicy, - rejectionPolicyFactory, - maxPendingPersists, - shardSpec, - indexSpec, - indexSpecForIntermediatePersists, - persistThreadPriority, - mergeThreadPriority, - reportParseExceptions, - handoffConditionTimeout, - alertTimeout, - segmentWriteOutMediumFactory, - dedupColumn, - numPersistThreads - ); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java b/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java index ba638c2c485..e190106281a 100644 --- a/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java +++ b/server/src/main/java/org/apache/druid/segment/indexing/TuningConfig.java @@ -19,7 +19,6 @@ package org.apache.druid.segment.indexing; -import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.segment.IndexSpec; @@ -29,9 +28,6 @@ import org.apache.druid.segment.incremental.OnheapIncrementalIndex; /** */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "realtime", value = RealtimeTuningConfig.class) -}) public interface TuningConfig { boolean DEFAULT_LOG_PARSE_EXCEPTIONS = false; diff --git a/server/src/main/java/org/apache/druid/segment/loading/LocalDataSegmentPuller.java b/server/src/main/java/org/apache/druid/segment/loading/LocalDataSegmentPuller.java index 0f27dac9e1b..27807a5c3c8 100644 --- a/server/src/main/java/org/apache/druid/segment/loading/LocalDataSegmentPuller.java +++ b/server/src/main/java/org/apache/druid/segment/loading/LocalDataSegmentPuller.java @@ -19,7 +19,6 @@ package org.apache.druid.segment.loading; -import 
com.google.common.annotations.VisibleForTesting; import com.google.common.base.Predicate; import com.google.common.io.Files; import org.apache.druid.java.util.common.FileUtils; @@ -117,12 +116,6 @@ public class LocalDataSegmentPuller implements URIDataPuller private static final Logger log = new Logger(LocalDataSegmentPuller.class); - @VisibleForTesting - public void getSegmentFiles(DataSegment segment, File dir) throws SegmentLoadingException - { - getSegmentFiles(getFile(segment), dir); - } - public FileUtils.FileCopyResult getSegmentFiles(final File sourceFile, final File dir) throws SegmentLoadingException { if (sourceFile.isDirectory()) { diff --git a/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java b/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java index 9cb2297db82..88e6ee97b98 100644 --- a/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java +++ b/server/src/main/java/org/apache/druid/segment/metadata/AbstractSegmentMetadataCache.java @@ -200,7 +200,7 @@ public abstract class AbstractSegmentMetadataCache tables = new ConcurrentHashMap<>(); + protected final ConcurrentHashMap tables = new ConcurrentHashMap<>(); /** * This lock coordinates the access from multiple threads to those variables guarded by this lock. @@ -269,9 +269,10 @@ public abstract class AbstractSegmentMetadataCache
 * <li>Metadata query is executed only for those non-realtime segments for which the schema is not cached.</li>
 * <li>Datasources marked for refresh are then rebuilt.</li>
 * </ul>
+ * <p>
+ * It is important to note that the datasource schema returned in {@link #getDatasource} & {@link #getDataSourceInformationMap()}
+ * also includes columns from cold segments.
+ * Cold segments are processed in a separate thread and datasource schema from cold segments is separately stored.
+ * </p>
    */ @ManageLifecycle public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCache { private static final EmittingLogger log = new EmittingLogger(CoordinatorSegmentMetadataCache.class); + private static final Long COLD_SCHEMA_PERIOD_MULTIPLIER = 3L; + private static final Long COLD_SCHEMA_SLOWNESS_THRESHOLD_MILLIS = TimeUnit.SECONDS.toMillis(50); private final SegmentMetadataCacheConfig config; private final ColumnTypeMergePolicy columnTypeMergePolicy; private final SegmentSchemaCache segmentSchemaCache; private final SegmentSchemaBackFillQueue segmentSchemaBackfillQueue; + private final SqlSegmentsMetadataManager sqlSegmentsMetadataManager; + private volatile SegmentReplicationStatus segmentReplicationStatus = null; + + // Datasource schema built from only cold segments. + private final ConcurrentHashMap coldSchemaTable = new ConcurrentHashMap<>(); + + // Period for cold schema processing thread. This is a multiple of segment polling period. + // Cold schema processing runs slower than the segment poll to save processing cost of all segments. + // The downside is a delay in columns from cold segment reflecting in the datasource schema. + private final long coldSchemaExecPeriodMillis; + private final ScheduledExecutorService coldSchemaExec; private @Nullable Future cacheExecFuture = null; + private @Nullable Future coldSchemaExecFuture = null; @Inject public CoordinatorSegmentMetadataCache( @@ -92,7 +128,9 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach InternalQueryConfig internalQueryConfig, ServiceEmitter emitter, SegmentSchemaCache segmentSchemaCache, - SegmentSchemaBackFillQueue segmentSchemaBackfillQueue + SegmentSchemaBackFillQueue segmentSchemaBackfillQueue, + SqlSegmentsMetadataManager sqlSegmentsMetadataManager, + Supplier segmentsMetadataManagerConfigSupplier ) { super(queryLifecycleFactory, config, escalator, internalQueryConfig, emitter); @@ -100,6 +138,15 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach this.columnTypeMergePolicy = config.getMetadataColumnTypeMergePolicy(); this.segmentSchemaCache = segmentSchemaCache; this.segmentSchemaBackfillQueue = segmentSchemaBackfillQueue; + this.sqlSegmentsMetadataManager = sqlSegmentsMetadataManager; + this.coldSchemaExecPeriodMillis = + segmentsMetadataManagerConfigSupplier.get().getPollDuration().getMillis() * COLD_SCHEMA_PERIOD_MULTIPLIER; + coldSchemaExec = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setNameFormat("DruidColdSchema-ScheduledExecutor-%d") + .setDaemon(false) + .build() + ); initServerViewTimelineCallback(serverView); } @@ -168,11 +215,15 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach { callbackExec.shutdownNow(); cacheExec.shutdownNow(); + coldSchemaExec.shutdownNow(); segmentSchemaCache.onLeaderStop(); segmentSchemaBackfillQueue.onLeaderStop(); if (cacheExecFuture != null) { cacheExecFuture.cancel(true); } + if (coldSchemaExecFuture != null) { + coldSchemaExecFuture.cancel(true); + } } public void onLeaderStart() @@ -181,6 +232,12 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach try { segmentSchemaBackfillQueue.onLeaderStart(); cacheExecFuture = cacheExec.submit(this::cacheExecLoop); + coldSchemaExecFuture = coldSchemaExec.schedule( + this::coldDatasourceSchemaExec, + coldSchemaExecPeriodMillis, + TimeUnit.MILLISECONDS + ); + if (config.isAwaitInitializationOnStart()) { awaitInitialization(); } @@ -196,6 +253,9 @@ 
public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach if (cacheExecFuture != null) { cacheExecFuture.cancel(true); } + if (coldSchemaExecFuture != null) { + coldSchemaExecFuture.cancel(true); + } segmentSchemaCache.onLeaderStop(); segmentSchemaBackfillQueue.onLeaderStop(); } @@ -209,6 +269,11 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach segmentSchemaCache.awaitInitialization(); } + public void updateSegmentReplicationStatus(SegmentReplicationStatus segmentReplicationStatus) + { + this.segmentReplicationStatus = segmentReplicationStatus; + } + @Override protected void unmarkSegmentAsMutable(SegmentId segmentId) { @@ -336,6 +401,62 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach return availableSegmentMetadata; } + @Override + public DataSourceInformation getDatasource(String name) + { + return getMergedDatasourceInformation(tables.get(name), coldSchemaTable.get(name)).orElse(null); + } + + @Override + public Map getDataSourceInformationMap() + { + Map hot = new HashMap<>(tables); + Map cold = new HashMap<>(coldSchemaTable); + Set combinedDatasources = new HashSet<>(hot.keySet()); + combinedDatasources.addAll(cold.keySet()); + ImmutableMap.Builder combined = ImmutableMap.builder(); + + for (String dataSource : combinedDatasources) { + getMergedDatasourceInformation(hot.get(dataSource), cold.get(dataSource)) + .ifPresent(merged -> combined.put( + dataSource, + merged + )); + } + + return combined.build(); + } + + private Optional getMergedDatasourceInformation( + final DataSourceInformation hot, + final DataSourceInformation cold + ) + { + if (hot == null && cold == null) { + return Optional.empty(); + } else if (hot != null && cold == null) { + return Optional.of(hot); + } else if (hot == null && cold != null) { + return Optional.of(cold); + } else { + final Map columnTypes = new LinkedHashMap<>(); + + List signatures = new ArrayList<>(); + // hot datasource schema takes precedence + signatures.add(hot.getRowSignature()); + signatures.add(cold.getRowSignature()); + + for (RowSignature signature : signatures) { + mergeRowSignature(columnTypes, signature); + } + + final RowSignature.Builder builder = RowSignature.builder(); + columnTypes.forEach(builder::add); + + return Optional.of(new DataSourceInformation(hot.getDataSource(), builder.build())); + } + } + /** * Executes SegmentMetadataQuery to fetch schema information for each segment in the refresh list. * The schema information for individual segments is combined to construct a table schema, which is then cached. @@ -382,6 +503,7 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach // Rebuild the datasources. for (String dataSource : dataSourcesToRebuild) { final RowSignature rowSignature = buildDataSourceRowSignature(dataSource); + if (rowSignature == null) { log.info("RowSignature null for dataSource [%s], implying that it no longer exists. All metadata removed.", dataSource); tables.remove(dataSource); @@ -419,6 +541,94 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach return cachedSegments; } + @Nullable + private Integer getReplicationFactor(SegmentId segmentId) + { + if (segmentReplicationStatus == null) { + return null; + } + SegmentReplicaCount replicaCountsInCluster = segmentReplicationStatus.getReplicaCountsInCluster(segmentId); + return replicaCountsInCluster == null ? 
null : replicaCountsInCluster.required(); + } + + @VisibleForTesting + protected void coldDatasourceSchemaExec() + { + Stopwatch stopwatch = Stopwatch.createStarted(); + + Set dataSourceWithColdSegmentSet = new HashSet<>(); + + int datasources = 0; + int segments = 0; + int dataSourceWithColdSegments = 0; + + Collection immutableDataSources = + sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments(); + + for (ImmutableDruidDataSource dataSource : immutableDataSources) { + datasources++; + Collection dataSegments = dataSource.getSegments(); + + final Map columnTypes = new LinkedHashMap<>(); + + for (DataSegment segment : dataSegments) { + Integer replicationFactor = getReplicationFactor(segment.getId()); + if (replicationFactor != null && replicationFactor != 0) { + continue; + } + Optional optionalSchema = segmentSchemaCache.getSchemaForSegment(segment.getId()); + if (optionalSchema.isPresent()) { + RowSignature rowSignature = optionalSchema.get().getSchemaPayload().getRowSignature(); + mergeRowSignature(columnTypes, rowSignature); + } + segments++; + } + + if (columnTypes.isEmpty()) { + // this datasource doesn't have any cold segment + continue; + } + + final RowSignature.Builder builder = RowSignature.builder(); + columnTypes.forEach(builder::add); + + RowSignature coldSignature = builder.build(); + + String dataSourceName = dataSource.getName(); + dataSourceWithColdSegmentSet.add(dataSourceName); + dataSourceWithColdSegments++; + + log.debug("[%s] signature from cold segments is [%s]", dataSourceName, coldSignature); + + coldSchemaTable.put(dataSourceName, new DataSourceInformation(dataSourceName, coldSignature)); + } + + // remove any stale datasource from the map + coldSchemaTable.keySet().retainAll(dataSourceWithColdSegmentSet); + + String executionStatsLog = StringUtils.format( + "Cold schema processing took [%d] millis. " + + "Processed total [%d] datasources, [%d] segments. 
Found [%d] datasources with cold segments.", + stopwatch.millisElapsed(), datasources, segments, dataSourceWithColdSegments + ); + if (stopwatch.millisElapsed() > COLD_SCHEMA_SLOWNESS_THRESHOLD_MILLIS) { + log.info(executionStatsLog); + } else { + log.debug(executionStatsLog); + } + } + + private void mergeRowSignature(final Map columnTypes, final RowSignature signature) + { + for (String column : signature.getColumnNames()) { + final ColumnType columnType = + signature.getColumnType(column) + .orElseThrow(() -> new ISE("Encountered null type for column [%s]", column)); + + columnTypes.compute(column, (c, existingType) -> columnTypeMergePolicy.merge(existingType, columnType)); + } + } + @VisibleForTesting @Nullable @Override @@ -434,13 +644,7 @@ public class CoordinatorSegmentMetadataCache extends AbstractSegmentMetadataCach Optional optionalSchema = segmentSchemaCache.getSchemaForSegment(segmentId); if (optionalSchema.isPresent()) { RowSignature rowSignature = optionalSchema.get().getSchemaPayload().getRowSignature(); - for (String column : rowSignature.getColumnNames()) { - final ColumnType columnType = - rowSignature.getColumnType(column) - .orElseThrow(() -> new ISE("Encountered null type for column [%s]", column)); - - columnTypes.compute(column, (c, existingType) -> columnTypeMergePolicy.merge(existingType, columnType)); - } + mergeRowSignature(columnTypes, rowSignature); } else { // mark it for refresh, however, this case shouldn't arise by design markSegmentAsNeedRefresh(segmentId); diff --git a/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaBackFillQueue.java b/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaBackFillQueue.java index 7855e11da37..4e5b9815ced 100644 --- a/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaBackFillQueue.java +++ b/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaBackFillQueue.java @@ -179,7 +179,6 @@ public class SegmentSchemaBackFillQueue entry.getValue(), CentralizedDatasourceSchemaConfig.SCHEMA_VERSION ); - // Mark the segments as published in the cache. for (SegmentSchemaMetadataPlus plus : entry.getValue()) { segmentSchemaCache.markMetadataQueryResultPublished(plus.getSegmentId()); diff --git a/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaCache.java b/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaCache.java index c28e2b693bb..3364a3beb7c 100644 --- a/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaCache.java +++ b/server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaCache.java @@ -19,6 +19,7 @@ package org.apache.druid.segment.metadata; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; import org.apache.druid.guice.LazySingleton; @@ -145,6 +146,12 @@ public class SegmentSchemaCache public void updateFinalizedSegmentSchema(FinalizedSegmentSchemaInfo finalizedSegmentSchemaInfo) { this.finalizedSegmentSchemaInfo = finalizedSegmentSchemaInfo; + + // remove metadata for segments which have been polled in the last database poll + temporaryPublishedMetadataQueryResults + .keySet() + .removeAll(finalizedSegmentSchemaInfo.getFinalizedSegmentMetadata().keySet()); + setInitialized(); } @@ -185,14 +192,6 @@ public class SegmentSchemaCache temporaryMetadataQueryResults.remove(segmentId); } - /** - * temporaryPublishedMetadataQueryResults is reset after each DB poll. 
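As a rough illustration of how the hot and cold datasource signatures above can be folded into a single schema, here is a simplified sketch (plain String column types and a stand-in merge-policy interface, both assumptions for illustration only); earlier signatures take precedence on conflicts, mirroring "hot datasource schema takes precedence".

```java
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SchemaMergeSketch
{
  /** Stand-in for a column type merge policy; names are illustrative. */
  public interface ColumnTypeMergePolicy
  {
    String merge(String existingType, String newType);
  }

  /** Merge signatures in order; earlier entries win ties via the policy. */
  public Map<String, String> merge(List<Map<String, String>> signatures, ColumnTypeMergePolicy policy)
  {
    final Map<String, String> columnTypes = new LinkedHashMap<>();
    for (Map<String, String> signature : signatures) {
      signature.forEach(
          (column, type) ->
              columnTypes.compute(column, (c, existing) -> policy.merge(existing, type))
      );
    }
    return columnTypes;
  }
}
```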
- */ - public void resetTemporaryPublishedMetadataQueryResultOnDBPoll() - { - temporaryPublishedMetadataQueryResults.clear(); - } - /** * Fetch schema for a given segment. Note, that there is no check on schema version in this method, * since schema corresponding to a particular version {@link CentralizedDatasourceSchemaConfig#SCHEMA_VERSION} is cached. @@ -325,6 +324,12 @@ public class SegmentSchemaCache ); } + @VisibleForTesting + SchemaPayloadPlus getTemporaryPublishedMetadataQueryResults(SegmentId id) + { + return temporaryPublishedMetadataQueryResults.get(id); + } + /** * This class encapsulates schema information for segments polled from the DB. */ diff --git a/server/src/main/java/org/apache/druid/segment/realtime/FireDepartment.java b/server/src/main/java/org/apache/druid/segment/realtime/FireDepartment.java deleted file mode 100644 index 2c25a1de174..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/FireDepartment.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.IngestionSpec; -import org.apache.druid.segment.indexing.RealtimeIOConfig; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; - -import java.io.IOException; - -/** - * A Fire Department has a Firehose and a Plumber. - *
    - * This is a metaphor for a realtime stream (Firehose) and a coordinator of sinks (Plumber). The Firehose provides the - * realtime stream of data. The Plumber directs each drop of water from the firehose into the correct sink and makes - * sure that the sinks don't overflow. - */ -public class FireDepartment extends IngestionSpec -{ - private final DataSchema dataSchema; - private final RealtimeIOConfig ioConfig; - private final RealtimeTuningConfig tuningConfig; - private final FireDepartmentMetrics metrics = new FireDepartmentMetrics(); - - @JsonCreator - public FireDepartment( - @JsonProperty("dataSchema") DataSchema dataSchema, - @JsonProperty("ioConfig") RealtimeIOConfig ioConfig, - @JsonProperty("tuningConfig") RealtimeTuningConfig tuningConfig - ) - { - super(dataSchema, ioConfig, tuningConfig); - Preconditions.checkNotNull(dataSchema, "dataSchema"); - Preconditions.checkNotNull(ioConfig, "ioConfig"); - - this.dataSchema = dataSchema; - this.ioConfig = ioConfig; - this.tuningConfig = tuningConfig == null ? RealtimeTuningConfig.makeDefaultTuningConfig(null) : tuningConfig; - - } - - /** - * Provides the data schema for the feed that this FireDepartment is in charge of. - * - * @return the Schema for this feed. - */ - @JsonProperty("dataSchema") - @Override - public DataSchema getDataSchema() - { - return dataSchema; - } - - @JsonProperty("ioConfig") - @Override - public RealtimeIOConfig getIOConfig() - { - return ioConfig; - } - - @JsonProperty("tuningConfig") - @Override - public RealtimeTuningConfig getTuningConfig() - { - return tuningConfig; - } - - public Firehose connect() throws IOException - { - return ioConfig.getFirehoseFactory() - .connect(Preconditions.checkNotNull(dataSchema.getParser(), "inputRowParser"), null); - } - - public FireDepartmentMetrics getMetrics() - { - return metrics; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/FireDepartmentConfig.java b/server/src/main/java/org/apache/druid/segment/realtime/FireDepartmentConfig.java deleted file mode 100644 index 299bc6e5455..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/FireDepartmentConfig.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import org.joda.time.Period; - -/** - */ -public class FireDepartmentConfig -{ - private final int maxRowsInMemory; - private final Period intermediatePersistPeriod; - - @JsonCreator - public FireDepartmentConfig( - @JsonProperty("maxRowsInMemory") int maxRowsInMemory, - @JsonProperty("intermediatePersistPeriod") Period intermediatePersistPeriod - ) - { - this.maxRowsInMemory = maxRowsInMemory; - this.intermediatePersistPeriod = intermediatePersistPeriod; - - Preconditions.checkArgument(maxRowsInMemory > 0, "maxRowsInMemory[%s] should be greater than 0", maxRowsInMemory); - Preconditions.checkNotNull(intermediatePersistPeriod, "intermediatePersistPeriod"); - } - - @JsonProperty - public int getMaxRowsInMemory() - { - return maxRowsInMemory; - } - - @JsonProperty - public Period getIntermediatePersistPeriod() - { - return intermediatePersistPeriod; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/RealtimeMetricsMonitor.java b/server/src/main/java/org/apache/druid/segment/realtime/RealtimeMetricsMonitor.java deleted file mode 100644 index c923c9e7bbd..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/RealtimeMetricsMonitor.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime; - -import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; -import org.apache.druid.java.util.metrics.AbstractMonitor; -import org.apache.druid.java.util.metrics.MonitorUtils; -import org.apache.druid.query.DruidMetrics; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * RealtimeMetricsMonitor is only used by RealtimeIndexTask, this monitor only supports FireDepartmentMetrics. - * New ingestion task types should support RowIngestionMeters and use TaskRealtimeMetricsMonitor instead. - * Please see the comment on RowIngestionMeters for more information regarding the relationship between - * RowIngestionMeters and FireDepartmentMetrics. 
- */ -public class RealtimeMetricsMonitor extends AbstractMonitor -{ - private static final EmittingLogger log = new EmittingLogger(RealtimeMetricsMonitor.class); - - private final Map previousValues; - private final List fireDepartments; - private final Map dimensions; - - @Inject - public RealtimeMetricsMonitor(List fireDepartments) - { - this(fireDepartments, ImmutableMap.of()); - } - - public RealtimeMetricsMonitor(List fireDepartments, Map dimensions) - { - this.fireDepartments = fireDepartments; - this.previousValues = new HashMap<>(); - this.dimensions = ImmutableMap.copyOf(dimensions); - } - - @Override - public boolean doMonitor(ServiceEmitter emitter) - { - for (FireDepartment fireDepartment : fireDepartments) { - FireDepartmentMetrics metrics = fireDepartment.getMetrics().snapshot(); - FireDepartmentMetrics previous = previousValues.get(fireDepartment); - - if (previous == null) { - previous = new FireDepartmentMetrics(); - } - - final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder() - .setDimension(DruidMetrics.DATASOURCE, fireDepartment.getDataSchema().getDataSource()); - MonitorUtils.addDimensionsToBuilder(builder, dimensions); - - final long thrownAway = metrics.thrownAway() - previous.thrownAway(); - if (thrownAway > 0) { - log.warn( - "[%,d] events thrown away. Possible causes: null events, events filtered out by transformSpec, or events outside windowPeriod.", - thrownAway - ); - } - emitter.emit(builder.setMetric("ingest/events/thrownAway", thrownAway)); - final long unparseable = metrics.unparseable() - previous.unparseable(); - if (unparseable > 0) { - log.error( - "[%,d] unparseable events discarded. Turn on debug logging to see exception stack trace.", - unparseable - ); - } - emitter.emit(builder.setMetric("ingest/events/unparseable", unparseable)); - final long dedup = metrics.dedup() - previous.dedup(); - if (dedup > 0) { - log.warn("[%,d] duplicate events!", dedup); - } - emitter.emit(builder.setMetric("ingest/events/duplicate", dedup)); - - emitter.emit(builder.setMetric("ingest/events/processed", metrics.processed() - previous.processed())); - emitter.emit(builder.setMetric("ingest/rows/output", metrics.rowOutput() - previous.rowOutput())); - emitter.emit(builder.setMetric("ingest/persists/count", metrics.numPersists() - previous.numPersists())); - emitter.emit(builder.setMetric("ingest/persists/time", metrics.persistTimeMillis() - previous.persistTimeMillis())); - emitter.emit(builder.setMetric("ingest/persists/cpu", metrics.persistCpuTime() - previous.persistCpuTime())); - emitter.emit( - builder.setMetric( - "ingest/persists/backPressure", - metrics.persistBackPressureMillis() - previous.persistBackPressureMillis() - ) - ); - emitter.emit(builder.setMetric("ingest/persists/failed", metrics.failedPersists() - previous.failedPersists())); - emitter.emit(builder.setMetric("ingest/handoff/failed", metrics.failedHandoffs() - previous.failedHandoffs())); - emitter.emit(builder.setMetric("ingest/merge/time", metrics.mergeTimeMillis() - previous.mergeTimeMillis())); - emitter.emit(builder.setMetric("ingest/merge/cpu", metrics.mergeCpuTime() - previous.mergeCpuTime())); - emitter.emit(builder.setMetric("ingest/handoff/count", metrics.handOffCount() - previous.handOffCount())); - emitter.emit(builder.setMetric("ingest/sink/count", metrics.sinkCount())); - - long messageGap = metrics.messageGap(); - if (messageGap >= 0) { - emitter.emit(builder.setMetric("ingest/events/messageGap", messageGap)); - } - - long maxSegmentHandoffTime = 
metrics.maxSegmentHandoffTime(); - if (maxSegmentHandoffTime >= 0) { - emitter.emit(builder.setMetric("ingest/handoff/time", maxSegmentHandoffTime)); - } - - previousValues.put(fireDepartment, metrics); - } - - return true; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/FireDepartmentMetrics.java b/server/src/main/java/org/apache/druid/segment/realtime/SegmentGenerationMetrics.java similarity index 79% rename from server/src/main/java/org/apache/druid/segment/realtime/FireDepartmentMetrics.java rename to server/src/main/java/org/apache/druid/segment/realtime/SegmentGenerationMetrics.java index 8fb1f1309c1..564a3322bf9 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/FireDepartmentMetrics.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/SegmentGenerationMetrics.java @@ -25,17 +25,14 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; /** + * Metrics for segment generation. */ -public class FireDepartmentMetrics +public class SegmentGenerationMetrics { private static final long NO_EMIT_SEGMENT_HANDOFF_TIME = -1L; private static final long NO_EMIT_MESSAGE_GAP = -1L; - private final AtomicLong processedCount = new AtomicLong(0); - private final AtomicLong processedWithErrorsCount = new AtomicLong(0); - private final AtomicLong thrownAwayCount = new AtomicLong(0); - private final AtomicLong unparseableCount = new AtomicLong(0); private final AtomicLong dedupCount = new AtomicLong(0); private final AtomicLong rowOutputCount = new AtomicLong(0); private final AtomicLong numPersists = new AtomicLong(0); @@ -58,31 +55,6 @@ public class FireDepartmentMetrics private final AtomicLong maxSegmentHandoffTime = new AtomicLong(NO_EMIT_SEGMENT_HANDOFF_TIME); - public void incrementProcessed() - { - processedCount.incrementAndGet(); - } - - public void incrementProcessedWithErrors() - { - processedWithErrorsCount.incrementAndGet(); - } - - public void incrementThrownAway() - { - thrownAwayCount.incrementAndGet(); - } - - public void incrementDedup() - { - dedupCount.incrementAndGet(); - } - - public void incrementUnparseable() - { - unparseableCount.incrementAndGet(); - } - public void incrementRowOutputCount(long numRows) { rowOutputCount.addAndGet(numRows); @@ -113,11 +85,6 @@ public class FireDepartmentMetrics failedHandoffs.incrementAndGet(); } - public void incrementMergeTimeMillis(long millis) - { - mergeTimeMillis.addAndGet(millis); - } - public void incrementMergedRows(long rows) { mergedRows.addAndGet(rows); @@ -128,16 +95,6 @@ public class FireDepartmentMetrics pushedRows.addAndGet(rows); } - public void incrementMergeCpuTime(long mergeTime) - { - mergeCpuTime.addAndGet(mergeTime); - } - - public void incrementPersistCpuTime(long persistTime) - { - persistCpuTime.addAndGet(persistTime); - } - public void incrementHandOffCount() { handOffCount.incrementAndGet(); @@ -169,26 +126,6 @@ public class FireDepartmentMetrics return processingDone.get(); } - public long processed() - { - return processedCount.get(); - } - - public long processedWithErrors() - { - return processedWithErrorsCount.get(); - } - - public long thrownAway() - { - return thrownAwayCount.get(); - } - - public long unparseable() - { - return unparseableCount.get(); - } - public long dedup() { return dedupCount.get(); @@ -268,13 +205,9 @@ public class FireDepartmentMetrics return maxSegmentHandoffTime.get(); } - public FireDepartmentMetrics snapshot() + public SegmentGenerationMetrics snapshot() { - final 
FireDepartmentMetrics retVal = new FireDepartmentMetrics(); - retVal.processedCount.set(processedCount.get()); - retVal.processedWithErrorsCount.set(processedWithErrorsCount.get()); - retVal.thrownAwayCount.set(thrownAwayCount.get()); - retVal.unparseableCount.set(unparseableCount.get()); + final SegmentGenerationMetrics retVal = new SegmentGenerationMetrics(); retVal.dedupCount.set(dedupCount.get()); retVal.rowOutputCount.set(rowOutputCount.get()); retVal.numPersists.set(numPersists.get()); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/SegmentPublisher.java b/server/src/main/java/org/apache/druid/segment/realtime/SegmentPublisher.java deleted file mode 100644 index 7910e211ca5..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/SegmentPublisher.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime; - -import org.apache.druid.timeline.DataSegment; - -import java.io.IOException; - -public interface SegmentPublisher -{ - void publishSegment(DataSegment segment) throws IOException; -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorFactory.java deleted file mode 100644 index 4b8698490e9..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorFactory.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.appenderator; - -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; - -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "default", value = DefaultRealtimeAppenderatorFactory.class), - @JsonSubTypes.Type(name = "offline", value = DefaultOfflineAppenderatorFactory.class) -}) -public interface AppenderatorFactory -{ - Appenderator build( - DataSchema schema, - RealtimeTuningConfig config, - FireDepartmentMetrics metrics - ); -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorImpl.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorImpl.java index 764d7239736..734abc6ed54 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorImpl.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorImpl.java @@ -73,9 +73,9 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.FingerprintGenerator; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.FireHydrant; -import org.apache.druid.segment.realtime.plumber.Sink; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; +import org.apache.druid.segment.realtime.sink.Sink; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; @@ -135,7 +135,7 @@ public class AppenderatorImpl implements Appenderator private final String myId; private final DataSchema schema; private final AppenderatorConfig tuningConfig; - private final FireDepartmentMetrics metrics; + private final SegmentGenerationMetrics metrics; private final DataSegmentPusher dataSegmentPusher; private final ObjectMapper objectMapper; private final DataSegmentAnnouncer segmentAnnouncer; @@ -211,7 +211,7 @@ public class AppenderatorImpl implements Appenderator String id, DataSchema schema, AppenderatorConfig tuningConfig, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, DataSegmentAnnouncer segmentAnnouncer, @@ -514,8 +514,7 @@ public class AppenderatorImpl implements Appenderator tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, - useMaxMemoryEstimates, - null + useMaxMemoryEstimates ); bytesCurrentlyInMemory.addAndGet(calculateSinkMemoryInUsed()); @@ -1340,7 +1339,6 @@ public class AppenderatorImpl implements Appenderator tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, useMaxMemoryEstimates, - null, hydrants ); rowsSoFar += currSink.getNumRows(); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java deleted file mode 100644 index 56af6382afa..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumber.java +++ /dev/null @@ -1,492 +0,0 @@ 
-/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.appenderator; - -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.base.Stopwatch; -import com.google.common.base.Supplier; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.FutureCallback; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.MoreExecutors; -import org.apache.druid.common.guava.ThreadRenamingCallable; -import org.apache.druid.data.input.Committer; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.granularity.Granularity; -import org.apache.druid.java.util.common.guava.Sequence; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.query.Query; -import org.apache.druid.query.QueryPlus; -import org.apache.druid.query.QueryRunner; -import org.apache.druid.query.context.ResponseContext; -import org.apache.druid.segment.handoff.SegmentHandoffNotifier; -import org.apache.druid.segment.incremental.IncrementalIndexAddResult; -import org.apache.druid.segment.incremental.IndexSizeExceededException; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.SegmentPublisher; -import org.apache.druid.segment.realtime.plumber.Committers; -import org.apache.druid.segment.realtime.plumber.Plumber; -import org.apache.druid.segment.realtime.plumber.RejectionPolicy; -import org.apache.druid.segment.realtime.plumber.VersioningPolicy; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.apache.druid.timeline.DataSegment; -import org.joda.time.DateTime; -import org.joda.time.Duration; -import org.joda.time.Interval; -import org.joda.time.Period; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; - -public class AppenderatorPlumber implements Plumber -{ - private static final EmittingLogger log = new EmittingLogger(AppenderatorPlumber.class); - 
private static final int WARN_DELAY = 1000; - - private final DataSchema schema; - private final RealtimeTuningConfig config; - private final RejectionPolicy rejectionPolicy; - private final FireDepartmentMetrics metrics; - private final DataSegmentAnnouncer segmentAnnouncer; - private final SegmentPublisher segmentPublisher; - private final SegmentHandoffNotifier handoffNotifier; - private final Object handoffCondition = new Object(); - private final ConcurrentMap segments = new ConcurrentHashMap<>(); - private final Appenderator appenderator; - - private volatile boolean shuttingDown = false; - private volatile boolean stopped = false; - private volatile boolean cleanShutdown = true; - private volatile ScheduledExecutorService scheduledExecutor = null; - - private volatile Supplier lastCommitterSupplier = null; - - public AppenderatorPlumber( - DataSchema schema, - RealtimeTuningConfig config, - FireDepartmentMetrics metrics, - DataSegmentAnnouncer segmentAnnouncer, - SegmentPublisher segmentPublisher, - SegmentHandoffNotifier handoffNotifier, - Appenderator appenderator - ) - { - this.schema = schema; - this.config = config; - this.rejectionPolicy = config.getRejectionPolicyFactory().create(config.getWindowPeriod()); - this.metrics = metrics; - this.segmentAnnouncer = segmentAnnouncer; - this.segmentPublisher = segmentPublisher; - this.handoffNotifier = handoffNotifier; - this.appenderator = appenderator; - - log.info("Creating plumber using rejectionPolicy[%s]", getRejectionPolicy()); - } - - public Map getSegmentsView() - { - return ImmutableMap.copyOf(segments); - } - - public DataSchema getSchema() - { - return schema; - } - - public RealtimeTuningConfig getConfig() - { - return config; - } - - public RejectionPolicy getRejectionPolicy() - { - return rejectionPolicy; - } - - @Override - public Object startJob() - { - handoffNotifier.start(); - Object retVal = appenderator.startJob(); - initializeExecutors(); - startPersistThread(); - // Push pending sinks bootstrapped from previous run - mergeAndPush(); - return retVal; - } - - @Override - public IncrementalIndexAddResult add(InputRow row, Supplier committerSupplier) - throws IndexSizeExceededException - { - final SegmentIdWithShardSpec identifier = getSegmentIdentifier(row.getTimestampFromEpoch()); - if (identifier == null) { - return Plumber.THROWAWAY; - } - - try { - final Appenderator.AppenderatorAddResult addResult = appenderator.add(identifier, row, committerSupplier); - lastCommitterSupplier = committerSupplier; - return new IncrementalIndexAddResult(addResult.getNumRowsInSegment(), 0); - } - catch (SegmentNotWritableException e) { - // Segment already started handoff - return Plumber.NOT_WRITABLE; - } - } - - @Override - public QueryRunner getQueryRunner(final Query query) - { - return new QueryRunner() - { - @Override - public Sequence run(final QueryPlus queryPlus, final ResponseContext responseContext) - { - return queryPlus.run(appenderator, responseContext); - } - }; - } - - @Override - public void persist(final Committer committer) - { - final Stopwatch runExecStopwatch = Stopwatch.createStarted(); - appenderator.persistAll(committer); - - final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS); - metrics.incrementPersistBackPressureMillis(startDelay); - if (startDelay > WARN_DELAY) { - log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay); - } - runExecStopwatch.stop(); - } - - @Override - public void finishJob() - { - log.info("Shutting down..."); - - 
shuttingDown = true; - - List pending = appenderator.getSegments(); - if (pending.isEmpty()) { - log.info("No segments to hand off."); - } else { - log.info("Pushing segments: %s", Joiner.on(", ").join(pending)); - } - - try { - if (lastCommitterSupplier != null) { - // Push all remaining data - mergeAndPush(); - } - - synchronized (handoffCondition) { - while (!segments.isEmpty()) { - log.info("Waiting to hand off: %s", Joiner.on(", ").join(pending)); - handoffCondition.wait(); - pending = appenderator.getSegments(); - } - } - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - stopped = true; - handoffNotifier.close(); - shutdownExecutors(); - appenderator.close(); - } - - if (!cleanShutdown) { - throw new ISE("Exception occurred during persist and merge."); - } - } - - private SegmentIdWithShardSpec getSegmentIdentifier(long timestamp) - { - if (!rejectionPolicy.accept(timestamp)) { - return null; - } - - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final VersioningPolicy versioningPolicy = config.getVersioningPolicy(); - - DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp)); - final long truncatedTime = truncatedDateTime.getMillis(); - - SegmentIdWithShardSpec retVal = segments.get(truncatedTime); - - if (retVal == null) { - final Interval interval = new Interval( - truncatedDateTime, - segmentGranularity.increment(truncatedDateTime) - ); - - retVal = new SegmentIdWithShardSpec( - schema.getDataSource(), - interval, - versioningPolicy.getVersion(interval), - config.getShardSpec() - ); - addSegment(retVal); - - } - - return retVal; - } - - protected void initializeExecutors() - { - if (scheduledExecutor == null) { - scheduledExecutor = Execs.scheduledSingleThreaded("plumber_scheduled_%d"); - } - } - - protected void shutdownExecutors() - { - if (scheduledExecutor != null) { - scheduledExecutor.shutdown(); - } - } - - private void addSegment(final SegmentIdWithShardSpec identifier) - { - segments.put(identifier.getInterval().getStartMillis(), identifier); - try { - segmentAnnouncer.announceSegment( - new DataSegment( - identifier.getDataSource(), - identifier.getInterval(), - identifier.getVersion(), - ImmutableMap.of(), - ImmutableList.of(), - ImmutableList.of(), - identifier.getShardSpec(), - null, - 0 - ) - ); - } - catch (IOException e) { - log.makeAlert(e, "Failed to announce new segment[%s]", identifier.getDataSource()) - .addData("interval", identifier.getInterval()) - .emit(); - } - } - - public void dropSegment(final SegmentIdWithShardSpec identifier) - { - log.info("Dropping segment: %s", identifier); - segments.remove(identifier.getInterval().getStartMillis()); - - Futures.addCallback( - appenderator.drop(identifier), - new FutureCallback() - { - @Override - public void onSuccess(Object result) - { - log.info("Dropped segment: %s", identifier); - } - - @Override - public void onFailure(Throwable e) - { - // TODO: Retry? 
- log.warn(e, "Failed to drop segment: %s", identifier); - } - }, - MoreExecutors.directExecutor() - ); - } - - private void startPersistThread() - { - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final Period windowPeriod = config.getWindowPeriod(); - - final DateTime truncatedNow = segmentGranularity.bucketStart(DateTimes.nowUtc()); - final long windowMillis = windowPeriod.toStandardDuration().getMillis(); - - log.info( - "Expect to run at [%s]", - DateTimes.nowUtc().plus( - new Duration( - System.currentTimeMillis(), - segmentGranularity.increment(truncatedNow).getMillis() + windowMillis - ) - ) - ); - - String threadName = StringUtils.format( - "%s-overseer-%d", - schema.getDataSource(), - config.getShardSpec().getPartitionNum() - ); - ThreadRenamingCallable threadRenamingCallable = - new ThreadRenamingCallable(threadName) - { - @Override - public ScheduledExecutors.Signal doCall() - { - if (stopped) { - log.info("Stopping merge-n-push overseer thread"); - return ScheduledExecutors.Signal.STOP; - } - - mergeAndPush(); - - if (stopped) { - log.info("Stopping merge-n-push overseer thread"); - return ScheduledExecutors.Signal.STOP; - } else { - return ScheduledExecutors.Signal.REPEAT; - } - } - }; - Duration initialDelay = new Duration( - System.currentTimeMillis(), - segmentGranularity.increment(truncatedNow).getMillis() + windowMillis - ); - Duration rate = new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)); - ScheduledExecutors.scheduleAtFixedRate(scheduledExecutor, initialDelay, rate, threadRenamingCallable); - } - - private void mergeAndPush() - { - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final Period windowPeriod = config.getWindowPeriod(); - - final long windowMillis = windowPeriod.toStandardDuration().getMillis(); - log.info("Starting merge and push."); - DateTime minTimestampAsDate = segmentGranularity.bucketStart( - DateTimes.utc(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis) - ); - long minTimestamp = minTimestampAsDate.getMillis(); - - final List appenderatorSegments = appenderator.getSegments(); - final List segmentsToPush = new ArrayList<>(); - - if (shuttingDown) { - log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size()); - segmentsToPush.addAll(appenderatorSegments); - } else { - log.info( - "Found [%,d] segments. Attempting to hand off segments that start before [%s].", - appenderatorSegments.size(), - minTimestampAsDate - ); - - for (SegmentIdWithShardSpec segment : appenderatorSegments) { - final Long intervalStart = segment.getInterval().getStartMillis(); - if (intervalStart < minTimestamp) { - log.info("Adding entry [%s] for merge and push.", segment); - segmentsToPush.add(segment); - } else { - log.info( - "Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. 
Segment will be picked up in a future run.", - segment, - DateTimes.utc(intervalStart), - minTimestampAsDate - ); - } - } - } - - log.info("Found [%,d] segments to persist and merge", segmentsToPush.size()); - - final Function errorHandler = new Function() - { - @Override - public Void apply(Throwable throwable) - { - final List segmentIdentifierStrings = Lists.transform( - segmentsToPush, - SegmentIdWithShardSpec::toString - ); - - log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()) - .addData("segments", segmentIdentifierStrings) - .emit(); - - if (shuttingDown) { - // We're trying to shut down, and these segments failed to push. Let's just get rid of them. - // This call will also delete possibly-partially-written files, so we don't need to do it explicitly. - cleanShutdown = false; - for (SegmentIdWithShardSpec identifier : segmentsToPush) { - dropSegment(identifier); - } - } - - return null; - } - }; - - // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing. - Futures.addCallback( - appenderator.push(segmentsToPush, Committers.nil(), false), - new FutureCallback() - { - @Override - public void onSuccess(SegmentsAndCommitMetadata result) - { - // Immediately publish after pushing - for (DataSegment pushedSegment : result.getSegments()) { - try { - segmentPublisher.publishSegment(pushedSegment); - } - catch (Exception e) { - errorHandler.apply(e); - } - } - - log.info("Published [%,d] sinks.", segmentsToPush.size()); - } - - @Override - public void onFailure(Throwable e) - { - log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size()); - errorHandler.apply(e); - } - }, - MoreExecutors.directExecutor() - ); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java deleted file mode 100644 index 8d188111f64..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberSchool.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.appenderator; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.SegmentPublisher; -import org.apache.druid.segment.realtime.plumber.Plumber; -import org.apache.druid.segment.realtime.plumber.PlumberSchool; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; - -public class AppenderatorPlumberSchool implements PlumberSchool -{ - private final AppenderatorFactory appenderatorFactory; - private final DataSegmentAnnouncer segmentAnnouncer; - private final SegmentHandoffNotifierFactory handoffNotifierFactory; - private final SegmentPublisher segmentPublisher; - - @JsonCreator - public AppenderatorPlumberSchool( - @JsonProperty("appenderator") AppenderatorFactory appenderatorFactory, - @JacksonInject DataSegmentAnnouncer segmentAnnouncer, - @JacksonInject SegmentHandoffNotifierFactory handoffNotifierFactory, - @JacksonInject SegmentPublisher segmentPublisher - ) - { - this.appenderatorFactory = appenderatorFactory; - this.segmentAnnouncer = segmentAnnouncer; - this.handoffNotifierFactory = handoffNotifierFactory; - this.segmentPublisher = segmentPublisher; - } - - @Override - public Plumber findPlumber( - final DataSchema schema, - final RealtimeTuningConfig config, - final FireDepartmentMetrics metrics - ) - { - final Appenderator appenderator = appenderatorFactory.build( - schema, - config, - metrics - ); - - return new AppenderatorPlumber( - schema, - config, - metrics, - segmentAnnouncer, - segmentPublisher, - handoffNotifierFactory.createSegmentHandoffNotifier(schema.getDataSource()), - appenderator - ); - } - - @JsonProperty("appenderator") - public AppenderatorFactory getAppenderatorFactory() - { - return appenderatorFactory; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/Appenderators.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/Appenderators.java index 35cd526b1ea..56af5f3f62c 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/Appenderators.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/Appenderators.java @@ -35,7 +35,7 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer; import org.apache.druid.timeline.VersionedIntervalTimeline; @@ -47,7 +47,7 @@ public class Appenderators String id, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -101,7 +101,7 @@ public class Appenderators String id, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + 
SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -135,7 +135,7 @@ public class Appenderators String id, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -173,7 +173,7 @@ public class Appenderators String id, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsManager.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsManager.java index aa11d85ef35..3bbf364656e 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsManager.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsManager.java @@ -38,7 +38,7 @@ import org.apache.druid.segment.join.JoinableFactory; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.joda.time.Interval; @@ -70,7 +70,7 @@ public interface AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -96,7 +96,7 @@ public interface AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -111,7 +111,7 @@ public interface AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -126,7 +126,7 @@ public interface AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java index 65df4f56761..fa7d037c92c 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java @@ -46,6 +46,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.loading.DataSegmentKiller; import org.apache.druid.segment.realtime.appenderator.SegmentWithState.SegmentState; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -253,7 +254,7 @@ public abstract class 
BaseAppenderatorDriver implements Closeable private static final Logger log = new Logger(BaseAppenderatorDriver.class); private final SegmentAllocator segmentAllocator; - private final UsedSegmentChecker usedSegmentChecker; + private final PublishedSegmentRetriever publishedSegmentRetriever; private final DataSegmentKiller dataSegmentKiller; protected final Appenderator appenderator; @@ -268,13 +269,13 @@ public abstract class BaseAppenderatorDriver implements Closeable BaseAppenderatorDriver( Appenderator appenderator, SegmentAllocator segmentAllocator, - UsedSegmentChecker usedSegmentChecker, + PublishedSegmentRetriever segmentRetriever, DataSegmentKiller dataSegmentKiller ) { this.appenderator = Preconditions.checkNotNull(appenderator, "appenderator"); this.segmentAllocator = Preconditions.checkNotNull(segmentAllocator, "segmentAllocator"); - this.usedSegmentChecker = Preconditions.checkNotNull(usedSegmentChecker, "usedSegmentChecker"); + this.publishedSegmentRetriever = Preconditions.checkNotNull(segmentRetriever, "segmentRetriever"); this.dataSegmentKiller = Preconditions.checkNotNull(dataSegmentKiller, "dataSegmentKiller"); this.executor = MoreExecutors.listeningDecorator( Execs.singleThreaded("[" + StringUtils.encodeForFormat(appenderator.getId()) + "]-publish") @@ -622,7 +623,6 @@ public abstract class BaseAppenderatorDriver implements Closeable return RetryUtils.retry( () -> { try { - final Set upgradedSegments = new HashSet<>(); final ImmutableSet ourSegments = ImmutableSet.copyOf(pushedAndTombstones); final SegmentPublishResult publishResult = publisher.publishSegments( segmentsToBeOverwritten, @@ -633,22 +633,25 @@ public abstract class BaseAppenderatorDriver implements Closeable ); if (publishResult.isSuccess()) { log.info( - "Published [%s] segments with commit metadata [%s]", + "Published [%d] segments with commit metadata[%s].", segmentsAndCommitMetadata.getSegments().size(), callerMetadata ); log.infoSegments(segmentsAndCommitMetadata.getSegments(), "Published segments"); - // This set must contain only those segments that were upgraded as a result of a concurrent replace. - upgradedSegments.addAll(publishResult.getSegments()); + + // Log segments upgraded as a result of a concurrent replace + final Set upgradedSegments = new HashSet<>(publishResult.getSegments()); segmentsAndCommitMetadata.getSegments().forEach(upgradedSegments::remove); if (!upgradedSegments.isEmpty()) { log.info("Published [%d] upgraded segments.", upgradedSegments.size()); log.infoSegments(upgradedSegments, "Upgraded segments"); } - log.info("Published segment schemas: [%s]", segmentsAndCommitMetadata.getSegmentSchemaMapping()); + + log.info("Published segment schemas[%s].", segmentsAndCommitMetadata.getSegmentSchemaMapping()); + return segmentsAndCommitMetadata.withUpgradedSegments(upgradedSegments); } else { - // Publishing didn't affirmatively succeed. However, segments with our identifiers may still be active - // now after all, for two possible reasons: + // Publishing didn't affirmatively succeed. However, segments + // with these IDs may have already been published: // // 1) A replica may have beat us to publishing these segments. In this case we want to delete the // segments we pushed (if they had unique paths) to avoid wasting space on deep storage. @@ -656,29 +659,28 @@ public abstract class BaseAppenderatorDriver implements Closeable // from the overlord. In this case we do not want to delete the segments we pushed, since they are // now live! 
- final Set segmentsIdentifiers = segmentsAndCommitMetadata + final Set segmentIds = segmentsAndCommitMetadata .getSegments() .stream() - .map(SegmentIdWithShardSpec::fromDataSegment) + .map(DataSegment::getId) .collect(Collectors.toSet()); - final Set activeSegments = usedSegmentChecker.findUsedSegments(segmentsIdentifiers); - - if (activeSegments.equals(ourSegments)) { + final Set publishedSegments = publishedSegmentRetriever.findPublishedSegments(segmentIds); + if (publishedSegments.equals(ourSegments)) { log.info( - "Could not publish [%s] segments, but checked and found them already published; continuing.", + "Could not publish [%d] segments, but they have already been published by another task.", ourSegments.size() ); log.infoSegments( segmentsAndCommitMetadata.getSegments(), "Could not publish segments" ); - log.info("Could not publish segment and schemas: [%s]", segmentsAndCommitMetadata.getSegmentSchemaMapping()); + log.info("Could not publish segment schemas[%s]", segmentsAndCommitMetadata.getSegmentSchemaMapping()); // Clean up pushed segments if they are physically disjoint from the published ones (this means // they were probably pushed by a replica, and with the unique paths option). final boolean physicallyDisjoint = Sets.intersection( - activeSegments.stream().map(DataSegment::getLoadSpec).collect(Collectors.toSet()), + publishedSegments.stream().map(DataSegment::getLoadSpec).collect(Collectors.toSet()), ourSegments.stream().map(DataSegment::getLoadSpec).collect(Collectors.toSet()) ).isEmpty(); @@ -698,8 +700,9 @@ public abstract class BaseAppenderatorDriver implements Closeable } throw new ISE("Failed to publish segments"); } + + return segmentsAndCommitMetadata; } - return segmentsAndCommitMetadata.withUpgradedSegments(upgradedSegments); } catch (Exception e) { // Must not remove segments here, we aren't sure if our transaction succeeded or not. 
diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java index 128de15196d..b544d33705c 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderator.java @@ -67,9 +67,9 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.FingerprintGenerator; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.FireHydrant; -import org.apache.druid.segment.realtime.plumber.Sink; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; +import org.apache.druid.segment.realtime.sink.Sink; import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; @@ -113,7 +113,7 @@ public class BatchAppenderator implements Appenderator private final String myId; private final DataSchema schema; private final AppenderatorConfig tuningConfig; - private final FireDepartmentMetrics metrics; + private final SegmentGenerationMetrics metrics; private final DataSegmentPusher dataSegmentPusher; private final ObjectMapper objectMapper; private final IndexIO indexIO; @@ -164,7 +164,7 @@ public class BatchAppenderator implements Appenderator String id, DataSchema schema, AppenderatorConfig tuningConfig, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -481,8 +481,7 @@ public class BatchAppenderator implements Appenderator tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, - useMaxMemoryEstimates, - null + useMaxMemoryEstimates ); bytesCurrentlyInMemory += calculateSinkMemoryInUsed(); sinks.put(identifier, retVal); @@ -1075,7 +1074,6 @@ public class BatchAppenderator implements Appenderator tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, useMaxMemoryEstimates, - null, hydrants ); retVal.finishWriting(); // this sink is not writable diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriver.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriver.java index e08fcf601df..81a02ab1eec 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriver.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriver.java @@ -61,20 +61,16 @@ import java.util.stream.Collectors; public class BatchAppenderatorDriver extends BaseAppenderatorDriver { /** - * Create a driver. - * - * @param appenderator appenderator - * @param segmentAllocator segment allocator - * @param usedSegmentChecker used segment checker + * Creates a driver for batch ingestion. 
*/ public BatchAppenderatorDriver( Appenderator appenderator, SegmentAllocator segmentAllocator, - UsedSegmentChecker usedSegmentChecker, + PublishedSegmentRetriever segmentRetriever, DataSegmentKiller dataSegmentKiller ) { - super(appenderator, segmentAllocator, usedSegmentChecker, dataSegmentKiller); + super(appenderator, segmentAllocator, segmentRetriever, dataSegmentKiller); } @Nullable diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactory.java deleted file mode 100644 index b5574c686c4..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactory.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.appenderator; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMerger; -import org.apache.druid.segment.incremental.NoopRowIngestionMeters; -import org.apache.druid.segment.incremental.ParseExceptionHandler; -import org.apache.druid.segment.incremental.RowIngestionMeters; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.loading.DataSegmentPusher; -import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; - -public class DefaultOfflineAppenderatorFactory implements AppenderatorFactory -{ - private final DataSegmentPusher dataSegmentPusher; - private final ObjectMapper objectMapper; - private final IndexIO indexIO; - private final IndexMerger indexMerger; - private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; - - @JsonCreator - public DefaultOfflineAppenderatorFactory( - @JacksonInject DataSegmentPusher dataSegmentPusher, - @JacksonInject ObjectMapper objectMapper, - @JacksonInject IndexIO indexIO, - @JacksonInject IndexMerger indexMerger, - @JsonProperty("centralizedDatasourceSchemaConfig") CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig - ) - { - this.dataSegmentPusher = dataSegmentPusher; - this.objectMapper = objectMapper; - this.indexIO = indexIO; - this.indexMerger = indexMerger; - this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; - - } - - @JsonProperty - public CentralizedDatasourceSchemaConfig 
getCentralizedDatasourceSchemaConfig() - { - return centralizedDatasourceSchemaConfig; - } - - @Override - public Appenderator build(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) - { - final RowIngestionMeters rowIngestionMeters = new NoopRowIngestionMeters(); - return Appenderators.createClosedSegmentsOffline( - schema.getDataSource(), - schema, - config, - metrics, - dataSegmentPusher, - objectMapper, - indexIO, - indexMerger, - rowIngestionMeters, - new ParseExceptionHandler( - rowIngestionMeters, - false, - config.isReportParseExceptions() ? 0 : Integer.MAX_VALUE, - 0 - ), - true, - centralizedDatasourceSchemaConfig - ); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DefaultRealtimeAppenderatorFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DefaultRealtimeAppenderatorFactory.java deleted file mode 100644 index e64c315484d..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DefaultRealtimeAppenderatorFactory.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.appenderator; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.client.cache.Cache; -import org.apache.druid.client.cache.CacheConfig; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.guice.annotations.Json; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.query.QueryProcessingPool; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMerger; -import org.apache.druid.segment.incremental.NoopRowIngestionMeters; -import org.apache.druid.segment.incremental.ParseExceptionHandler; -import org.apache.druid.segment.incremental.RowIngestionMeters; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.join.JoinableFactory; -import org.apache.druid.segment.loading.DataSegmentPusher; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.apache.druid.timeline.partition.ShardSpec; - -import java.io.File; - -public class DefaultRealtimeAppenderatorFactory implements AppenderatorFactory -{ - private final ServiceEmitter emitter; - private final QueryRunnerFactoryConglomerate conglomerate; - private final DataSegmentAnnouncer segmentAnnouncer; - private final QueryProcessingPool queryProcessingPool; - private final JoinableFactory joinableFactory; - private final DataSegmentPusher dataSegmentPusher; - private final ObjectMapper jsonMapper; - private final IndexIO indexIO; - private final IndexMerger indexMerger; - private final Cache cache; - private final CacheConfig cacheConfig; - private final CachePopulatorStats cachePopulatorStats; - - public DefaultRealtimeAppenderatorFactory( - @JacksonInject ServiceEmitter emitter, - @JacksonInject QueryRunnerFactoryConglomerate conglomerate, - @JacksonInject DataSegmentAnnouncer segmentAnnouncer, - @JacksonInject QueryProcessingPool queryProcessingPool, - @JacksonInject JoinableFactory joinableFactory, - @JacksonInject DataSegmentPusher dataSegmentPusher, - @JacksonInject @Json ObjectMapper jsonMapper, - @JacksonInject IndexIO indexIO, - @JacksonInject IndexMerger indexMerger, - @JacksonInject Cache cache, - @JacksonInject CacheConfig cacheConfig, - @JacksonInject CachePopulatorStats cachePopulatorStats - ) - { - this.emitter = emitter; - this.conglomerate = conglomerate; - this.segmentAnnouncer = segmentAnnouncer; - this.queryProcessingPool = queryProcessingPool; - this.joinableFactory = joinableFactory; - this.dataSegmentPusher = dataSegmentPusher; - this.jsonMapper = jsonMapper; - this.indexIO = indexIO; - this.indexMerger = indexMerger; - this.cache = cache; - this.cacheConfig = cacheConfig; - this.cachePopulatorStats = cachePopulatorStats; - } - - @Override - public Appenderator build( - final DataSchema schema, - final RealtimeTuningConfig config, - final FireDepartmentMetrics metrics - ) - { - final RowIngestionMeters rowIngestionMeters = new NoopRowIngestionMeters(); - return Appenderators.createRealtime( - null, - schema.getDataSource(), - schema, - config.withBasePersistDirectory( - makeBasePersistSubdirectory( - config.getBasePersistDirectory(), - schema.getDataSource(), - config.getShardSpec() - ) - ), - metrics, - dataSegmentPusher, - jsonMapper, - indexIO, - 
indexMerger, - conglomerate, - segmentAnnouncer, - emitter, - queryProcessingPool, - cache, - cacheConfig, - cachePopulatorStats, - rowIngestionMeters, - new ParseExceptionHandler( - rowIngestionMeters, - false, - config.isReportParseExceptions() ? 0 : Integer.MAX_VALUE, - 0 - ), - true, - null - ); - } - - private static File makeBasePersistSubdirectory( - final File basePersistDirectory, - final String dataSource, - final ShardSpec shardSpec - ) - { - final File dataSourceDirectory = new File(basePersistDirectory, dataSource); - return new File(dataSourceDirectory, String.valueOf(shardSpec.getPartitionNum())); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DummyForInjectionAppenderatorsManager.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DummyForInjectionAppenderatorsManager.java index 988d77b6f70..f44fffe20e1 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DummyForInjectionAppenderatorsManager.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/DummyForInjectionAppenderatorsManager.java @@ -39,7 +39,7 @@ import org.apache.druid.segment.join.JoinableFactory; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.joda.time.Interval; @@ -61,7 +61,7 @@ public class DummyForInjectionAppenderatorsManager implements AppenderatorsManag String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -88,7 +88,7 @@ public class DummyForInjectionAppenderatorsManager implements AppenderatorsManag String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -107,7 +107,7 @@ public class DummyForInjectionAppenderatorsManager implements AppenderatorsManag String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -126,7 +126,7 @@ public class DummyForInjectionAppenderatorsManager implements AppenderatorsManag String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/PeonAppenderatorsManager.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/PeonAppenderatorsManager.java index c0f833ac594..52f75f72e47 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/PeonAppenderatorsManager.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/PeonAppenderatorsManager.java @@ -39,7 +39,7 @@ import org.apache.druid.segment.join.JoinableFactory; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import 
org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.joda.time.Interval; @@ -67,7 +67,7 @@ public class PeonAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper jsonMapper, IndexIO indexIO, @@ -122,7 +122,7 @@ public class PeonAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -160,7 +160,7 @@ public class PeonAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -198,7 +198,7 @@ public class PeonAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/UsedSegmentChecker.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/PublishedSegmentRetriever.java similarity index 82% rename from server/src/main/java/org/apache/druid/segment/realtime/appenderator/UsedSegmentChecker.java rename to server/src/main/java/org/apache/druid/segment/realtime/appenderator/PublishedSegmentRetriever.java index 29fd6028f82..3f2d239f3be 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/UsedSegmentChecker.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/PublishedSegmentRetriever.java @@ -20,18 +20,15 @@ package org.apache.druid.segment.realtime.appenderator; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import java.io.IOException; import java.util.Set; -public interface UsedSegmentChecker +public interface PublishedSegmentRetriever { /** * For any identifiers that exist and are actually used, returns the corresponding DataSegment objects. - * - * @param identifiers identifiers to search for - * - * @return used DataSegments */ - Set findUsedSegments(Set identifiers) throws IOException; + Set findPublishedSegments(Set identifiers) throws IOException; } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentSchemas.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentSchemas.java index fbe2b6315ef..359da9bed76 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentSchemas.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentSchemas.java @@ -30,7 +30,7 @@ import java.util.Objects; /** * Encapsulates schema information for multiple segments. *

    - * Primarily used to announce schema changes for all {@link org.apache.druid.segment.realtime.plumber.Sink} + * Primarily used to announce schema changes for all {@link org.apache.druid.segment.realtime.sink.Sink} * created by a task in {@link StreamAppenderator}. */ public class SegmentSchemas diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentsAndCommitMetadata.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentsAndCommitMetadata.java index 72187688057..dfbfa621a23 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentsAndCommitMetadata.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SegmentsAndCommitMetadata.java @@ -26,16 +26,12 @@ import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; import javax.annotation.Nullable; -import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Set; public class SegmentsAndCommitMetadata { - private static final SegmentsAndCommitMetadata NIL - = new SegmentsAndCommitMetadata(Collections.emptyList(), null, null, null); - private final Object commitMetadata; private final ImmutableList segments; private final SegmentSchemaMapping segmentSchemaMapping; @@ -139,9 +135,4 @@ public class SegmentsAndCommitMetadata ", segmentSchemaMapping=" + segmentSchemaMapping + '}'; } - - public static SegmentsAndCommitMetadata nil() - { - return NIL; - } } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SinkQuerySegmentWalker.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SinkQuerySegmentWalker.java index aba071de1df..72cfe4de261 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SinkQuerySegmentWalker.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/SinkQuerySegmentWalker.java @@ -59,8 +59,8 @@ import org.apache.druid.query.spec.SpecificSegmentSpec; import org.apache.druid.segment.SegmentReference; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.realtime.FireHydrant; -import org.apache.druid.segment.realtime.plumber.Sink; -import org.apache.druid.segment.realtime.plumber.SinkSegmentReference; +import org.apache.druid.segment.realtime.sink.Sink; +import org.apache.druid.segment.realtime.sink.SinkSegmentReference; import org.apache.druid.server.ResourceIdPopulatingQueryRunner; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.VersionedIntervalTimeline; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java index 7a41ae3fb26..4d1253591e0 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderator.java @@ -76,9 +76,9 @@ import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.FingerprintGenerator; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.FireHydrant; -import org.apache.druid.segment.realtime.plumber.Sink; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; 
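With SegmentsAndCommitMetadata.nil() removed above, a call site that relied on it would presumably construct an empty instance directly. A minimal sketch, assuming the four-argument constructor shape used by the deleted NIL constant; the helper class name is illustrative only.

import org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata;

import java.util.Collections;

public class EmptySegmentsAndCommitMetadata
{
  // Mirrors the deleted NIL constant: no segments, no commit metadata, no schema mapping.
  public static SegmentsAndCommitMetadata create()
  {
    return new SegmentsAndCommitMetadata(Collections.emptyList(), null, null, null);
  }
}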
+import org.apache.druid.segment.realtime.sink.Sink; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; @@ -134,7 +134,7 @@ public class StreamAppenderator implements Appenderator private final String myId; private final DataSchema schema; private final AppenderatorConfig tuningConfig; - private final FireDepartmentMetrics metrics; + private final SegmentGenerationMetrics metrics; private final DataSegmentPusher dataSegmentPusher; private final ObjectMapper objectMapper; private final DataSegmentAnnouncer segmentAnnouncer; @@ -221,7 +221,7 @@ public class StreamAppenderator implements Appenderator String id, DataSchema schema, AppenderatorConfig tuningConfig, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, DataSegmentAnnouncer segmentAnnouncer, @@ -533,8 +533,7 @@ public class StreamAppenderator implements Appenderator tuningConfig.getAppendableIndexSpec(), tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, - useMaxMemoryEstimates, - null + useMaxMemoryEstimates ); bytesCurrentlyInMemory.addAndGet(calculateSinkMemoryInUsed(retVal)); @@ -1391,7 +1390,6 @@ public class StreamAppenderator implements Appenderator tuningConfig.getMaxRowsInMemory(), maxBytesTuningConfig, useMaxMemoryEstimates, - null, hydrants ); rowsSoFar += currSink.getNumRows(); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java index 2b5c153d602..68599dbea11 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriver.java @@ -40,7 +40,7 @@ import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.loading.DataSegmentKiller; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.realtime.appenderator.SegmentWithState.SegmentState; import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; @@ -82,7 +82,7 @@ public class StreamAppenderatorDriver extends BaseAppenderatorDriver private static final long HANDOFF_TIME_THRESHOLD = 600_000; private final SegmentHandoffNotifier handoffNotifier; - private final FireDepartmentMetrics metrics; + private final SegmentGenerationMetrics metrics; private final ObjectMapper objectMapper; /** @@ -91,7 +91,7 @@ public class StreamAppenderatorDriver extends BaseAppenderatorDriver * @param appenderator appenderator * @param segmentAllocator segment allocator * @param handoffNotifierFactory handoff notifier factory - * @param usedSegmentChecker used segment checker + * @param segmentRetriever used segment checker * @param objectMapper object mapper, used for serde of commit metadata * @param metrics Firedepartment metrics */ @@ -99,13 +99,13 @@ public class StreamAppenderatorDriver extends BaseAppenderatorDriver Appenderator appenderator, SegmentAllocator segmentAllocator, SegmentHandoffNotifierFactory handoffNotifierFactory, - UsedSegmentChecker usedSegmentChecker, + PublishedSegmentRetriever segmentRetriever, 
DataSegmentKiller dataSegmentKiller, ObjectMapper objectMapper, - FireDepartmentMetrics metrics + SegmentGenerationMetrics metrics ) { - super(appenderator, segmentAllocator, usedSegmentChecker, dataSegmentKiller); + super(appenderator, segmentAllocator, segmentRetriever, dataSegmentKiller); this.handoffNotifier = Preconditions.checkNotNull(handoffNotifierFactory, "handoffNotifierFactory") .createSegmentHandoffNotifier(appenderator.getDataSource()); @@ -416,7 +416,7 @@ public class StreamAppenderatorDriver extends BaseAppenderatorDriver { return Futures.transformAsync( publish(publisher, committer, sequenceNames), - (AsyncFunction) this::registerHandoff, + this::registerHandoff, MoreExecutors.directExecutor() ); } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManager.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManager.java index 68ed2ae0d8f..ffdfb8d1eb0 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManager.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManager.java @@ -64,8 +64,8 @@ import org.apache.druid.segment.join.JoinableFactoryWrapper; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.plumber.Sink; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; +import org.apache.druid.segment.realtime.sink.Sink; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.timeline.VersionedIntervalTimeline; @@ -155,7 +155,7 @@ public class UnifiedIndexerAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -209,7 +209,7 @@ public class UnifiedIndexerAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -250,7 +250,7 @@ public class UnifiedIndexerAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, @@ -291,7 +291,7 @@ public class UnifiedIndexerAppenderatorsManager implements AppenderatorsManager String taskId, DataSchema schema, AppenderatorConfig config, - FireDepartmentMetrics metrics, + SegmentGenerationMetrics metrics, DataSegmentPusher dataSegmentPusher, ObjectMapper objectMapper, IndexIO indexIO, diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/FlushingPlumber.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/FlushingPlumber.java deleted file mode 100644 index a271c4540c5..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/FlushingPlumber.java +++ /dev/null @@ -1,238 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.client.cache.Cache; -import org.apache.druid.client.cache.CacheConfig; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.common.guava.ThreadRenamingCallable; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.granularity.Granularity; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.query.QueryProcessingPool; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMerger; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.join.JoinableFactory; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.joda.time.DateTime; -import org.joda.time.Duration; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ScheduledExecutorService; - -/** - */ -public class FlushingPlumber extends RealtimePlumber -{ - private static final EmittingLogger log = new EmittingLogger(FlushingPlumber.class); - - private final DataSchema schema; - private final RealtimeTuningConfig config; - private final Duration flushDuration; - - private volatile ScheduledExecutorService flushScheduledExec = null; - private volatile boolean stopped = false; - - public FlushingPlumber( - Duration flushDuration, - DataSchema schema, - RealtimeTuningConfig config, - FireDepartmentMetrics metrics, - ServiceEmitter emitter, - QueryRunnerFactoryConglomerate conglomerate, - DataSegmentAnnouncer segmentAnnouncer, - QueryProcessingPool queryProcessingPool, - JoinableFactory joinableFactory, - IndexMerger indexMerger, - IndexIO indexIO, - Cache cache, - CacheConfig cacheConfig, - CachePopulatorStats cachePopulatorStats, - ObjectMapper objectMapper - - ) - { - super( - schema, - config, - metrics, - emitter, - conglomerate, - segmentAnnouncer, - queryProcessingPool, - null, - null, - null, - indexMerger, - indexIO, - cache, - cacheConfig, - cachePopulatorStats, - objectMapper - ); - - this.flushDuration = 
flushDuration; - this.schema = schema; - this.config = config; - } - - @Override - public Object startJob() - { - log.info("Starting job for %s", getSchema().getDataSource()); - - try { - FileUtils.mkdirp(computeBaseDir(getSchema())); - } - catch (IOException e) { - throw new RuntimeException(e); - } - - initializeExecutors(); - - if (flushScheduledExec == null) { - flushScheduledExec = Execs.scheduledSingleThreaded("flushing_scheduled_%d"); - } - - Object retVal = bootstrapSinksFromDisk(); - startFlushThread(); - return retVal; - } - - protected void flushAfterDuration(final long truncatedTime, final Sink sink) - { - log.info( - "Abandoning segment %s at %s", - sink.getSegment().getId(), - DateTimes.nowUtc().plusMillis((int) flushDuration.getMillis()) - ); - - ScheduledExecutors.scheduleWithFixedDelay( - flushScheduledExec, - flushDuration, - new Callable() - { - @Override - public ScheduledExecutors.Signal call() - { - log.info("Abandoning segment %s", sink.getSegment().getId()); - abandonSegment(truncatedTime, sink); - return ScheduledExecutors.Signal.STOP; - } - } - ); - } - - private void startFlushThread() - { - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final DateTime truncatedNow = segmentGranularity.bucketStart(DateTimes.nowUtc()); - final long windowMillis = config.getWindowPeriod().toStandardDuration().getMillis(); - - log.info( - "Expect to run at [%s]", - DateTimes.nowUtc().plus( - new Duration( - System.currentTimeMillis(), - schema.getGranularitySpec().getSegmentGranularity().increment(truncatedNow).getMillis() + windowMillis - ) - ) - ); - - String threadName = StringUtils.format( - "%s-flusher-%d", - getSchema().getDataSource(), - getConfig().getShardSpec().getPartitionNum() - ); - ThreadRenamingCallable threadRenamingCallable = - new ThreadRenamingCallable(threadName) - { - @Override - public ScheduledExecutors.Signal doCall() - { - if (stopped) { - log.info("Stopping flusher thread"); - return ScheduledExecutors.Signal.STOP; - } - - long minTimestamp = segmentGranularity.bucketStart( - getRejectionPolicy().getCurrMaxTime().minus(windowMillis) - ).getMillis(); - - List> sinksToPush = new ArrayList<>(); - for (Map.Entry entry : getSinks().entrySet()) { - final Long intervalStart = entry.getKey(); - if (intervalStart < minTimestamp) { - log.info("Adding entry[%s] to flush.", entry); - sinksToPush.add(entry); - } - } - - for (final Map.Entry entry : sinksToPush) { - flushAfterDuration(entry.getKey(), entry.getValue()); - } - - if (stopped) { - log.info("Stopping flusher thread"); - return ScheduledExecutors.Signal.STOP; - } else { - return ScheduledExecutors.Signal.REPEAT; - } - } - }; - Duration initialDelay = new Duration( - System.currentTimeMillis(), - schema.getGranularitySpec().getSegmentGranularity().increment(truncatedNow).getMillis() + windowMillis - ); - Duration rate = new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)); - ScheduledExecutors.scheduleAtFixedRate(flushScheduledExec, initialDelay, rate, threadRenamingCallable); - } - - @Override - public void finishJob() - { - log.info("Stopping job"); - - for (final Map.Entry entry : getSinks().entrySet()) { - abandonSegment(entry.getKey(), entry.getValue()); - } - shutdownExecutors(); - - if (flushScheduledExec != null) { - flushScheduledExec.shutdown(); - } - - stopped = true; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/FlushingPlumberSchool.java 
b/server/src/main/java/org/apache/druid/segment/realtime/plumber/FlushingPlumberSchool.java deleted file mode 100644 index 787ea26cbc6..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/FlushingPlumberSchool.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import org.apache.druid.client.cache.Cache; -import org.apache.druid.client.cache.CacheConfig; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.query.QueryProcessingPool; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMergerV9; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.join.JoinableFactory; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.joda.time.Duration; - -/** - * This plumber just drops segments at the end of a flush duration instead of handing them off. It is only useful if you want to run - * a real time node without the rest of the Druid cluster. 
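The flush scheduling removed with FlushingPlumber relies on Druid's ScheduledExecutors helper, where a Callable returning Signal keeps repeating until it returns STOP. A rough standalone sketch of that pattern follows; the thread-name format, the three-run cutoff, and the SignalLoopExample class are illustrative assumptions, while the scheduleWithFixedDelay call mirrors the usage visible in the deleted code.

import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.concurrent.ScheduledExecutors;
import org.joda.time.Duration;

import java.util.concurrent.Callable;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicInteger;

public class SignalLoopExample
{
  public static void main(String[] args) throws InterruptedException
  {
    final ScheduledExecutorService exec = Execs.scheduledSingleThreaded("signal_example_%d");
    final AtomicInteger runs = new AtomicInteger();

    ScheduledExecutors.scheduleWithFixedDelay(
        exec,
        new Duration(1000),
        new Callable<ScheduledExecutors.Signal>()
        {
          @Override
          public ScheduledExecutors.Signal call()
          {
            // Keep repeating until some condition is met, then stop, as the
            // deleted flush and persist threads do.
            return runs.incrementAndGet() >= 3
                   ? ScheduledExecutors.Signal.STOP
                   : ScheduledExecutors.Signal.REPEAT;
          }
        }
    );

    // Give the daemon scheduler thread time to run before exiting.
    Thread.sleep(5000);
    exec.shutdownNow();
  }
}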
- */ -public class FlushingPlumberSchool extends RealtimePlumberSchool -{ - private static final Duration DEFAULT_FLUSH_DURATION = new Duration("PT1H"); - - private final Duration flushDuration; - - private final ServiceEmitter emitter; - private final QueryRunnerFactoryConglomerate conglomerate; - private final DataSegmentAnnouncer segmentAnnouncer; - private final QueryProcessingPool queryProcessingPool; - private final JoinableFactory joinableFactory; - private final IndexMergerV9 indexMergerV9; - private final IndexIO indexIO; - private final Cache cache; - private final CacheConfig cacheConfig; - private final CachePopulatorStats cachePopulatorStats; - private final ObjectMapper objectMapper; - - @JsonCreator - public FlushingPlumberSchool( - @JsonProperty("flushDuration") Duration flushDuration, - @JacksonInject ServiceEmitter emitter, - @JacksonInject QueryRunnerFactoryConglomerate conglomerate, - @JacksonInject DataSegmentAnnouncer segmentAnnouncer, - @JacksonInject QueryProcessingPool queryProcessingPool, - @JacksonInject JoinableFactory joinableFactory, - @JacksonInject IndexMergerV9 indexMergerV9, - @JacksonInject IndexIO indexIO, - @JacksonInject Cache cache, - @JacksonInject CacheConfig cacheConfig, - @JacksonInject CachePopulatorStats cachePopulatorStats, - @JacksonInject ObjectMapper objectMapper - ) - { - super( - emitter, - conglomerate, - null, - segmentAnnouncer, - null, - null, - queryProcessingPool, - joinableFactory, - indexMergerV9, - indexIO, - cache, - cacheConfig, - cachePopulatorStats, - objectMapper - ); - - this.flushDuration = flushDuration == null ? DEFAULT_FLUSH_DURATION : flushDuration; - this.emitter = emitter; - this.conglomerate = conglomerate; - this.segmentAnnouncer = segmentAnnouncer; - this.queryProcessingPool = queryProcessingPool; - this.joinableFactory = joinableFactory; - this.indexMergerV9 = Preconditions.checkNotNull(indexMergerV9, "Null IndexMergerV9"); - this.indexIO = Preconditions.checkNotNull(indexIO, "Null IndexIO"); - this.cache = cache; - this.cacheConfig = cacheConfig; - this.cachePopulatorStats = cachePopulatorStats; - this.objectMapper = objectMapper; - } - - @Override - public Plumber findPlumber( - final DataSchema schema, - final RealtimeTuningConfig config, - final FireDepartmentMetrics metrics - ) - { - verifyState(); - - return new FlushingPlumber( - flushDuration, - schema, - config, - metrics, - emitter, - conglomerate, - segmentAnnouncer, - queryProcessingPool, - joinableFactory, - indexMergerV9, - indexIO, - cache, - cacheConfig, - cachePopulatorStats, - objectMapper - ); - } - - private void verifyState() - { - Preconditions.checkNotNull(conglomerate, "must specify a queryRunnerFactoryConglomerate to do this action."); - Preconditions.checkNotNull(segmentAnnouncer, "must specify a segmentAnnouncer to do this action."); - Preconditions.checkNotNull(emitter, "must specify a serviceEmitter to do this action."); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/IntervalStartVersioningPolicy.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/IntervalStartVersioningPolicy.java deleted file mode 100644 index e343d4fe0df..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/IntervalStartVersioningPolicy.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import org.joda.time.Interval; - -public class IntervalStartVersioningPolicy implements VersioningPolicy -{ - @Override - public String getVersion(Interval interval) - { - return interval.getStart().toString(); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/MessageTimeRejectionPolicyFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/MessageTimeRejectionPolicyFactory.java deleted file mode 100644 index 19fb37e3441..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/MessageTimeRejectionPolicyFactory.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.plumber; - -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.JodaUtils; -import org.apache.druid.java.util.common.StringUtils; -import org.joda.time.DateTime; -import org.joda.time.Period; - -import java.util.concurrent.atomic.AtomicLongFieldUpdater; - -public class MessageTimeRejectionPolicyFactory implements RejectionPolicyFactory -{ - @Override - public RejectionPolicy create(final Period windowPeriod) - { - final long windowMillis = windowPeriod.toStandardDuration().getMillis(); - return new MessageTimeRejectionPolicy(windowMillis, windowPeriod); - } - - private static class MessageTimeRejectionPolicy implements RejectionPolicy - { - private static final AtomicLongFieldUpdater MAX_TIMESTAMP_UPDATER = - AtomicLongFieldUpdater.newUpdater(MessageTimeRejectionPolicy.class, "maxTimestamp"); - private final long windowMillis; - private final Period windowPeriod; - private volatile long maxTimestamp; - - public MessageTimeRejectionPolicy(long windowMillis, Period windowPeriod) - { - this.windowMillis = windowMillis; - this.windowPeriod = windowPeriod; - this.maxTimestamp = JodaUtils.MIN_INSTANT; - } - - @Override - public DateTime getCurrMaxTime() - { - return DateTimes.utc(maxTimestamp); - } - - @Override - public boolean accept(long timestamp) - { - long maxTimestamp = this.maxTimestamp; - if (timestamp > maxTimestamp) { - maxTimestamp = tryUpdateMaxTimestamp(timestamp); - } - - return timestamp >= (maxTimestamp - windowMillis); - } - - private long tryUpdateMaxTimestamp(long timestamp) - { - long currentMaxTimestamp; - do { - currentMaxTimestamp = maxTimestamp; - if (timestamp <= currentMaxTimestamp) { - return currentMaxTimestamp; - } - } while (!MAX_TIMESTAMP_UPDATER.compareAndSet(this, currentMaxTimestamp, timestamp)); - return timestamp; - } - - @Override - public String toString() - { - return StringUtils.format("messageTime-%s", windowPeriod); - } - } -} - diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopRejectionPolicyFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopRejectionPolicyFactory.java deleted file mode 100644 index 6acce9abaa3..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/NoopRejectionPolicyFactory.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
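The message-time rejection rule deleted above accepts a row only when its timestamp lies within the configured window of the maximum timestamp seen so far. A standalone sketch of that check, using AtomicLong rather than the original field updater; the MessageTimeWindow class and method names are illustrative.

import java.util.concurrent.atomic.AtomicLong;

public class MessageTimeWindow
{
  private final long windowMillis;
  private final AtomicLong maxTimestamp = new AtomicLong(Long.MIN_VALUE);

  public MessageTimeWindow(long windowMillis)
  {
    this.windowMillis = windowMillis;
  }

  // Accept a timestamp only if it is no more than windowMillis behind the
  // maximum timestamp observed so far (which this call may advance).
  public boolean accept(long timestamp)
  {
    final long max = maxTimestamp.accumulateAndGet(timestamp, Math::max);
    return timestamp >= max - windowMillis;
  }

  public static void main(String[] args)
  {
    final MessageTimeWindow window = new MessageTimeWindow(10_000L);
    System.out.println(window.accept(100_000L)); // true: advances the max
    System.out.println(window.accept(95_000L));  // true: within the window
    System.out.println(window.accept(50_000L));  // false: too far behind
  }
}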
- */ - -package org.apache.druid.segment.realtime.plumber; - -import org.apache.druid.java.util.common.DateTimes; -import org.joda.time.DateTime; -import org.joda.time.Period; - -public class NoopRejectionPolicyFactory implements RejectionPolicyFactory -{ - @Override - public RejectionPolicy create(Period windowPeriod) - { - return new RejectionPolicy() - { - @Override - public DateTime getCurrMaxTime() - { - return DateTimes.EPOCH; - } - - @Override - public boolean accept(long timestamp) - { - return true; - } - }; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Plumber.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Plumber.java deleted file mode 100644 index e7fab4ae8a0..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Plumber.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.google.common.base.Supplier; -import org.apache.druid.data.input.Committer; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.query.Query; -import org.apache.druid.query.QueryRunner; -import org.apache.druid.segment.incremental.IncrementalIndexAddResult; -import org.apache.druid.segment.incremental.IndexSizeExceededException; - -public interface Plumber -{ - IncrementalIndexAddResult THROWAWAY = new IncrementalIndexAddResult(-1, -1, "row too late"); - IncrementalIndexAddResult NOT_WRITABLE = new IncrementalIndexAddResult(-1, -1, "not writable"); - IncrementalIndexAddResult DUPLICATE = new IncrementalIndexAddResult(-2, -1, "duplicate row"); - - /** - * Perform any initial setup. Should be called before using any other methods, and should be paired - * with a corresponding call to {@link #finishJob}. - * - * @return the metadata of the "newest" segment that might have previously been persisted - */ - Object startJob(); - - /** - * @param row the row to insert - * @param committerSupplier supplier of a committer associated with all data that has been added, including this row - * - * @return IncrementalIndexAddResult whose rowCount - * - positive numbers indicate how many summarized rows exist in the index for that timestamp, - * -1 means a row was thrown away because it was too late - * -2 means a row was thrown away because it is duplicate - */ - IncrementalIndexAddResult add(InputRow row, Supplier committerSupplier) throws IndexSizeExceededException; - - QueryRunner getQueryRunner(Query query); - - /** - * Persist any in-memory indexed data to durable storage. This may be only somewhat durable, e.g. the - * machine's local disk. 
- * - * @param committer committer to use after persisting data - */ - void persist(Committer committer); - - /** - * Perform any final processing and clean up after ourselves. Should be called after all data has been - * fed into sinks and persisted. - */ - void finishJob(); -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/PlumberSchool.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/PlumberSchool.java deleted file mode 100644 index 849d5dbb184..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/PlumberSchool.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; - -/** - */ -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = RealtimePlumberSchool.class) -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "realtime", value = RealtimePlumberSchool.class), - @JsonSubTypes.Type(name = "flushing", value = FlushingPlumberSchool.class) -}) -public interface PlumberSchool -{ - /** - * Creates a Plumber - * - * @return returns a plumber - */ - Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics); - -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Plumbers.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Plumbers.java deleted file mode 100644 index 44d611dfd0e..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Plumbers.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
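The deleted PlumberSchool used Jackson's @JsonTypeInfo and @JsonSubTypes wiring to pick an implementation from a "type" property. A self-contained illustration of that pattern follows; the Shape hierarchy is hypothetical, and only the annotation wiring mirrors the code above.

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;

public class PolymorphicTypeExample
{
  @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = Circle.class)
  @JsonSubTypes({
      @JsonSubTypes.Type(name = "circle", value = Circle.class),
      @JsonSubTypes.Type(name = "square", value = Square.class)
  })
  interface Shape {}

  static class Circle implements Shape
  {
    @JsonProperty double radius;
  }

  static class Square implements Shape
  {
    @JsonProperty double side;
  }

  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // The "type" property selects the concrete class, as it did for PlumberSchool.
    final Shape shape = mapper.readValue("{\"type\": \"square\", \"side\": 2.0}", Shape.class);
    System.out.println(shape.getClass().getSimpleName()); // Square
  }
}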
- */ - -package org.apache.druid.segment.realtime.plumber; - -import com.google.common.base.Supplier; -import org.apache.druid.data.input.Committer; -import org.apache.druid.data.input.Firehose; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.java.util.common.parsers.ParseException; -import org.apache.druid.segment.incremental.IncrementalIndexAddResult; -import org.apache.druid.segment.incremental.IndexSizeExceededException; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; - -import java.io.IOException; - -public class Plumbers -{ - private static final Logger log = new Logger(Plumbers.class); - - private Plumbers() - { - // No instantiation - } - - public static void addNextRow( - final Supplier committerSupplier, - final Firehose firehose, - final Plumber plumber, - final boolean reportParseExceptions, - final FireDepartmentMetrics metrics - ) throws IOException - { - final InputRow inputRow; - try { - inputRow = firehose.nextRow(); - } - catch (ParseException e) { - if (reportParseExceptions) { - throw e; - } else { - log.debug(e, "Discarded row due to exception, considering unparseable."); - metrics.incrementUnparseable(); - return; - } - } - - if (inputRow == null) { - log.debug("Discarded null row, considering thrownAway."); - metrics.incrementThrownAway(); - return; - } - - final IncrementalIndexAddResult addResult; - try { - addResult = plumber.add(inputRow, committerSupplier); - } - catch (IndexSizeExceededException e) { - // Shouldn't happen if this is only being called by a single thread. - // plumber.add should be swapping out indexes before they fill up. - throw new ISE(e, "Index size exceeded"); - } - - if (addResult.getRowCount() == -1) { - metrics.incrementThrownAway(); - log.debug("Discarded row[%s], considering thrownAway due to %s.", inputRow, addResult.getReasonOfNotAdded()); - return; - } - - if (addResult.getRowCount() == -2) { - metrics.incrementDedup(); - log.debug("Discarded row[%s], considering duplication.", inputRow); - return; - } - - metrics.incrementProcessed(); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java deleted file mode 100644 index 0380abf9f22..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumber.java +++ /dev/null @@ -1,1014 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import com.google.common.base.Stopwatch; -import com.google.common.base.Supplier; -import com.google.common.collect.Collections2; -import com.google.common.collect.ImmutableMap; -import com.google.common.primitives.Ints; -import org.apache.druid.client.cache.Cache; -import org.apache.druid.client.cache.CacheConfig; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.common.guava.ThreadRenamingCallable; -import org.apache.druid.common.guava.ThreadRenamingRunnable; -import org.apache.druid.concurrent.TaskThreadPriority; -import org.apache.druid.data.input.Committer; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.granularity.Granularity; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.query.Query; -import org.apache.druid.query.QueryProcessingPool; -import org.apache.druid.query.QueryRunner; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.query.QuerySegmentWalker; -import org.apache.druid.query.SegmentDescriptor; -import org.apache.druid.segment.BaseProgressIndicator; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMerger; -import org.apache.druid.segment.Metadata; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.QueryableIndexSegment; -import org.apache.druid.segment.ReferenceCountingSegment; -import org.apache.druid.segment.handoff.SegmentHandoffNotifier; -import org.apache.druid.segment.incremental.IncrementalIndexAddResult; -import org.apache.druid.segment.incremental.IndexSizeExceededException; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.loading.DataSegmentPusher; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.FireHydrant; -import org.apache.druid.segment.realtime.SegmentPublisher; -import org.apache.druid.segment.realtime.appenderator.SinkQuerySegmentWalker; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.SegmentId; -import org.apache.druid.timeline.VersionedIntervalTimeline; -import org.apache.druid.timeline.partition.SingleElementPartitionChunk; -import org.apache.druid.utils.JvmUtils; -import org.joda.time.DateTime; -import org.joda.time.Duration; -import org.joda.time.Interval; -import org.joda.time.Period; - -import java.io.Closeable; -import java.io.File; -import java.io.FilenameFilter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import 
java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; - -/** - * - */ -public class RealtimePlumber implements Plumber -{ - private static final EmittingLogger log = new EmittingLogger(RealtimePlumber.class); - private static final int WARN_DELAY = 1000; - - private final DataSchema schema; - private final RealtimeTuningConfig config; - private final RejectionPolicy rejectionPolicy; - private final FireDepartmentMetrics metrics; - private final DataSegmentAnnouncer segmentAnnouncer; - private final DataSegmentPusher dataSegmentPusher; - private final SegmentPublisher segmentPublisher; - private final SegmentHandoffNotifier handoffNotifier; - private final Object handoffCondition = new Object(); - private final ConcurrentMap sinks = new ConcurrentHashMap<>(); - private final VersionedIntervalTimeline sinkTimeline = new VersionedIntervalTimeline( - String.CASE_INSENSITIVE_ORDER - ); - private final QuerySegmentWalker texasRanger; - private final Cache cache; - - private volatile long nextFlush = 0; - private volatile boolean shuttingDown = false; - private volatile boolean stopped = false; - private volatile boolean cleanShutdown = true; - private volatile ExecutorService persistExecutor = null; - private volatile ExecutorService mergeExecutor = null; - private volatile ScheduledExecutorService scheduledExecutor = null; - private volatile IndexMerger indexMerger; - private volatile IndexIO indexIO; - - private static final String COMMIT_METADATA_KEY = "%commitMetadata%"; - private static final String COMMIT_METADATA_TIMESTAMP_KEY = "%commitMetadataTimestamp%"; - - public RealtimePlumber( - DataSchema schema, - RealtimeTuningConfig config, - FireDepartmentMetrics metrics, - ServiceEmitter emitter, - QueryRunnerFactoryConglomerate conglomerate, - DataSegmentAnnouncer segmentAnnouncer, - QueryProcessingPool queryProcessingPool, - DataSegmentPusher dataSegmentPusher, - SegmentPublisher segmentPublisher, - SegmentHandoffNotifier handoffNotifier, - IndexMerger indexMerger, - IndexIO indexIO, - Cache cache, - CacheConfig cacheConfig, - CachePopulatorStats cachePopulatorStats, - ObjectMapper objectMapper - ) - { - this.schema = schema; - this.config = config; - this.rejectionPolicy = config.getRejectionPolicyFactory().create(config.getWindowPeriod()); - this.metrics = metrics; - this.segmentAnnouncer = segmentAnnouncer; - this.dataSegmentPusher = dataSegmentPusher; - this.segmentPublisher = segmentPublisher; - this.handoffNotifier = handoffNotifier; - this.indexMerger = Preconditions.checkNotNull(indexMerger, "Null IndexMerger"); - this.indexIO = Preconditions.checkNotNull(indexIO, "Null IndexIO"); - this.cache = cache; - this.texasRanger = new SinkQuerySegmentWalker( - schema.getDataSource(), - sinkTimeline, - objectMapper, - emitter, - conglomerate, - queryProcessingPool, - cache, - cacheConfig, - cachePopulatorStats - ); - - log.info("Creating plumber using rejectionPolicy[%s]", getRejectionPolicy()); - } - - public DataSchema getSchema() - { - return schema; - } - - public RealtimeTuningConfig getConfig() - { - return config; - } - - public RejectionPolicy getRejectionPolicy() - { - return rejectionPolicy; - } - - public Map getSinks() - { - return sinks; - } - - @Override - public Object startJob() - { - try { - FileUtils.mkdirp(computeBaseDir(schema)); - } - catch (IOException e) { - throw new RuntimeException(e); - } - - initializeExecutors(); - handoffNotifier.start(); - 
Object retVal = bootstrapSinksFromDisk(); - startPersistThread(); - // Push pending sinks bootstrapped from previous run - mergeAndPush(); - resetNextFlush(); - return retVal; - } - - @Override - public IncrementalIndexAddResult add(InputRow row, Supplier committerSupplier) - throws IndexSizeExceededException - { - long messageTimestamp = row.getTimestampFromEpoch(); - final Sink sink = getSink(messageTimestamp); - metrics.reportMessageMaxTimestamp(messageTimestamp); - if (sink == null) { - return Plumber.THROWAWAY; - } - - final IncrementalIndexAddResult addResult = sink.add(row, false); - if (config.isReportParseExceptions() && addResult.getParseException() != null) { - throw addResult.getParseException(); - } - - if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush) { - persist(committerSupplier.get()); - } - - return addResult; - } - - private Sink getSink(long timestamp) - { - if (!rejectionPolicy.accept(timestamp)) { - return null; - } - - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final VersioningPolicy versioningPolicy = config.getVersioningPolicy(); - - DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp)); - final long truncatedTime = truncatedDateTime.getMillis(); - - Sink retVal = sinks.get(truncatedTime); - - if (retVal == null) { - final Interval sinkInterval = new Interval( - truncatedDateTime, - segmentGranularity.increment(truncatedDateTime) - ); - - retVal = new Sink( - sinkInterval, - schema, - config.getShardSpec(), - versioningPolicy.getVersion(sinkInterval), - config.getAppendableIndexSpec(), - config.getMaxRowsInMemory(), - config.getMaxBytesInMemoryOrDefault(), - true, - config.getDedupColumn() - ); - addSink(retVal); - - } - - return retVal; - } - - @Override - public QueryRunner getQueryRunner(final Query query) - { - // Calling getQueryRunnerForIntervals here works because there's only one segment per interval for RealtimePlumber. - return texasRanger.getQueryRunnerForIntervals(query, query.getIntervals()); - } - - @Override - public void persist(final Committer committer) - { - final List> indexesToPersist = new ArrayList<>(); - for (Sink sink : sinks.values()) { - if (sink.swappable()) { - indexesToPersist.add(Pair.of(sink.swap(), sink.getInterval())); - } - } - - log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource()); - - final Stopwatch runExecStopwatch = Stopwatch.createStarted(); - final Stopwatch persistStopwatch = Stopwatch.createStarted(); - - final Map metadataElems = committer.getMetadata() == null ? null : - ImmutableMap.of( - COMMIT_METADATA_KEY, - committer.getMetadata(), - COMMIT_METADATA_TIMESTAMP_KEY, - System.currentTimeMillis() - ); - - persistExecutor.execute( - new ThreadRenamingRunnable(StringUtils.format("%s-incremental-persist", schema.getDataSource())) - { - @Override - public void doRun() - { - /* Note: - If plumber crashes after storing a subset of all the hydrants then we will lose data and next - time we will start with the commitMetadata stored in those hydrants. - option#1: - maybe it makes sense to store the metadata outside the segments in a separate file. This is because the - commit metadata isn't really associated with an individual segment-- it's associated with a set of segments - that are persisted at the same time or maybe whole datasource. So storing it in the segments is asking for problems. 
- Sort of like this: - - { - "metadata" : {"foo": "bar"}, - "segments": [ - {"id": "datasource_2000_2001_2000_1", "hydrant": 10}, - {"id": "datasource_2001_2002_2001_1", "hydrant": 12}, - ] - } - When a realtime node crashes and starts back up, it would delete any hydrants numbered higher than the - ones in the commit file. - - option#2 - We could also just include the set of segments for the same chunk of metadata in more metadata on each - of the segments. we might also have to think about the hand-off in terms of the full set of segments being - handed off instead of individual segments being handed off (that is, if one of the set succeeds in handing - off and the others fail, the real-time would believe that it needs to re-ingest the data). - */ - long persistThreadCpuTime = JvmUtils.safeGetThreadCpuTime(); - try { - for (Pair pair : indexesToPersist) { - metrics.incrementRowOutputCount( - persistHydrant(pair.lhs, schema, pair.rhs, metadataElems) - ); - } - committer.run(); - } - catch (Exception e) { - metrics.incrementFailedPersists(); - throw e; - } - finally { - metrics.incrementPersistCpuTime(JvmUtils.safeGetThreadCpuTime() - persistThreadCpuTime); - metrics.incrementNumPersists(); - metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS)); - persistStopwatch.stop(); - } - } - } - ); - - final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS); - metrics.incrementPersistBackPressureMillis(startDelay); - if (startDelay > WARN_DELAY) { - log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay); - } - runExecStopwatch.stop(); - resetNextFlush(); - } - - // Submits persist-n-merge task for a Sink to the mergeExecutor - private void persistAndMerge(final long truncatedTime, final Sink sink) - { - final String threadName = StringUtils.format( - "%s-%s-persist-n-merge", schema.getDataSource(), DateTimes.utc(truncatedTime) - ); - mergeExecutor.execute( - new ThreadRenamingRunnable(threadName) - { - final Interval interval = sink.getInterval(); - Stopwatch mergeStopwatch = null; - - @Override - public void doRun() - { - try { - // Bail out if this sink has been abandoned by a previously-executed task. - if (sinks.get(truncatedTime) != sink) { - log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink); - return; - } - - // Use a file to indicate that pushing has completed. - final File persistDir = computePersistDir(schema, interval); - final File mergedTarget = new File(persistDir, "merged"); - final File isPushedMarker = new File(persistDir, "isPushedMarker"); - - if (!isPushedMarker.exists()) { - removeSegment(sink, mergedTarget); - if (mergedTarget.exists()) { - log.warn("Merged target[%s] still exists after attempt to delete it; skipping push.", mergedTarget); - return; - } - } else { - log.info("Already pushed sink[%s]", sink); - return; - } - - /* - Note: it the plumber crashes after persisting a subset of hydrants then might duplicate data as these - hydrants will be read but older commitMetadata will be used. fixing this possibly needs structural - changes to plumber. - */ - for (FireHydrant hydrant : sink) { - synchronized (hydrant) { - if (!hydrant.hasSwapped()) { - log.info("Hydrant[%s] hasn't swapped yet, swapping. 
Sink[%s]", hydrant, sink); - final int rowCount = persistHydrant(hydrant, schema, interval, null); - metrics.incrementRowOutputCount(rowCount); - } - } - } - final long mergeThreadCpuTime = JvmUtils.safeGetThreadCpuTime(); - mergeStopwatch = Stopwatch.createStarted(); - - final File mergedFile; - List indexes = new ArrayList<>(); - Closer closer = Closer.create(); - try { - for (FireHydrant fireHydrant : sink) { - Pair segmentAndCloseable = fireHydrant.getAndIncrementSegment(); - final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex(); - log.info("Adding hydrant[%s]", fireHydrant); - indexes.add(queryableIndex); - closer.register(segmentAndCloseable.rhs); - } - - mergedFile = indexMerger.mergeQueryableIndex( - indexes, - schema.getGranularitySpec().isRollup(), - schema.getAggregators(), - null, - mergedTarget, - config.getIndexSpec(), - config.getIndexSpecForIntermediatePersists(), - new BaseProgressIndicator(), - config.getSegmentWriteOutMediumFactory(), - -1 - ); - } - catch (Throwable t) { - throw closer.rethrow(t); - } - finally { - closer.close(); - } - - // emit merge metrics before publishing segment - metrics.incrementMergeCpuTime(JvmUtils.safeGetThreadCpuTime() - mergeThreadCpuTime); - metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS)); - - log.info("Pushing [%s] to deep storage", sink.getSegment().getId()); - - DataSegment segment = dataSegmentPusher.push( - mergedFile, - sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())), - false - ); - log.info("Inserting [%s] to the metadata store", sink.getSegment().getId()); - segmentPublisher.publishSegment(segment); - - if (!isPushedMarker.createNewFile()) { - log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()) - .addData("interval", sink.getInterval()) - .addData("partitionNum", segment.getShardSpec().getPartitionNum()) - .addData("marker", isPushedMarker) - .emit(); - } - } - catch (Exception e) { - metrics.incrementFailedHandoffs(); - log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()) - .addData("interval", interval) - .emit(); - if (shuttingDown) { - // We're trying to shut down, and this segment failed to push. Let's just get rid of it. - // This call will also delete possibly-partially-written files, so we don't need to do it explicitly. - cleanShutdown = false; - abandonSegment(truncatedTime, sink); - } - } - finally { - if (mergeStopwatch != null) { - mergeStopwatch.stop(); - } - } - } - } - ); - handoffNotifier.registerSegmentHandoffCallback( - new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), - mergeExecutor, new Runnable() - { - @Override - public void run() - { - abandonSegment(sink.getInterval().getStartMillis(), sink); - metrics.incrementHandOffCount(); - } - } - ); - } - - @Override - public void finishJob() - { - log.info("Shutting down..."); - - shuttingDown = true; - - for (final Map.Entry entry : sinks.entrySet()) { - entry.getValue().clearDedupCache(); - persistAndMerge(entry.getKey(), entry.getValue()); - } - - final long forceEndWaitTime = System.currentTimeMillis() + config.getHandoffConditionTimeout(); - while (!sinks.isEmpty()) { - try { - log.info( - "Cannot shut down yet! 
Sinks remaining: %s", - Collections2.transform(sinks.values(), sink -> sink.getSegment().getId()) - ); - - synchronized (handoffCondition) { - while (!sinks.isEmpty()) { - if (config.getHandoffConditionTimeout() == 0) { - handoffCondition.wait(); - } else { - long curr = System.currentTimeMillis(); - if (forceEndWaitTime - curr > 0) { - handoffCondition.wait(forceEndWaitTime - curr); - } else { - throw new ISE( - "Segment handoff wait timeout. [%s] segments might not have completed handoff.", - sinks.size() - ); - } - } - } - } - } - catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - handoffNotifier.close(); - shutdownExecutors(); - - stopped = true; - - if (!cleanShutdown) { - throw new ISE("Exception occurred during persist and merge."); - } - } - - private void resetNextFlush() - { - nextFlush = DateTimes.nowUtc().plus(config.getIntermediatePersistPeriod()).getMillis(); - } - - protected void initializeExecutors() - { - final int maxPendingPersists = config.getMaxPendingPersists(); - - if (persistExecutor == null) { - // use a blocking single threaded executor to throttle the firehose when write to disk is slow - persistExecutor = Execs.newBlockingSingleThreaded( - "plumber_persist_%d", - maxPendingPersists, - TaskThreadPriority.getThreadPriorityFromTaskPriority(config.getPersistThreadPriority()) - ); - } - if (mergeExecutor == null) { - // use a blocking single threaded executor to throttle the firehose when write to disk is slow - mergeExecutor = Execs.newBlockingSingleThreaded( - "plumber_merge_%d", - 1, - TaskThreadPriority.getThreadPriorityFromTaskPriority(config.getMergeThreadPriority()) - ); - } - - if (scheduledExecutor == null) { - scheduledExecutor = Execs.scheduledSingleThreaded("plumber_scheduled_%d"); - } - } - - protected void shutdownExecutors() - { - // scheduledExecutor is shutdown here - if (scheduledExecutor != null) { - scheduledExecutor.shutdown(); - persistExecutor.shutdown(); - mergeExecutor.shutdown(); - } - } - - protected Object bootstrapSinksFromDisk() - { - final VersioningPolicy versioningPolicy = config.getVersioningPolicy(); - - File baseDir = computeBaseDir(schema); - if (baseDir == null || !baseDir.exists()) { - return null; - } - - File[] files = baseDir.listFiles(); - if (files == null) { - return null; - } - - Object metadata = null; - long latestCommitTime = 0; - for (File sinkDir : files) { - final Interval sinkInterval = Intervals.of(sinkDir.getName().replace('_', '/')); - - //final File[] sinkFiles = sinkDir.listFiles(); - // To avoid reading and listing of "merged" dir - final File[] sinkFiles = sinkDir.listFiles( - new FilenameFilter() - { - @Override - public boolean accept(File dir, String fileName) - { - return !(Ints.tryParse(fileName) == null); - } - } - ); - Arrays.sort( - sinkFiles, - new Comparator() - { - @Override - public int compare(File o1, File o2) - { - try { - return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())); - } - catch (NumberFormatException e) { - log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2); - return o1.compareTo(o2); - } - } - } - ); - boolean isCorrupted = false; - List hydrants = new ArrayList<>(); - for (File segmentDir : sinkFiles) { - log.info("Loading previously persisted segment at [%s]", segmentDir); - - // Although this has been tackled at start of this method. - // Just a doubly-check added to skip "merged" dir. from being added to hydrants - // If 100% sure that this is not needed, this check can be removed. 
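bootstrapSinksFromDisk() above lists each persisted sink directory, keeps only the numerically named hydrant directories, and sorts them by hydrant number. A plain-JDK sketch of that scan; the HydrantDirs class and method name are illustrative assumptions.

import java.io.File;
import java.util.Arrays;
import java.util.Comparator;

public class HydrantDirs
{
  // List the numerically named hydrant directories under a sink directory,
  // sorted by hydrant number, skipping anything else (such as "merged").
  public static File[] listHydrantDirs(File sinkDir)
  {
    final File[] files = sinkDir.listFiles((dir, name) -> name.matches("\\d+"));
    if (files == null) {
      return new File[0];
    }
    Arrays.sort(files, Comparator.comparingInt((File f) -> Integer.parseInt(f.getName())));
    return files;
  }
}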
- if (Ints.tryParse(segmentDir.getName()) == null) { - continue; - } - QueryableIndex queryableIndex = null; - try { - queryableIndex = indexIO.loadIndex(segmentDir); - } - catch (IOException e) { - log.error(e, "Problem loading segmentDir from disk."); - isCorrupted = true; - } - if (isCorrupted) { - try { - File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema); - log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath()); - org.apache.commons.io.FileUtils.copyDirectory(segmentDir, corruptSegmentDir); - FileUtils.deleteDirectory(segmentDir); - } - catch (Exception e1) { - log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath()); - } - //Note: skipping corrupted segment might lead to dropping some data. This strategy should be changed - //at some point. - continue; - } - Metadata segmentMetadata = queryableIndex.getMetadata(); - if (segmentMetadata != null) { - Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY); - if (timestampObj != null) { - long timestamp = ((Long) timestampObj).longValue(); - if (timestamp > latestCommitTime) { - log.info( - "Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]", - queryableIndex.getMetadata(), - timestamp, - latestCommitTime - ); - latestCommitTime = timestamp; - metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY); - } - } - } - hydrants.add( - new FireHydrant( - new QueryableIndexSegment( - queryableIndex, - SegmentId.of( - schema.getDataSource(), - sinkInterval, - versioningPolicy.getVersion(sinkInterval), - config.getShardSpec() - ) - ), - Integer.parseInt(segmentDir.getName()) - ) - ); - } - if (hydrants.isEmpty()) { - // Probably encountered a corrupt sink directory - log.warn( - "Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", - sinkDir.getAbsolutePath() - ); - continue; - } - final Sink currSink = new Sink( - sinkInterval, - schema, - config.getShardSpec(), - versioningPolicy.getVersion(sinkInterval), - config.getAppendableIndexSpec(), - config.getMaxRowsInMemory(), - config.getMaxBytesInMemoryOrDefault(), - true, - config.getDedupColumn(), - hydrants - ); - addSink(currSink); - } - return metadata; - } - - private void addSink(final Sink sink) - { - sinks.put(sink.getInterval().getStartMillis(), sink); - metrics.setSinkCount(sinks.size()); - sinkTimeline.add( - sink.getInterval(), - sink.getVersion(), - new SingleElementPartitionChunk<>(sink) - ); - try { - segmentAnnouncer.announceSegment(sink.getSegment()); - } - catch (IOException e) { - log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource()) - .addData("interval", sink.getInterval()) - .emit(); - } - clearDedupCache(); - } - - protected void startPersistThread() - { - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final Period windowPeriod = config.getWindowPeriod(); - - final DateTime truncatedNow = segmentGranularity.bucketStart(DateTimes.nowUtc()); - final long windowMillis = windowPeriod.toStandardDuration().getMillis(); - - log.info( - "Expect to run at [%s]", - DateTimes.nowUtc().plus( - new Duration( - System.currentTimeMillis(), - segmentGranularity.increment(truncatedNow).getMillis() + windowMillis - ) - ) - ); - - String threadName = StringUtils.format( - "%s-overseer-%d", - schema.getDataSource(), - config.getShardSpec().getPartitionNum() - ); - ThreadRenamingCallable threadRenamingCallable = - new 
ThreadRenamingCallable(threadName) - { - @Override - public ScheduledExecutors.Signal doCall() - { - if (stopped) { - log.info("Stopping merge-n-push overseer thread"); - return ScheduledExecutors.Signal.STOP; - } - - mergeAndPush(); - - if (stopped) { - log.info("Stopping merge-n-push overseer thread"); - return ScheduledExecutors.Signal.STOP; - } else { - return ScheduledExecutors.Signal.REPEAT; - } - } - }; - Duration initialDelay = new Duration( - System.currentTimeMillis(), - segmentGranularity.increment(truncatedNow).getMillis() + windowMillis - ); - Duration rate = new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)); - ScheduledExecutors.scheduleAtFixedRate(scheduledExecutor, initialDelay, rate, threadRenamingCallable); - } - - private void clearDedupCache() - { - long minTimestamp = getAllowedMinTime().getMillis(); - - for (Map.Entry entry : sinks.entrySet()) { - final Long intervalStart = entry.getKey(); - if (intervalStart < minTimestamp) { - entry.getValue().clearDedupCache(); - } - } - } - - private DateTime getAllowedMinTime() - { - final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity(); - final Period windowPeriod = config.getWindowPeriod(); - - final long windowMillis = windowPeriod.toStandardDuration().getMillis(); - return segmentGranularity.bucketStart( - DateTimes.utc(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis) - ); - } - - private void mergeAndPush() - { - log.info("Starting merge and push."); - DateTime minTimestampAsDate = getAllowedMinTime(); - long minTimestamp = minTimestampAsDate.getMillis(); - - log.info( - "Found [%,d] segments. Attempting to hand off segments that start before [%s].", - sinks.size(), - minTimestampAsDate - ); - - List> sinksToPush = new ArrayList<>(); - for (Map.Entry entry : sinks.entrySet()) { - final Long intervalStart = entry.getKey(); - if (intervalStart < minTimestamp) { - log.info("Adding entry [%s] for merge and push.", entry); - sinksToPush.add(entry); - entry.getValue().clearDedupCache(); - } else { - log.info( - "Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", - entry, - DateTimes.utc(intervalStart), - minTimestampAsDate - ); - } - } - - log.info("Found [%,d] sinks to persist and merge", sinksToPush.size()); - - for (final Map.Entry entry : sinksToPush) { - persistAndMerge(entry.getKey(), entry.getValue()); - } - } - - /** - * Unannounces a given sink and removes all local references to it. It is important that this is only called - * from the single-threaded mergeExecutor, since otherwise chaos may ensue if merged segments are deleted while - * being created. 
- * - * @param truncatedTime sink key - * @param sink sink to unannounce - */ - protected void abandonSegment(final long truncatedTime, final Sink sink) - { - if (sinks.containsKey(truncatedTime)) { - try { - segmentAnnouncer.unannounceSegment(sink.getSegment()); - removeSegment(sink, computePersistDir(schema, sink.getInterval())); - log.info("Removing sinkKey %d for segment %s", truncatedTime, sink.getSegment().getId()); - sinks.remove(truncatedTime); - metrics.setSinkCount(sinks.size()); - sinkTimeline.remove( - sink.getInterval(), - sink.getVersion(), - new SingleElementPartitionChunk<>(sink) - ); - for (FireHydrant hydrant : sink) { - cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant)); - hydrant.swapSegment(null); - } - synchronized (handoffCondition) { - handoffCondition.notifyAll(); - } - } - catch (Exception e) { - log.makeAlert(e, "Unable to abandon old segment for dataSource[%s]", schema.getDataSource()) - .addData("interval", sink.getInterval()) - .emit(); - } - } - } - - protected File computeBaseDir(DataSchema schema) - { - return new File(config.getBasePersistDirectory(), schema.getDataSource()); - } - - protected File computeCorruptedFileDumpDir(File persistDir, DataSchema schema) - { - return new File( - StringUtils.replace( - persistDir.getAbsolutePath(), - schema.getDataSource(), - "corrupted" + File.pathSeparator + schema.getDataSource() - ) - ); - } - - protected File computePersistDir(DataSchema schema, Interval interval) - { - return new File(computeBaseDir(schema), interval.toString().replace('/', '_')); - } - - /** - * Persists the given hydrant and returns the number of rows persisted - * - * @param indexToPersist hydrant to persist - * @param schema datasource schema - * @param interval interval to persist - * - * @return the number of rows persisted - */ - protected int persistHydrant( - FireHydrant indexToPersist, - DataSchema schema, - Interval interval, - Map metadataElems - ) - { - synchronized (indexToPersist) { - if (indexToPersist.hasSwapped()) { - log.info( - "DataSource[%s], Interval[%s], Hydrant[%s] already swapped. 
Ignoring request to persist.", - schema.getDataSource(), interval, indexToPersist - ); - return 0; - } - - log.info( - "DataSource[%s], Interval[%s], Metadata [%s] persisting Hydrant[%s]", - schema.getDataSource(), - interval, - metadataElems, - indexToPersist - ); - try { - int numRows = indexToPersist.getIndex().size(); - - indexToPersist.getIndex().getMetadata().putAll(metadataElems); - final File persistedFile = indexMerger.persist( - indexToPersist.getIndex(), - interval, - new File(computePersistDir(schema, interval), String.valueOf(indexToPersist.getCount())), - config.getIndexSpecForIntermediatePersists(), - config.getSegmentWriteOutMediumFactory() - ); - - indexToPersist.swapSegment( - new QueryableIndexSegment(indexIO.loadIndex(persistedFile), indexToPersist.getSegmentId()) - ); - return numRows; - } - catch (IOException e) { - log.makeAlert("dataSource[%s] -- incremental persist failed", schema.getDataSource()) - .addData("interval", interval) - .addData("count", indexToPersist.getCount()) - .emit(); - - throw new RuntimeException(e); - } - } - } - - private void removeSegment(final Sink sink, final File target) - { - if (target.exists()) { - try { - log.info("Deleting Index File[%s]", target); - FileUtils.deleteDirectory(target); - } - catch (Exception e) { - log.makeAlert(e, "Unable to remove file for dataSource[%s]", schema.getDataSource()) - .addData("file", target) - .addData("interval", sink.getInterval()) - .emit(); - } - } - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java deleted file mode 100644 index 8b19153a9de..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchool.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.annotation.JacksonInject; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import org.apache.druid.client.cache.Cache; -import org.apache.druid.client.cache.CacheConfig; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.query.QueryProcessingPool; -import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.segment.IndexIO; -import org.apache.druid.segment.IndexMergerV9; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.join.JoinableFactory; -import org.apache.druid.segment.loading.DataSegmentPusher; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.SegmentPublisher; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; - -/** - * - */ -public class RealtimePlumberSchool implements PlumberSchool -{ - private final ServiceEmitter emitter; - private final QueryRunnerFactoryConglomerate conglomerate; - private final DataSegmentPusher dataSegmentPusher; - private final DataSegmentAnnouncer segmentAnnouncer; - private final SegmentPublisher segmentPublisher; - private final SegmentHandoffNotifierFactory handoffNotifierFactory; - private final QueryProcessingPool queryProcessingPool; - private final JoinableFactory joinableFactory; - private final IndexMergerV9 indexMergerV9; - private final IndexIO indexIO; - private final Cache cache; - private final CacheConfig cacheConfig; - private final CachePopulatorStats cachePopulatorStats; - private final ObjectMapper objectMapper; - - @JsonCreator - public RealtimePlumberSchool( - @JacksonInject ServiceEmitter emitter, - @JacksonInject QueryRunnerFactoryConglomerate conglomerate, - @JacksonInject DataSegmentPusher dataSegmentPusher, - @JacksonInject DataSegmentAnnouncer segmentAnnouncer, - @JacksonInject SegmentPublisher segmentPublisher, - @JacksonInject SegmentHandoffNotifierFactory handoffNotifierFactory, - @JacksonInject QueryProcessingPool queryProcessingPool, - @JacksonInject JoinableFactory joinableFactory, - @JacksonInject IndexMergerV9 indexMergerV9, - @JacksonInject IndexIO indexIO, - @JacksonInject Cache cache, - @JacksonInject CacheConfig cacheConfig, - @JacksonInject CachePopulatorStats cachePopulatorStats, - @JacksonInject ObjectMapper objectMapper - ) - { - this.emitter = emitter; - this.conglomerate = conglomerate; - this.dataSegmentPusher = dataSegmentPusher; - this.segmentAnnouncer = segmentAnnouncer; - this.segmentPublisher = segmentPublisher; - this.handoffNotifierFactory = handoffNotifierFactory; - this.queryProcessingPool = queryProcessingPool; - this.joinableFactory = joinableFactory; - this.indexMergerV9 = Preconditions.checkNotNull(indexMergerV9, "Null IndexMergerV9"); - this.indexIO = Preconditions.checkNotNull(indexIO, "Null IndexIO"); - - this.cache = cache; - this.cacheConfig = cacheConfig; - this.cachePopulatorStats = cachePopulatorStats; - this.objectMapper = objectMapper; - } - - @Override - public Plumber findPlumber( - final DataSchema schema, - final RealtimeTuningConfig config, - final FireDepartmentMetrics metrics - ) - { - verifyState(); - - return new 
RealtimePlumber( - schema, - config, - metrics, - emitter, - conglomerate, - segmentAnnouncer, - queryProcessingPool, - dataSegmentPusher, - segmentPublisher, - handoffNotifierFactory.createSegmentHandoffNotifier(schema.getDataSource()), - indexMergerV9, - indexIO, - cache, - cacheConfig, - cachePopulatorStats, - objectMapper - ); - } - - private void verifyState() - { - Preconditions.checkNotNull(conglomerate, "must specify a queryRunnerFactoryConglomerate to do this action."); - Preconditions.checkNotNull(dataSegmentPusher, "must specify a segmentPusher to do this action."); - Preconditions.checkNotNull(segmentAnnouncer, "must specify a segmentAnnouncer to do this action."); - Preconditions.checkNotNull(segmentPublisher, "must specify a segmentPublisher to do this action."); - Preconditions.checkNotNull(handoffNotifierFactory, "must specify a handoffNotifierFactory to do this action."); - Preconditions.checkNotNull(emitter, "must specify a serviceEmitter to do this action."); - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RejectionPolicy.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RejectionPolicy.java deleted file mode 100644 index e1dd6185bef..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RejectionPolicy.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import org.joda.time.DateTime; - -public interface RejectionPolicy -{ - DateTime getCurrMaxTime(); - boolean accept(long timestamp); -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RejectionPolicyFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/RejectionPolicyFactory.java deleted file mode 100644 index f9b559f179d..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/RejectionPolicyFactory.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import org.joda.time.Period; - -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "serverTime", value = ServerTimeRejectionPolicyFactory.class), - @JsonSubTypes.Type(name = "messageTime", value = MessageTimeRejectionPolicyFactory.class), - @JsonSubTypes.Type(name = "none", value = NoopRejectionPolicyFactory.class) -}) -public interface RejectionPolicyFactory -{ - RejectionPolicy create(Period windowPeriod); -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/ServerTimeRejectionPolicyFactory.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/ServerTimeRejectionPolicyFactory.java deleted file mode 100644 index 9cbd2cca23f..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/ServerTimeRejectionPolicyFactory.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.StringUtils; -import org.joda.time.DateTime; -import org.joda.time.Period; - -public class ServerTimeRejectionPolicyFactory implements RejectionPolicyFactory -{ - @Override - public RejectionPolicy create(final Period windowPeriod) - { - final long windowMillis = windowPeriod.toStandardDuration().getMillis(); - - return new RejectionPolicy() - { - @Override - public DateTime getCurrMaxTime() - { - return DateTimes.nowUtc(); - } - - @Override - public boolean accept(long timestamp) - { - long now = System.currentTimeMillis(); - - boolean notTooOld = timestamp >= (now - windowMillis); - boolean notTooYoung = timestamp <= (now + windowMillis); - - return notTooOld && notTooYoung; - } - - @Override - public String toString() - { - return StringUtils.format("serverTime-%s", windowPeriod); - } - }; - } -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/VersioningPolicy.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/VersioningPolicy.java deleted file mode 100644 index 7d6e539f33a..00000000000 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/VersioningPolicy.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import org.joda.time.Interval; - -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = "intervalStart", value = IntervalStartVersioningPolicy.class), - @JsonSubTypes.Type(name = "custom", value = CustomVersioningPolicy.class) - -}) -public interface VersioningPolicy -{ - String getVersion(Interval interval); -} diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Committers.java b/server/src/main/java/org/apache/druid/segment/realtime/sink/Committers.java similarity index 96% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/Committers.java rename to server/src/main/java/org/apache/druid/segment/realtime/sink/Committers.java index aa0bcbe50fb..8efd117b06c 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Committers.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/sink/Committers.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.realtime.sink; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java similarity index 92% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java rename to server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java index 4035f606434..44d0db3cb7f 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/sink/Sink.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.realtime.sink; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Predicate; @@ -60,13 +60,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -74,6 +72,8 @@ import java.util.function.Function; public class Sink implements Iterable, Overshadowable { + private static final IncrementalIndexAddResult NOT_WRITABLE = new IncrementalIndexAddResult(-1, -1, "not writable"); + private static final IncrementalIndexAddResult ALREADY_SWAPPED = new IncrementalIndexAddResult(-1, -1, "write after index swapped"); private static final Logger log = new Logger(Sink.class); @@ -98,8 +98,6 @@ public class Sink implements Iterable, Overshadowable private final Map columnTypeExcludingCurrIndex = new HashMap<>(); private final AtomicInteger numRowsExcludingCurrIndex = new AtomicInteger(); - private final String dedupColumn; - private final Set dedupSet = new HashSet<>(); private volatile FireHydrant currHydrant; private volatile boolean writable = true; @@ -112,8 +110,7 @@ public class Sink implements Iterable, Overshadowable AppendableIndexSpec appendableIndexSpec, int maxRowsInMemory, long maxBytesInMemory, - boolean useMaxMemoryEstimates, - String dedupColumn + boolean useMaxMemoryEstimates ) { this( @@ -125,7 +122,6 @@ public class Sink implements Iterable, Overshadowable maxRowsInMemory, maxBytesInMemory, useMaxMemoryEstimates, - dedupColumn, Collections.emptyList() ); } @@ -139,7 +135,6 @@ public class Sink implements Iterable, Overshadowable int maxRowsInMemory, long maxBytesInMemory, boolean useMaxMemoryEstimates, - String dedupColumn, List hydrants ) { @@ -151,7 +146,6 @@ public class Sink implements Iterable, Overshadowable this.maxRowsInMemory = maxRowsInMemory; this.maxBytesInMemory = maxBytesInMemory; this.useMaxMemoryEstimates = useMaxMemoryEstimates; - this.dedupColumn = dedupColumn; int maxCount = -1; for (int i = 0; i < hydrants.size(); ++i) { @@ -175,11 +169,6 @@ public class Sink implements Iterable, Overshadowable makeNewCurrIndex(interval.getStartMillis(), schema); } - public void clearDedupCache() - { - dedupSet.clear(); - } - public Interval getInterval() { return interval; @@ -198,7 +187,7 @@ public class Sink implements Iterable, Overshadowable synchronized (hydrantLock) { if (!writable) { - return Plumber.NOT_WRITABLE; + return NOT_WRITABLE; } IncrementalIndex index = currHydrant.getIndex(); @@ -206,10 +195,6 @@ public class Sink implements Iterable, Overshadowable return ALREADY_SWAPPED; // the hydrant was swapped without being replaced } - if (checkInDedupSet(row)) { - return Plumber.DUPLICATE; - } - return index.add(row, skipMaxRowsInMemoryCheck); } } @@ -267,7 +252,6 @@ public class Sink implements Iterable, Overshadowable return false; } writable = false; - clearDedupCache(); } return true; } @@ -334,41 +318,6 @@ public class Sink implements Iterable, Overshadowable return acquireSegmentReferences(hydrants, segmentMapFn, skipIncrementalSegment); } - private boolean checkInDedupSet(InputRow row) - { - if (dedupColumn != null) { - Object value = row.getRaw(dedupColumn); - if 
(value != null) { - if (value instanceof List) { - throw new IAE("Dedup on multi-value field not support"); - } - Long pk; - if (value instanceof Long || value instanceof Integer) { - pk = ((Number) value).longValue(); - } else { - // use long type hashcode to reduce heap cost. - // maybe hash collision, but it's more important to avoid OOM - pk = pkHash(String.valueOf(value)); - } - if (dedupSet.contains(pk)) { - return true; - } - dedupSet.add(pk); - } - } - return false; - } - - private long pkHash(String s) - { - long seed = 131; // 31 131 1313 13131 131313 etc.. BKDRHash - long hash = 0; - for (int i = 0; i < s.length(); i++) { - hash = (hash * seed) + s.charAt(i); - } - return hash; - } - private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) { final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/SinkSegmentReference.java b/server/src/main/java/org/apache/druid/segment/realtime/sink/SinkSegmentReference.java similarity index 97% rename from server/src/main/java/org/apache/druid/segment/realtime/plumber/SinkSegmentReference.java rename to server/src/main/java/org/apache/druid/segment/realtime/sink/SinkSegmentReference.java index 10dfc2b275e..c24cf118661 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/SinkSegmentReference.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/sink/SinkSegmentReference.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.realtime.sink; import org.apache.druid.segment.SegmentReference; diff --git a/server/src/main/java/org/apache/druid/server/ClientQuerySegmentWalker.java b/server/src/main/java/org/apache/druid/server/ClientQuerySegmentWalker.java index 1d3b38b2fdb..d49ce3909f7 100644 --- a/server/src/main/java/org/apache/druid/server/ClientQuerySegmentWalker.java +++ b/server/src/main/java/org/apache/druid/server/ClientQuerySegmentWalker.java @@ -746,6 +746,7 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker { Optional> framesOptional; + boolean startedAccumulating = false; try { framesOptional = toolChest.resultsAsFrames( query, @@ -760,6 +761,9 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker Sequence frames = framesOptional.get(); List frameSignaturePairs = new ArrayList<>(); + + startedAccumulating = true; + frames.forEach( frame -> { limitAccumulator.addAndGet(frame.getFrame().numRows()); @@ -772,21 +776,29 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker } ); return Optional.of(new FrameBasedInlineDataSource(frameSignaturePairs, toolChest.resultArraySignature(query))); - - } - catch (ResourceLimitExceededException e) { - throw e; } catch (UnsupportedColumnTypeException e) { subqueryStatsProvider.incrementSubqueriesFallingBackDueToUnsufficientTypeInfo(); log.debug(e, "Type info in signature insufficient to materialize rows as frames."); return Optional.empty(); } + catch (ResourceLimitExceededException e) { + throw e; + } catch (Exception e) { - subqueryStatsProvider.incrementSubqueriesFallingBackDueToUnknownReason(); - log.debug(e, "Unable to materialize the results as frames due to an unhandleable exception " - + "while conversion. 
Defaulting to materializing the results as rows"); - return Optional.empty(); + if (startedAccumulating) { + // If we have opened the resultSequence, we can't fall back safely as the resultSequence might hold some resources + // that we release on exception, and we need to throw the exception to disable the 'maxSubqueryBytes' configuration + throw DruidException.defensive() + .build( + e, + "Unable to materialize the results as frames for estimating the byte footprint. " + + "Please disable the 'maxSubqueryBytes' by setting it to 'disabled' in the query context or removing it altogether " + + "from the query context and/or the server config." + ); + } else { + return Optional.empty(); + } } } diff --git a/server/src/main/java/org/apache/druid/server/coordination/ChangeRequestsSnapshot.java b/server/src/main/java/org/apache/druid/server/coordination/ChangeRequestsSnapshot.java index 14113bed6b2..a86453df66d 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/ChangeRequestsSnapshot.java +++ b/server/src/main/java/org/apache/druid/server/coordination/ChangeRequestsSnapshot.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import javax.annotation.Nullable; - import java.util.List; /** diff --git a/server/src/main/java/org/apache/druid/server/coordination/DataSegmentAnnouncer.java b/server/src/main/java/org/apache/druid/server/coordination/DataSegmentAnnouncer.java index bf708206651..08368cbcabe 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/DataSegmentAnnouncer.java +++ b/server/src/main/java/org/apache/druid/server/coordination/DataSegmentAnnouncer.java @@ -29,11 +29,11 @@ public interface DataSegmentAnnouncer { void announceSegment(DataSegment segment) throws IOException; - void unannounceSegment(DataSegment segment) throws IOException; + void unannounceSegment(DataSegment segment); void announceSegments(Iterable segments) throws IOException; - void unannounceSegments(Iterable segments) throws IOException; + void unannounceSegments(Iterable segments); /** * Announces schema associated with all segments for the specified realtime task. 
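The DataSegmentAnnouncer hunk above drops the checked IOException from unannounceSegment and unannounceSegments, so an implementation must now either absorb I/O failures internally or surface them unchecked. Below is only a minimal sketch of that adaptation, assuming a hypothetical LoggingSegmentAnnouncer class and deleteAnnouncementNode helper that are not part of this patch; only the removal of the checked exception is taken from the hunk itself.

import java.io.IOException;
import java.io.UncheckedIOException;

// Hypothetical adapter, not from this patch: with the checked IOException gone from the
// unannounce methods, an implementation either handles I/O failures internally or
// rethrows them as unchecked exceptions.
public class LoggingSegmentAnnouncer
{
  public void unannounceSegment(String segmentId)
  {
    try {
      deleteAnnouncementNode(segmentId); // stand-in for the real cleanup I/O
    }
    catch (IOException e) {
      // The interface no longer declares IOException, so wrap instead of rethrowing as checked.
      throw new UncheckedIOException("Failed to unannounce segment " + segmentId, e);
    }
  }

  private void deleteAnnouncementNode(String segmentId) throws IOException
  {
    // placeholder for the actual announcement removal
  }
}

Callers of the new signatures, in turn, no longer need try/catch blocks around the unannounce calls.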
diff --git a/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java b/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java index 4f4f7a5b1d1..2a633b6ad27 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java +++ b/server/src/main/java/org/apache/druid/server/coordination/LoadableDataSegment.java @@ -19,7 +19,6 @@ package org.apache.druid.server.coordination; -import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; @@ -59,8 +58,7 @@ public class LoadableDataSegment extends DataSegment @JsonProperty("shardSpec") @Nullable ShardSpec shardSpec, @JsonProperty("lastCompactionState") @Nullable CompactionState lastCompactionState, @JsonProperty("binaryVersion") Integer binaryVersion, - @JsonProperty("size") long size, - @JacksonInject PruneSpecsHolder pruneSpecsHolder + @JsonProperty("size") long size ) { super( diff --git a/server/src/main/java/org/apache/druid/server/coordination/SegmentBootstrapper.java b/server/src/main/java/org/apache/druid/server/coordination/SegmentBootstrapper.java new file mode 100644 index 00000000000..c5b71fbcddc --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordination/SegmentBootstrapper.java @@ -0,0 +1,439 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.coordination; + +import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.SettableFuture; +import com.google.inject.Inject; +import org.apache.druid.client.BootstrapSegmentsResponse; +import org.apache.druid.client.coordinator.CoordinatorClient; +import org.apache.druid.common.guava.FutureUtils; +import org.apache.druid.guice.ManageLifecycle; +import org.apache.druid.guice.ServerTypeConfig; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Stopwatch; +import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.java.util.common.lifecycle.LifecycleStart; +import org.apache.druid.java.util.common.lifecycle.LifecycleStop; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; +import org.apache.druid.segment.loading.SegmentLoaderConfig; +import org.apache.druid.segment.loading.SegmentLoadingException; +import org.apache.druid.server.SegmentManager; +import org.apache.druid.timeline.DataSegment; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Responsible for bootstrapping segments already cached on disk and bootstrap segments fetched from the coordinator. + * Also responsible for announcing the node as a data server if applicable, once the bootstrapping operations + * are complete. + */ +@ManageLifecycle +public class SegmentBootstrapper +{ + private final SegmentLoadDropHandler loadDropHandler; + private final SegmentLoaderConfig config; + private final DataSegmentAnnouncer segmentAnnouncer; + private final DataSegmentServerAnnouncer serverAnnouncer; + private final SegmentManager segmentManager; + private final ServerTypeConfig serverTypeConfig; + private final CoordinatorClient coordinatorClient; + private final ServiceEmitter emitter; + + private volatile boolean isComplete = false; + + // Synchronizes start/stop of this object. 
+ private final Object startStopLock = new Object(); + + private static final EmittingLogger log = new EmittingLogger(SegmentBootstrapper.class); + + @Inject + public SegmentBootstrapper( + SegmentLoadDropHandler loadDropHandler, + SegmentLoaderConfig config, + DataSegmentAnnouncer segmentAnnouncer, + DataSegmentServerAnnouncer serverAnnouncer, + SegmentManager segmentManager, + ServerTypeConfig serverTypeConfig, + CoordinatorClient coordinatorClient, + ServiceEmitter emitter + ) + { + this.loadDropHandler = loadDropHandler; + this.config = config; + this.segmentAnnouncer = segmentAnnouncer; + this.serverAnnouncer = serverAnnouncer; + this.segmentManager = segmentManager; + this.serverTypeConfig = serverTypeConfig; + this.coordinatorClient = coordinatorClient; + this.emitter = emitter; + } + + @LifecycleStart + public void start() throws IOException + { + synchronized (startStopLock) { + if (isComplete) { + return; + } + + log.info("Starting..."); + try { + if (segmentManager.canHandleSegments()) { + loadSegmentsOnStartup(); + } + + if (shouldAnnounce()) { + serverAnnouncer.announce(); + } + } + catch (Exception e) { + Throwables.propagateIfPossible(e, IOException.class); + throw new RuntimeException(e); + } + isComplete = true; + log.info("Started."); + } + } + + @LifecycleStop + public void stop() + { + synchronized (startStopLock) { + if (!isComplete) { + return; + } + + log.info("Stopping..."); + try { + if (shouldAnnounce()) { + serverAnnouncer.unannounce(); + } + } + catch (Exception e) { + throw new RuntimeException(e); + } + finally { + isComplete = false; + } + log.info("Stopped."); + } + } + + public boolean isBootstrappingComplete() + { + return isComplete; + } + + /** + * Bulk loading of the following segments into the page cache at startup: + *

<ul> + *   <li>Previously cached segments</li> + *   <li>Bootstrap segments from the coordinator</li> + * </ul>
  • + */ + private void loadSegmentsOnStartup() throws IOException + { + final List segmentsOnStartup = new ArrayList<>(); + segmentsOnStartup.addAll(segmentManager.getCachedSegments()); + segmentsOnStartup.addAll(getBootstrapSegments()); + + final Stopwatch stopwatch = Stopwatch.createStarted(); + + // Start a temporary thread pool to load segments into page cache during bootstrap + final ExecutorService bootstrapExecutor = Execs.multiThreaded( + config.getNumBootstrapThreads(), "Segment-Bootstrap-%s" + ); + + // Start a temporary scheduled executor for background segment announcing + final ScheduledExecutorService backgroundAnnouncerExecutor = Executors.newScheduledThreadPool( + config.getNumLoadingThreads(), Execs.makeThreadFactory("Background-Segment-Announcer-%s") + ); + + try (final BackgroundSegmentAnnouncer backgroundSegmentAnnouncer = + new BackgroundSegmentAnnouncer(segmentAnnouncer, backgroundAnnouncerExecutor, config.getAnnounceIntervalMillis())) { + + backgroundSegmentAnnouncer.startAnnouncing(); + + final int numSegments = segmentsOnStartup.size(); + final CountDownLatch latch = new CountDownLatch(numSegments); + final AtomicInteger counter = new AtomicInteger(0); + final CopyOnWriteArrayList failedSegments = new CopyOnWriteArrayList<>(); + for (final DataSegment segment : segmentsOnStartup) { + bootstrapExecutor.submit( + () -> { + try { + log.info( + "Loading segment[%d/%d][%s]", + counter.incrementAndGet(), numSegments, segment.getId() + ); + try { + segmentManager.loadSegmentOnBootstrap( + segment, + () -> loadDropHandler.removeSegment(segment, DataSegmentChangeCallback.NOOP, false) + ); + } + catch (Exception e) { + loadDropHandler.removeSegment(segment, DataSegmentChangeCallback.NOOP, false); + throw new SegmentLoadingException(e, "Exception loading segment[%s]", segment.getId()); + } + try { + backgroundSegmentAnnouncer.announceSegment(segment); + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new SegmentLoadingException(e, "Loading Interrupted"); + } + } + catch (SegmentLoadingException e) { + log.error(e, "[%s] failed to load", segment.getId()); + failedSegments.add(segment); + } + finally { + latch.countDown(); + } + } + ); + } + + try { + latch.await(); + + if (failedSegments.size() > 0) { + log.makeAlert("[%,d] errors seen while loading segments on startup", failedSegments.size()) + .addData("failedSegments", failedSegments) + .emit(); + } + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + log.makeAlert(e, "LoadingInterrupted").emit(); + } + + backgroundSegmentAnnouncer.finishAnnouncing(); + } + catch (SegmentLoadingException e) { + log.makeAlert(e, "Failed to load segments on startup -- likely problem with announcing.") + .addData("numSegments", segmentsOnStartup.size()) + .emit(); + } + finally { + bootstrapExecutor.shutdownNow(); + backgroundAnnouncerExecutor.shutdownNow(); + stopwatch.stop(); + // At this stage, all tasks have been submitted, send a shutdown command to cleanup any resources alloted + // for the bootstrapping function. + segmentManager.shutdownBootstrap(); + log.info("Loaded [%d] segments on startup in [%,d]ms.", segmentsOnStartup.size(), stopwatch.millisElapsed()); + } + } + + /** + * @return a list of bootstrap segments. When bootstrap segments cannot be found, an empty list is returned. 
+ */ + private List getBootstrapSegments() + { + log.info("Fetching bootstrap segments from the coordinator."); + final Stopwatch stopwatch = Stopwatch.createStarted(); + + List bootstrapSegments = new ArrayList<>(); + + try { + final BootstrapSegmentsResponse response = + FutureUtils.getUnchecked(coordinatorClient.fetchBootstrapSegments(), true); + bootstrapSegments = ImmutableList.copyOf(response.getIterator()); + } + catch (Exception e) { + log.warn("Error fetching bootstrap segments from the coordinator: [%s]. ", e.getMessage()); + } + finally { + stopwatch.stop(); + final long fetchRunMillis = stopwatch.millisElapsed(); + emitter.emit(new ServiceMetricEvent.Builder().setMetric("segment/bootstrap/time", fetchRunMillis)); + emitter.emit(new ServiceMetricEvent.Builder().setMetric("segment/bootstrap/count", bootstrapSegments.size())); + log.info("Fetched [%d] bootstrap segments in [%d]ms.", bootstrapSegments.size(), fetchRunMillis); + } + + return bootstrapSegments; + } + + /** + * Returns whether or not we should announce ourselves as a data server using {@link DataSegmentServerAnnouncer}. + * + * Returns true if _either_: + * + *
<ul> + * <li>Our {@link #serverTypeConfig} indicates we are a segment server. This is necessary for Brokers to be able + * to detect that we exist.</li> + * <li>The segment manager is able to handle segments. This is necessary for Coordinators to be able to + * assign segments to us.</li> + * </ul>
  • + */ + private boolean shouldAnnounce() + { + return serverTypeConfig.getServerType().isSegmentServer() || segmentManager.canHandleSegments(); + } + + private static class BackgroundSegmentAnnouncer implements AutoCloseable + { + private static final EmittingLogger log = new EmittingLogger(BackgroundSegmentAnnouncer.class); + + private final int announceIntervalMillis; + private final DataSegmentAnnouncer segmentAnnouncer; + private final ScheduledExecutorService exec; + private final LinkedBlockingQueue queue; + private final SettableFuture doneAnnouncing; + + private final Object lock = new Object(); + + private volatile boolean finished = false; + @Nullable + private volatile ScheduledFuture startedAnnouncing = null; + @Nullable + private volatile ScheduledFuture nextAnnoucement = null; + + BackgroundSegmentAnnouncer( + DataSegmentAnnouncer segmentAnnouncer, + ScheduledExecutorService exec, + int announceIntervalMillis + ) + { + this.segmentAnnouncer = segmentAnnouncer; + this.exec = exec; + this.announceIntervalMillis = announceIntervalMillis; + this.queue = new LinkedBlockingQueue<>(); + this.doneAnnouncing = SettableFuture.create(); + } + + public void announceSegment(final DataSegment segment) throws InterruptedException + { + if (finished) { + throw new ISE("Announce segment called after finishAnnouncing"); + } + queue.put(segment); + } + + public void startAnnouncing() + { + if (announceIntervalMillis <= 0) { + log.info("Skipping background segment announcing as announceIntervalMillis is [%d].", announceIntervalMillis); + return; + } + + log.info("Starting background segment announcing task"); + + // schedule background announcing task + nextAnnoucement = startedAnnouncing = exec.schedule( + new Runnable() + { + @Override + public void run() + { + synchronized (lock) { + try { + if (!(finished && queue.isEmpty())) { + final List segments = new ArrayList<>(); + queue.drainTo(segments); + try { + segmentAnnouncer.announceSegments(segments); + nextAnnoucement = exec.schedule(this, announceIntervalMillis, TimeUnit.MILLISECONDS); + } + catch (IOException e) { + doneAnnouncing.setException( + new SegmentLoadingException(e, "Failed to announce segments[%s]", segments) + ); + } + } else { + doneAnnouncing.set(true); + } + } + catch (Exception e) { + doneAnnouncing.setException(e); + } + } + } + }, + announceIntervalMillis, + TimeUnit.MILLISECONDS + ); + } + + public void finishAnnouncing() throws SegmentLoadingException + { + synchronized (lock) { + finished = true; + // announce any remaining segments + try { + final List segments = new ArrayList<>(); + queue.drainTo(segments); + segmentAnnouncer.announceSegments(segments); + } + catch (Exception e) { + throw new SegmentLoadingException(e, "Failed to announce segments[%s]", queue); + } + + // get any exception that may have been thrown in background announcing + try { + // check in case intervalMillis is <= 0 + if (startedAnnouncing != null) { + startedAnnouncing.cancel(false); + } + // - if the task is waiting on the lock, then the queue will be empty by the time it runs + // - if the task just released it, then the lock ensures any exception is set in doneAnnouncing + if (doneAnnouncing.isDone()) { + doneAnnouncing.get(); + } + } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new SegmentLoadingException(e, "Loading Interrupted"); + } + catch (ExecutionException e) { + throw new SegmentLoadingException(e.getCause(), "Background Announcing Task Failed"); + } + } + log.info("Completed background 
segment announcing"); + } + + @Override + public void close() + { + // stop background scheduling + synchronized (lock) { + finished = true; + if (nextAnnoucement != null) { + nextAnnoucement.cancel(false); + } + } + } + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestDrop.java b/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestDrop.java index c4229a02880..ddee89b9763 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestDrop.java +++ b/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestDrop.java @@ -26,7 +26,6 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.timeline.DataSegment; import javax.annotation.Nullable; - import java.util.Objects; /** diff --git a/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestLoad.java b/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestLoad.java index 130c7b50d80..1bb9997980c 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestLoad.java +++ b/server/src/main/java/org/apache/druid/server/coordination/SegmentChangeRequestLoad.java @@ -26,7 +26,6 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.timeline.DataSegment; import javax.annotation.Nullable; - import java.util.Objects; /** diff --git a/server/src/main/java/org/apache/druid/server/coordination/SegmentLoadDropHandler.java b/server/src/main/java/org/apache/druid/server/coordination/SegmentLoadDropHandler.java index bcd88ee7ee9..12462adab2f 100644 --- a/server/src/main/java/org/apache/druid/server/coordination/SegmentLoadDropHandler.java +++ b/server/src/main/java/org/apache/druid/server/coordination/SegmentLoadDropHandler.java @@ -20,22 +20,15 @@ package org.apache.druid.server.coordination; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Throwables; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.google.common.util.concurrent.AbstractFuture; import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.SettableFuture; import com.google.inject.Inject; import org.apache.druid.guice.ManageLifecycle; -import org.apache.druid.guice.ServerTypeConfig; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.Stopwatch; import org.apache.druid.java.util.common.concurrent.Execs; -import org.apache.druid.java.util.common.lifecycle.LifecycleStart; -import org.apache.druid.java.util.common.lifecycle.LifecycleStop; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.SegmentLoadingException; @@ -51,20 +44,13 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentSkipListSet; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; -import 
java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; /** - * + * Responsible for loading and dropping of segments by a process that can serve segments. */ @ManageLifecycle public class SegmentLoadDropHandler implements DataSegmentChangeHandler @@ -74,18 +60,12 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler // Synchronizes removals from segmentsToDelete private final Object segmentDeleteLock = new Object(); - // Synchronizes start/stop of this object. - private final Object startStopLock = new Object(); - private final SegmentLoaderConfig config; private final DataSegmentAnnouncer announcer; - private final DataSegmentServerAnnouncer serverAnnouncer; private final SegmentManager segmentManager; private final ScheduledExecutorService exec; - private final ServerTypeConfig serverTypeConfig; - private final ConcurrentSkipListSet segmentsToDelete; - private volatile boolean started = false; + private final ConcurrentSkipListSet segmentsToDelete; // Keep history of load/drop request status in a LRU cache to maintain idempotency if same request shows up // again and to return status of a completed request. Maximum size of this cache must be significantly greater @@ -101,21 +81,17 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler public SegmentLoadDropHandler( SegmentLoaderConfig config, DataSegmentAnnouncer announcer, - DataSegmentServerAnnouncer serverAnnouncer, - SegmentManager segmentManager, - ServerTypeConfig serverTypeConfig + SegmentManager segmentManager ) { this( config, announcer, - serverAnnouncer, segmentManager, Executors.newScheduledThreadPool( config.getNumLoadingThreads(), Execs.makeThreadFactory("SimpleDataSegmentChangeHandler-%s") - ), - serverTypeConfig + ) ); } @@ -123,79 +99,19 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler SegmentLoadDropHandler( SegmentLoaderConfig config, DataSegmentAnnouncer announcer, - DataSegmentServerAnnouncer serverAnnouncer, SegmentManager segmentManager, - ScheduledExecutorService exec, - ServerTypeConfig serverTypeConfig + ScheduledExecutorService exec ) { this.config = config; this.announcer = announcer; - this.serverAnnouncer = serverAnnouncer; this.segmentManager = segmentManager; this.exec = exec; - this.serverTypeConfig = serverTypeConfig; this.segmentsToDelete = new ConcurrentSkipListSet<>(); requestStatuses = CacheBuilder.newBuilder().maximumSize(config.getStatusQueueMaxSize()).initialCapacity(8).build(); } - @LifecycleStart - public void start() throws IOException - { - synchronized (startStopLock) { - if (started) { - return; - } - - log.info("Starting..."); - try { - if (segmentManager.canHandleSegments()) { - bootstrapCachedSegments(); - } - - if (shouldAnnounce()) { - serverAnnouncer.announce(); - } - } - catch (Exception e) { - Throwables.propagateIfPossible(e, IOException.class); - throw new RuntimeException(e); - } - started = true; - log.info("Started."); - } - } - - @LifecycleStop - public void stop() - { - synchronized (startStopLock) { - if (!started) { - return; - } - - log.info("Stopping..."); - try { - if (shouldAnnounce()) { - serverAnnouncer.unannounce(); - } - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - started = false; - } - log.info("Stopped."); - } - } - - public boolean isStarted() - { - return started; - } - public Map getAverageNumOfRowsPerSegmentForDatasource() { return segmentManager.getAverageRowCountForDatasource(); @@ -206,96 +122,6 @@ public class 
SegmentLoadDropHandler implements DataSegmentChangeHandler return segmentManager.getRowCountDistribution(); } - /** - * Bulk loading of cached segments into page cache during bootstrap. - */ - private void bootstrapCachedSegments() throws IOException - { - final Stopwatch stopwatch = Stopwatch.createStarted(); - final List segments = segmentManager.getCachedSegments(); - - // Start a temporary thread pool to load segments into page cache during bootstrap - final ExecutorService loadingExecutor = Execs.multiThreaded( - config.getNumBootstrapThreads(), "Segment-Load-Startup-%s" - ); - - try (final BackgroundSegmentAnnouncer backgroundSegmentAnnouncer = - new BackgroundSegmentAnnouncer(announcer, exec, config.getAnnounceIntervalMillis())) { - - backgroundSegmentAnnouncer.startAnnouncing(); - - final int numSegments = segments.size(); - final CountDownLatch latch = new CountDownLatch(numSegments); - final AtomicInteger counter = new AtomicInteger(0); - final CopyOnWriteArrayList failedSegments = new CopyOnWriteArrayList<>(); - for (final DataSegment segment : segments) { - loadingExecutor.submit( - () -> { - try { - log.info( - "Loading segment[%d/%d][%s]", - counter.incrementAndGet(), numSegments, segment.getId() - ); - try { - segmentManager.loadSegmentOnBootstrap( - segment, - () -> this.removeSegment(segment, DataSegmentChangeCallback.NOOP, false) - ); - } - catch (Exception e) { - removeSegment(segment, DataSegmentChangeCallback.NOOP, false); - throw new SegmentLoadingException(e, "Exception loading segment[%s]", segment.getId()); - } - try { - backgroundSegmentAnnouncer.announceSegment(segment); - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new SegmentLoadingException(e, "Loading Interrupted"); - } - } - catch (SegmentLoadingException e) { - log.error(e, "[%s] failed to load", segment.getId()); - failedSegments.add(segment); - } - finally { - latch.countDown(); - } - } - ); - } - - try { - latch.await(); - - if (failedSegments.size() > 0) { - log.makeAlert("%,d errors seen while loading segments", failedSegments.size()) - .addData("failedSegments", failedSegments) - .emit(); - } - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - log.makeAlert(e, "LoadingInterrupted").emit(); - } - - backgroundSegmentAnnouncer.finishAnnouncing(); - } - catch (SegmentLoadingException e) { - log.makeAlert(e, "Failed to load segments -- likely problem with announcing.") - .addData("numSegments", segments.size()) - .emit(); - } - finally { - loadingExecutor.shutdownNow(); - stopwatch.stop(); - // At this stage, all tasks have been submitted, send a shutdown command to cleanup any resources alloted - // for the bootstrapping function. - segmentManager.shutdownBootstrap(); - log.info("Cache load of [%d] bootstrap segments took [%,d]ms.", segments.size(), stopwatch.millisElapsed()); - } - } - @Override public void addSegment(DataSegment segment, @Nullable DataSegmentChangeCallback callback) { @@ -515,154 +341,6 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler } } - /** - * Returns whether or not we should announce ourselves as a data server using {@link DataSegmentServerAnnouncer}. - * - * Returns true if _either_: - * - *
<li>Our {@link #serverTypeConfig} indicates we are a segment server. This is necessary for Brokers to be able - * to detect that we exist.</li> - * <li>The segment manager is able to handle segments. This is necessary for Coordinators to be able to - * assign segments to us.</li>
  • - */ - private boolean shouldAnnounce() - { - return serverTypeConfig.getServerType().isSegmentServer() || segmentManager.canHandleSegments(); - } - - private static class BackgroundSegmentAnnouncer implements AutoCloseable - { - private static final EmittingLogger log = new EmittingLogger(BackgroundSegmentAnnouncer.class); - - private final int intervalMillis; - private final DataSegmentAnnouncer announcer; - private final ScheduledExecutorService exec; - private final LinkedBlockingQueue queue; - private final SettableFuture doneAnnouncing; - - private final Object lock = new Object(); - - private volatile boolean finished = false; - @Nullable - private volatile ScheduledFuture startedAnnouncing = null; - @Nullable - private volatile ScheduledFuture nextAnnoucement = null; - - public BackgroundSegmentAnnouncer( - DataSegmentAnnouncer announcer, - ScheduledExecutorService exec, - int intervalMillis - ) - { - this.announcer = announcer; - this.exec = exec; - this.intervalMillis = intervalMillis; - this.queue = new LinkedBlockingQueue<>(); - this.doneAnnouncing = SettableFuture.create(); - } - - public void announceSegment(final DataSegment segment) throws InterruptedException - { - if (finished) { - throw new ISE("Announce segment called after finishAnnouncing"); - } - queue.put(segment); - } - - public void startAnnouncing() - { - if (intervalMillis <= 0) { - return; - } - - log.info("Starting background segment announcing task"); - - // schedule background announcing task - nextAnnoucement = startedAnnouncing = exec.schedule( - new Runnable() - { - @Override - public void run() - { - synchronized (lock) { - try { - if (!(finished && queue.isEmpty())) { - final List segments = new ArrayList<>(); - queue.drainTo(segments); - try { - announcer.announceSegments(segments); - nextAnnoucement = exec.schedule(this, intervalMillis, TimeUnit.MILLISECONDS); - } - catch (IOException e) { - doneAnnouncing.setException( - new SegmentLoadingException(e, "Failed to announce segments[%s]", segments) - ); - } - } else { - doneAnnouncing.set(true); - } - } - catch (Exception e) { - doneAnnouncing.setException(e); - } - } - } - }, - intervalMillis, - TimeUnit.MILLISECONDS - ); - } - - public void finishAnnouncing() throws SegmentLoadingException - { - synchronized (lock) { - finished = true; - // announce any remaining segments - try { - final List segments = new ArrayList<>(); - queue.drainTo(segments); - announcer.announceSegments(segments); - } - catch (Exception e) { - throw new SegmentLoadingException(e, "Failed to announce segments[%s]", queue); - } - - // get any exception that may have been thrown in background announcing - try { - // check in case intervalMillis is <= 0 - if (startedAnnouncing != null) { - startedAnnouncing.cancel(false); - } - // - if the task is waiting on the lock, then the queue will be empty by the time it runs - // - if the task just released it, then the lock ensures any exception is set in doneAnnouncing - if (doneAnnouncing.isDone()) { - doneAnnouncing.get(); - } - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new SegmentLoadingException(e, "Loading Interrupted"); - } - catch (ExecutionException e) { - throw new SegmentLoadingException(e.getCause(), "Background Announcing Task Failed"); - } - } - log.info("Completed background segment announcing"); - } - - @Override - public void close() - { - // stop background scheduling - synchronized (lock) { - finished = true; - if (nextAnnoucement != null) { - nextAnnoucement.cancel(false); - 
} - } - } - } - // Future with cancel() implementation to remove it from "waitingFutures" list private class CustomSettableFuture extends AbstractFuture> { @@ -708,6 +386,5 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler return true; } } - } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/AutoCompactionSnapshot.java b/server/src/main/java/org/apache/druid/server/coordinator/AutoCompactionSnapshot.java index fe46eabb426..d52d4e9eba0 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/AutoCompactionSnapshot.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/AutoCompactionSnapshot.java @@ -22,6 +22,7 @@ package org.apache.druid.server.coordinator; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.server.coordinator.compact.CompactionStatistics; import javax.validation.constraints.NotNull; import java.util.Objects; @@ -193,15 +194,9 @@ public class AutoCompactionSnapshot private final String dataSource; private final AutoCompactionScheduleStatus scheduleStatus; - private long bytesAwaitingCompaction; - private long bytesCompacted; - private long bytesSkipped; - private long segmentCountAwaitingCompaction; - private long segmentCountCompacted; - private long segmentCountSkipped; - private long intervalCountAwaitingCompaction; - private long intervalCountCompacted; - private long intervalCountSkipped; + private final CompactionStatistics compactedStats = new CompactionStatistics(); + private final CompactionStatistics skippedStats = new CompactionStatistics(); + private final CompactionStatistics waitingStats = new CompactionStatistics(); private Builder( @NotNull String dataSource, @@ -217,69 +212,21 @@ public class AutoCompactionSnapshot this.dataSource = dataSource; this.scheduleStatus = scheduleStatus; - this.bytesAwaitingCompaction = 0; - this.bytesCompacted = 0; - this.bytesSkipped = 0; - this.segmentCountAwaitingCompaction = 0; - this.segmentCountCompacted = 0; - this.segmentCountSkipped = 0; - this.intervalCountAwaitingCompaction = 0; - this.intervalCountCompacted = 0; - this.intervalCountSkipped = 0; } - public Builder incrementBytesAwaitingCompaction(long incrementValue) + public void incrementWaitingStats(CompactionStatistics entry) { - this.bytesAwaitingCompaction = this.bytesAwaitingCompaction + incrementValue; - return this; + waitingStats.increment(entry); } - public Builder incrementBytesCompacted(long incrementValue) + public void incrementCompactedStats(CompactionStatistics entry) { - this.bytesCompacted = this.bytesCompacted + incrementValue; - return this; + compactedStats.increment(entry); } - public Builder incrementSegmentCountAwaitingCompaction(long incrementValue) + public void incrementSkippedStats(CompactionStatistics entry) { - this.segmentCountAwaitingCompaction = this.segmentCountAwaitingCompaction + incrementValue; - return this; - } - - public Builder incrementSegmentCountCompacted(long incrementValue) - { - this.segmentCountCompacted = this.segmentCountCompacted + incrementValue; - return this; - } - - public Builder incrementIntervalCountAwaitingCompaction(long incrementValue) - { - this.intervalCountAwaitingCompaction = this.intervalCountAwaitingCompaction + incrementValue; - return this; - } - - public Builder incrementIntervalCountCompacted(long incrementValue) - { - this.intervalCountCompacted = this.intervalCountCompacted + incrementValue; - return 
this; - } - - public Builder incrementBytesSkipped(long incrementValue) - { - this.bytesSkipped = this.bytesSkipped + incrementValue; - return this; - } - - public Builder incrementSegmentCountSkipped(long incrementValue) - { - this.segmentCountSkipped = this.segmentCountSkipped + incrementValue; - return this; - } - - public Builder incrementIntervalCountSkipped(long incrementValue) - { - this.intervalCountSkipped = this.intervalCountSkipped + incrementValue; - return this; + skippedStats.increment(entry); } public AutoCompactionSnapshot build() @@ -287,15 +234,15 @@ public class AutoCompactionSnapshot return new AutoCompactionSnapshot( dataSource, scheduleStatus, - bytesAwaitingCompaction, - bytesCompacted, - bytesSkipped, - segmentCountAwaitingCompaction, - segmentCountCompacted, - segmentCountSkipped, - intervalCountAwaitingCompaction, - intervalCountCompacted, - intervalCountSkipped + waitingStats.getTotalBytes(), + compactedStats.getTotalBytes(), + skippedStats.getTotalBytes(), + waitingStats.getNumSegments(), + compactedStats.getNumSegments(), + skippedStats.getNumSegments(), + waitingStats.getNumIntervals(), + compactedStats.getNumIntervals(), + skippedStats.getNumIntervals() ); } } diff --git a/server/src/main/java/org/apache/druid/guice/NoopSegmentPublisherProvider.java b/server/src/main/java/org/apache/druid/server/coordinator/CompactionConfigValidationResult.java similarity index 61% rename from server/src/main/java/org/apache/druid/guice/NoopSegmentPublisherProvider.java rename to server/src/main/java/org/apache/druid/server/coordinator/CompactionConfigValidationResult.java index 254f2ca2823..88eaa3e923a 100644 --- a/server/src/main/java/org/apache/druid/guice/NoopSegmentPublisherProvider.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CompactionConfigValidationResult.java @@ -17,19 +17,28 @@ * under the License. */ -package org.apache.druid.guice; +package org.apache.druid.server.coordinator; -import org.apache.druid.metadata.SegmentPublisherProvider; -import org.apache.druid.segment.realtime.NoopSegmentPublisher; -import org.apache.druid.segment.realtime.SegmentPublisher; +import org.apache.druid.java.util.common.StringUtils; -/** - */ -public class NoopSegmentPublisherProvider implements SegmentPublisherProvider +public class CompactionConfigValidationResult { - @Override - public SegmentPublisher get() + private final boolean valid; + private final String reason; + + public CompactionConfigValidationResult(boolean valid, String format, Object... args) { - return new NoopSegmentPublisher(); + this.valid = valid; + this.reason = format == null ? 
null : StringUtils.format(format, args); + } + + public boolean isValid() + { + return valid; + } + + public String getReason() + { + return reason; } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorCompactionConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorCompactionConfig.java index 2d7b6c0100a..036c53121e9 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorCompactionConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorCompactionConfig.java @@ -22,6 +22,8 @@ package org.apache.druid.server.coordinator; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; +import org.apache.druid.common.config.Configs; +import org.apache.druid.indexer.CompactionEngine; import javax.annotation.Nullable; import java.util.List; @@ -34,11 +36,13 @@ public class CoordinatorCompactionConfig private static final double DEFAULT_COMPACTION_TASK_RATIO = 0.1; private static final int DEFAULT_MAX_COMPACTION_TASK_SLOTS = Integer.MAX_VALUE; private static final boolean DEFAULT_USE_AUTO_SCALE_SLOTS = false; + private static final CompactionEngine DEFAULT_COMPACTION_ENGINE = CompactionEngine.NATIVE; private final List compactionConfigs; private final double compactionTaskSlotRatio; private final int maxCompactionTaskSlots; private final boolean useAutoScaleSlots; + private final CompactionEngine compactionEngine; public static CoordinatorCompactionConfig from( CoordinatorCompactionConfig baseConfig, @@ -49,7 +53,8 @@ public class CoordinatorCompactionConfig compactionConfigs, baseConfig.compactionTaskSlotRatio, baseConfig.maxCompactionTaskSlots, - baseConfig.useAutoScaleSlots + baseConfig.useAutoScaleSlots, + null ); } @@ -64,18 +69,19 @@ public class CoordinatorCompactionConfig baseConfig.compactionConfigs, compactionTaskSlotRatio == null ? baseConfig.compactionTaskSlotRatio : compactionTaskSlotRatio, maxCompactionTaskSlots == null ? baseConfig.maxCompactionTaskSlots : maxCompactionTaskSlots, - useAutoScaleSlots == null ? baseConfig.useAutoScaleSlots : useAutoScaleSlots + useAutoScaleSlots == null ? baseConfig.useAutoScaleSlots : useAutoScaleSlots, + null ); } public static CoordinatorCompactionConfig from(List compactionConfigs) { - return new CoordinatorCompactionConfig(compactionConfigs, null, null, null); + return new CoordinatorCompactionConfig(compactionConfigs, null, null, null, null); } public static CoordinatorCompactionConfig empty() { - return new CoordinatorCompactionConfig(ImmutableList.of(), null, null, null); + return new CoordinatorCompactionConfig(ImmutableList.of(), null, null, null, null); } @JsonCreator @@ -83,19 +89,15 @@ public class CoordinatorCompactionConfig @JsonProperty("compactionConfigs") List compactionConfigs, @JsonProperty("compactionTaskSlotRatio") @Nullable Double compactionTaskSlotRatio, @JsonProperty("maxCompactionTaskSlots") @Nullable Integer maxCompactionTaskSlots, - @JsonProperty("useAutoScaleSlots") @Nullable Boolean useAutoScaleSlots + @JsonProperty("useAutoScaleSlots") @Nullable Boolean useAutoScaleSlots, + @JsonProperty("compactionEngine") @Nullable CompactionEngine compactionEngine ) { this.compactionConfigs = compactionConfigs; - this.compactionTaskSlotRatio = compactionTaskSlotRatio == null ? - DEFAULT_COMPACTION_TASK_RATIO : - compactionTaskSlotRatio; - this.maxCompactionTaskSlots = maxCompactionTaskSlots == null ? 
- DEFAULT_MAX_COMPACTION_TASK_SLOTS : - maxCompactionTaskSlots; - this.useAutoScaleSlots = useAutoScaleSlots == null ? - DEFAULT_USE_AUTO_SCALE_SLOTS : - useAutoScaleSlots; + this.compactionTaskSlotRatio = Configs.valueOrDefault(compactionTaskSlotRatio, DEFAULT_COMPACTION_TASK_RATIO); + this.maxCompactionTaskSlots = Configs.valueOrDefault(maxCompactionTaskSlots, DEFAULT_MAX_COMPACTION_TASK_SLOTS); + this.useAutoScaleSlots = Configs.valueOrDefault(useAutoScaleSlots, DEFAULT_USE_AUTO_SCALE_SLOTS); + this.compactionEngine = Configs.valueOrDefault(compactionEngine, DEFAULT_COMPACTION_ENGINE); } @JsonProperty @@ -122,6 +124,12 @@ public class CoordinatorCompactionConfig return useAutoScaleSlots; } + @JsonProperty + public CompactionEngine getEngine() + { + return compactionEngine; + } + @Override public boolean equals(Object o) { @@ -135,13 +143,20 @@ public class CoordinatorCompactionConfig return Double.compare(that.compactionTaskSlotRatio, compactionTaskSlotRatio) == 0 && maxCompactionTaskSlots == that.maxCompactionTaskSlots && useAutoScaleSlots == that.useAutoScaleSlots && + compactionEngine == that.compactionEngine && Objects.equals(compactionConfigs, that.compactionConfigs); } @Override public int hashCode() { - return Objects.hash(compactionConfigs, compactionTaskSlotRatio, maxCompactionTaskSlots, useAutoScaleSlots); + return Objects.hash( + compactionConfigs, + compactionTaskSlotRatio, + maxCompactionTaskSlots, + useAutoScaleSlots, + compactionEngine + ); } @Override @@ -152,6 +167,7 @@ public class CoordinatorCompactionConfig ", compactionTaskSlotRatio=" + compactionTaskSlotRatio + ", maxCompactionTaskSlots=" + maxCompactionTaskSlots + ", useAutoScaleSlots=" + useAutoScaleSlots + + ", compactionEngine=" + compactionEngine + '}'; } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java index da89040f50a..767e8218f31 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java @@ -22,6 +22,7 @@ package org.apache.druid.server.coordinator; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import org.apache.druid.indexer.CompactionEngine; import org.apache.druid.query.aggregation.AggregatorFactory; import org.joda.time.Period; @@ -55,6 +56,7 @@ public class DataSourceCompactionConfig private final UserCompactionTaskTransformConfig transformSpec; private final UserCompactionTaskIOConfig ioConfig; private final Map taskContext; + private final CompactionEngine engine; @JsonCreator public DataSourceCompactionConfig( @@ -69,6 +71,7 @@ public class DataSourceCompactionConfig @JsonProperty("metricsSpec") @Nullable AggregatorFactory[] metricsSpec, @JsonProperty("transformSpec") @Nullable UserCompactionTaskTransformConfig transformSpec, @JsonProperty("ioConfig") @Nullable UserCompactionTaskIOConfig ioConfig, + @JsonProperty("engine") @Nullable CompactionEngine engine, @JsonProperty("taskContext") @Nullable Map taskContext ) { @@ -88,6 +91,7 @@ public class DataSourceCompactionConfig this.dimensionsSpec = dimensionsSpec; this.transformSpec = transformSpec; this.taskContext = taskContext; + this.engine = engine; } @JsonProperty @@ -171,6 +175,13 @@ public class DataSourceCompactionConfig return taskContext; } + @JsonProperty + 
@Nullable + public CompactionEngine getEngine() + { + return engine; + } + @Override public boolean equals(Object o) { @@ -192,6 +203,7 @@ public class DataSourceCompactionConfig Arrays.equals(metricsSpec, that.metricsSpec) && Objects.equals(transformSpec, that.transformSpec) && Objects.equals(ioConfig, that.ioConfig) && + this.engine == that.engine && Objects.equals(taskContext, that.taskContext); } @@ -209,7 +221,8 @@ public class DataSourceCompactionConfig dimensionsSpec, transformSpec, ioConfig, - taskContext + taskContext, + engine ); result = 31 * result + Arrays.hashCode(metricsSpec); return result; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index 0787bc8f7d4..9710bda79b4 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -174,6 +174,12 @@ public class DruidCoordinator */ private volatile SegmentReplicationStatus segmentReplicationStatus = null; + /** + * Set of broadcast segments determined in the latest coordinator run of the {@link RunRules} duty. + * This might contain stale information if the Coordinator duties haven't run or are delayed. + */ + private volatile Set broadcastSegments = null; + public static final String HISTORICAL_MANAGEMENT_DUTIES_DUTY_GROUP = "HistoricalManagementDuties"; private static final String METADATA_STORE_MANAGEMENT_DUTIES_DUTY_GROUP = "MetadataStoreManagementDuties"; private static final String INDEXING_SERVICE_DUTIES_DUTY_GROUP = "IndexingServiceDuties"; @@ -315,6 +321,16 @@ public class DruidCoordinator return loadStatus; } + /** + * @return Set of broadcast segments determined by the latest run of the {@link RunRules} duty. + * If the coordinator runs haven't triggered or are delayed, this information may be stale. + */ + @Nullable + public Set getBroadcastSegments() + { + return broadcastSegments; + } + @Nullable public Integer getReplicationFactor(SegmentId segmentId) { @@ -798,7 +814,11 @@ public class DruidCoordinator @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { + broadcastSegments = params.getBroadcastSegments(); segmentReplicationStatus = params.getSegmentReplicationStatus(); + if (coordinatorSegmentMetadataCache != null) { + coordinatorSegmentMetadataCache.updateSegmentReplicationStatus(segmentReplicationStatus); + } // Collect stats for unavailable and under-replicated segments final CoordinatorRunStats stats = params.getCoordinatorStats(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java index 5548636b000..ebdbd4f500e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java @@ -123,6 +123,12 @@ public class DruidCoordinatorRuntimeParams return segmentAssigner == null ? null : segmentAssigner.getReplicationStatus(); } + @Nullable + public Set getBroadcastSegments() + { + return segmentAssigner == null ? 
null : segmentAssigner.getBroadcastSegments(); + } + public StrategicSegmentAssigner getSegmentAssigner() { return segmentAssigner; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionSegmentSearchPolicy.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionSegmentSearchPolicy.java index 5a006908c31..cc5f4f59d85 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionSegmentSearchPolicy.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionSegmentSearchPolicy.java @@ -33,9 +33,9 @@ import java.util.Map; public interface CompactionSegmentSearchPolicy { /** - * Reset the current states of this policy. This method should be called whenever iterating starts. + * Creates an iterator that returns compactible segments. */ - CompactionSegmentIterator reset( + CompactionSegmentIterator createIterator( Map compactionConfigs, Map dataSources, Map> skipIntervals diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatistics.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatistics.java index dd672ce4480..6997dec47c0 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatistics.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatistics.java @@ -28,9 +28,13 @@ public class CompactionStatistics private long numSegments; private long numIntervals; - public static CompactionStatistics create() + public static CompactionStatistics create(long bytes, long numSegments, long numIntervals) { - return new CompactionStatistics(); + final CompactionStatistics stats = new CompactionStatistics(); + stats.totalBytes = bytes; + stats.numIntervals = numIntervals; + stats.numSegments = numSegments; + return stats; } public long getTotalBytes() @@ -48,10 +52,10 @@ public class CompactionStatistics return numIntervals; } - public void addFrom(SegmentsToCompact segments) + public void increment(CompactionStatistics other) { - totalBytes += segments.getTotalBytes(); - numIntervals += segments.getNumIntervals(); - numSegments += segments.size(); + totalBytes += other.getTotalBytes(); + numIntervals += other.getNumIntervals(); + numSegments += other.getNumSegments(); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatus.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatus.java index 862f2e7c5b4..fa053fb8d6a 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatus.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/CompactionStatus.java @@ -167,12 +167,7 @@ public class CompactionStatus this.objectMapper = objectMapper; this.lastCompactionState = candidateSegments.getFirst().getLastCompactionState(); this.compactionConfig = compactionConfig; - this.tuningConfig = ClientCompactionTaskQueryTuningConfig.from( - compactionConfig.getTuningConfig(), - compactionConfig.getMaxRowsPerSegment(), - null - ); - + this.tuningConfig = ClientCompactionTaskQueryTuningConfig.from(compactionConfig); this.configuredGranularitySpec = compactionConfig.getGranularitySpec(); if (lastCompactionState == null) { this.existingGranularitySpec = null; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java 
b/server/src/main/java/org/apache/druid/server/coordinator/compact/DataSourceCompactibleSegmentIterator.java similarity index 82% rename from server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java rename to server/src/main/java/org/apache/druid/server/coordinator/compact/DataSourceCompactibleSegmentIterator.java index c4ae771f808..c086be3112b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIterator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/DataSourceCompactibleSegmentIterator.java @@ -23,9 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.java.util.common.granularity.Granularity; @@ -59,44 +57,46 @@ import java.util.Set; import java.util.stream.Collectors; /** - * This class iterates all segments of the dataSources configured for compaction from the newest to the oldest. + * Iterator over compactible segments of a datasource in order of specified priority. */ -public class NewestSegmentFirstIterator implements CompactionSegmentIterator +public class DataSourceCompactibleSegmentIterator implements Iterator { - private static final Logger log = new Logger(NewestSegmentFirstIterator.class); + private static final Logger log = new Logger(DataSourceCompactibleSegmentIterator.class); + private final String dataSource; private final ObjectMapper objectMapper; - private final Map compactionConfigs; - private final Map compactedSegmentStats = new HashMap<>(); - private final Map skippedSegmentStats = new HashMap<>(); - - private final Map timelineIterators; + private final DataSourceCompactionConfig config; + private final CompactionStatistics compactedSegmentStats = new CompactionStatistics(); + private final CompactionStatistics skippedSegmentStats = new CompactionStatistics(); // This is needed for datasource that has segmentGranularity configured // If configured segmentGranularity in config is finer than current segmentGranularity, the same set of segments // can belong to multiple intervals in the timeline. We keep track of the compacted intervals between each // run of the compaction job and skip any interval that was already previously compacted. 
- private final Map> intervalCompactedForDatasource = new HashMap<>(); + private final Set compactedIntervals = new HashSet<>(); - private final PriorityQueue queue = new PriorityQueue<>( - (o1, o2) -> Comparators.intervalsByStartThenEnd().compare(o2.getUmbrellaInterval(), o1.getUmbrellaInterval()) - ); + private final PriorityQueue queue; - NewestSegmentFirstIterator( - ObjectMapper objectMapper, - Map compactionConfigs, - Map dataSources, - Map> skipIntervals + public DataSourceCompactibleSegmentIterator( + DataSourceCompactionConfig config, + SegmentTimeline timeline, + List skipIntervals, + Comparator segmentPriority, + ObjectMapper objectMapper ) { this.objectMapper = objectMapper; - this.compactionConfigs = compactionConfigs; - this.timelineIterators = Maps.newHashMapWithExpectedSize(dataSources.size()); + this.config = config; + this.dataSource = config.getDataSource(); + this.queue = new PriorityQueue<>(segmentPriority); + populateQueue(timeline, skipIntervals); + } - dataSources.forEach((dataSource, timeline) -> { - final DataSourceCompactionConfig config = compactionConfigs.get(dataSource); + private void populateQueue(SegmentTimeline timeline, List skipIntervals) + { + if (timeline != null) { Granularity configuredSegmentGranularity = null; - if (config != null && !timeline.isEmpty()) { + if (!timeline.isEmpty()) { SegmentTimeline originalTimeline = null; if (config.getGranularitySpec() != null && config.getGranularitySpec().getSegmentGranularity() != null) { String temporaryVersion = DateTimes.nowUtc().toString(); @@ -154,33 +154,25 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator timeline, config.getSkipOffsetFromLatest(), configuredSegmentGranularity, - skipIntervals.get(dataSource) + skipIntervals ); if (!searchIntervals.isEmpty()) { - timelineIterators.put( - dataSource, + findAndEnqueueSegmentsToCompact( new CompactibleSegmentIterator(timeline, searchIntervals, originalTimeline) ); + } else { + log.warn("Skipping compaction for datasource[%s] as it has no compactible segments.", dataSource); } } - }); - - compactionConfigs.forEach((dataSourceName, config) -> { - if (config == null) { - throw new ISE("Unknown dataSource[%s]", dataSourceName); - } - updateQueue(dataSourceName, config); - }); + } } - @Override - public Map totalCompactedStatistics() + public CompactionStatistics totalCompactedStatistics() { return compactedSegmentStats; } - @Override - public Map totalSkippedStatistics() + public CompactionStatistics totalSkippedStatistics() { return skippedSegmentStats; } @@ -206,25 +198,9 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator final List resultSegments = entry.getSegments(); Preconditions.checkState(!resultSegments.isEmpty(), "Queue entry must not be empty"); - final String dataSource = resultSegments.get(0).getDataSource(); - updateQueue(dataSource, compactionConfigs.get(dataSource)); - return entry; } - /** - * Find the next segments to compact for the given dataSource and add them to the queue. - * {@link #timelineIterators} is updated according to the found segments. That is, the found segments are removed from - * the timeline of the given dataSource. - */ - private void updateQueue(String dataSourceName, DataSourceCompactionConfig config) - { - final SegmentsToCompact segmentsToCompact = findSegmentsToCompact(dataSourceName, config); - if (!segmentsToCompact.isEmpty()) { - queue.add(segmentsToCompact); - } - } - /** * Iterates compactible segments in a {@link SegmentTimeline}. 
*/ @@ -315,27 +291,12 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator } /** - * Finds segments to compact together for the given datasource. - * - * @return An empty {@link SegmentsToCompact} if there are no eligible candidates. + * Finds segments to compact together for the given datasource and adds them to + * the priority queue. */ - private SegmentsToCompact findSegmentsToCompact( - final String dataSourceName, - final DataSourceCompactionConfig config - ) + private void findAndEnqueueSegmentsToCompact(CompactibleSegmentIterator compactibleSegmentIterator) { - final CompactibleSegmentIterator compactibleSegmentIterator - = timelineIterators.get(dataSourceName); - if (compactibleSegmentIterator == null) { - log.warn( - "Skipping compaction for datasource[%s] as there is no compactible segment in its timeline.", - dataSourceName - ); - return SegmentsToCompact.empty(); - } - final long inputSegmentSize = config.getInputSegmentSizeBytes(); - while (compactibleSegmentIterator.hasNext()) { List segments = compactibleSegmentIterator.next(); @@ -352,47 +313,33 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator if (!compactionStatus.isComplete()) { log.debug( "Datasource[%s], interval[%s] has [%d] segments that need to be compacted because [%s].", - dataSourceName, interval, candidates.size(), compactionStatus.getReasonToCompact() + dataSource, interval, candidates.size(), compactionStatus.getReasonToCompact() ); } if (compactionStatus.isComplete()) { - addSegmentStatsTo(compactedSegmentStats, dataSourceName, candidates); + compactedSegmentStats.increment(candidates.getStats()); } else if (candidates.getTotalBytes() > inputSegmentSize) { - addSegmentStatsTo(skippedSegmentStats, dataSourceName, candidates); + skippedSegmentStats.increment(candidates.getStats()); log.warn( "Skipping compaction for datasource[%s], interval[%s] as total segment size[%d]" + " is larger than allowed inputSegmentSize[%d].", - dataSourceName, interval, candidates.getTotalBytes(), inputSegmentSize + dataSource, interval, candidates.getTotalBytes(), inputSegmentSize ); } else if (config.getGranularitySpec() != null && config.getGranularitySpec().getSegmentGranularity() != null) { - Set compactedIntervals = intervalCompactedForDatasource - .computeIfAbsent(dataSourceName, k -> new HashSet<>()); - if (compactedIntervals.contains(interval)) { // Skip these candidate segments as we have already compacted this interval } else { compactedIntervals.add(interval); - return candidates; + queue.add(candidates); } } else { - return candidates; + queue.add(candidates); } } - log.debug("No more segments to compact for datasource[%s].", dataSourceName); - return SegmentsToCompact.empty(); - } - - private void addSegmentStatsTo( - Map statisticsMap, - String dataSourceName, - SegmentsToCompact segments - ) - { - statisticsMap.computeIfAbsent(dataSourceName, v -> CompactionStatistics.create()) - .addFrom(segments); + log.debug("No more segments to compact for datasource[%s].", dataSource); } /** @@ -428,7 +375,7 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator final List segments = new ArrayList<>( timeline.findNonOvershadowedObjectsInInterval(skipInterval, Partitions.ONLY_COMPLETE) ); - addSegmentStatsTo(skippedSegmentStats, dataSourceName, SegmentsToCompact.from(segments)); + skippedSegmentStats.increment(SegmentsToCompact.from(segments).getStats()); } final Interval totalInterval = new Interval(first.getInterval().getStart(), 
last.getInterval().getEnd()); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicy.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicy.java index 20f6d920441..bc923da4f80 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicy.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicy.java @@ -21,6 +21,7 @@ package org.apache.druid.server.coordinator.compact; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.inject.Inject; +import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.server.coordinator.DataSourceCompactionConfig; import org.apache.druid.timeline.SegmentTimeline; import org.joda.time.Interval; @@ -29,7 +30,7 @@ import java.util.List; import java.util.Map; /** - * This policy searches segments for compaction from the newest one to oldest one. + * This policy searches segments for compaction from newest to oldest. */ public class NewestSegmentFirstPolicy implements CompactionSegmentSearchPolicy { @@ -42,12 +43,20 @@ public class NewestSegmentFirstPolicy implements CompactionSegmentSearchPolicy } @Override - public CompactionSegmentIterator reset( + public CompactionSegmentIterator createIterator( Map compactionConfigs, Map dataSources, Map> skipIntervals ) { - return new NewestSegmentFirstIterator(objectMapper, compactionConfigs, dataSources, skipIntervals); + return new PriorityBasedCompactionSegmentIterator( + compactionConfigs, + dataSources, + skipIntervals, + (o1, o2) -> Comparators.intervalsByStartThenEnd() + .compare(o2.getUmbrellaInterval(), o1.getUmbrellaInterval()), + objectMapper + ); } + } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/PriorityBasedCompactionSegmentIterator.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/PriorityBasedCompactionSegmentIterator.java new file mode 100644 index 00000000000..33aea2a0451 --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/PriorityBasedCompactionSegmentIterator.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.coordinator.compact; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.server.coordinator.DataSourceCompactionConfig; +import org.apache.druid.timeline.SegmentTimeline; +import org.apache.druid.utils.CollectionUtils; +import org.joda.time.Interval; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.PriorityQueue; + +/** + * Implementation of {@link CompactionSegmentIterator} that returns segments in + * order of their priority. + */ +public class PriorityBasedCompactionSegmentIterator implements CompactionSegmentIterator +{ + private static final Logger log = new Logger(PriorityBasedCompactionSegmentIterator.class); + + private final PriorityQueue queue; + private final Map datasourceIterators; + + public PriorityBasedCompactionSegmentIterator( + Map compactionConfigs, + Map datasourceToTimeline, + Map> skipIntervals, + Comparator segmentPriority, + ObjectMapper objectMapper + ) + { + this.queue = new PriorityQueue<>(segmentPriority); + this.datasourceIterators = Maps.newHashMapWithExpectedSize(datasourceToTimeline.size()); + compactionConfigs.forEach((datasource, config) -> { + if (config == null) { + throw DruidException.defensive("Invalid null compaction config for dataSource[%s].", datasource); + } + final SegmentTimeline timeline = datasourceToTimeline.get(datasource); + if (timeline == null) { + log.warn("Skipping compaction for datasource[%s] as it has no timeline.", datasource); + return; + } + + datasourceIterators.put( + datasource, + new DataSourceCompactibleSegmentIterator( + compactionConfigs.get(datasource), + timeline, + skipIntervals.getOrDefault(datasource, Collections.emptyList()), + segmentPriority, + objectMapper + ) + ); + addNextItemForDatasourceToQueue(datasource); + }); + } + + @Override + public Map totalCompactedStatistics() + { + return CollectionUtils.mapValues( + datasourceIterators, + DataSourceCompactibleSegmentIterator::totalCompactedStatistics + ); + } + + @Override + public Map totalSkippedStatistics() + { + return CollectionUtils.mapValues( + datasourceIterators, + DataSourceCompactibleSegmentIterator::totalSkippedStatistics + ); + } + + @Override + public boolean hasNext() + { + return !queue.isEmpty(); + } + + @Override + public SegmentsToCompact next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + final SegmentsToCompact entry = queue.poll(); + if (entry == null) { + throw new NoSuchElementException(); + } + Preconditions.checkState(!entry.isEmpty(), "Queue entry must not be empty"); + + addNextItemForDatasourceToQueue(entry.getFirst().getDataSource()); + return entry; + } + + private void addNextItemForDatasourceToQueue(String dataSourceName) + { + final DataSourceCompactibleSegmentIterator iterator = datasourceIterators.get(dataSourceName); + if (iterator.hasNext()) { + final SegmentsToCompact segmentsToCompact = iterator.next(); + if (!segmentsToCompact.isEmpty()) { + queue.add(segmentsToCompact); + } + } + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/compact/SegmentsToCompact.java b/server/src/main/java/org/apache/druid/server/coordinator/compact/SegmentsToCompact.java index 1bc53b7dbe7..27ce9beab81 100644 --- 
a/server/src/main/java/org/apache/druid/server/coordinator/compact/SegmentsToCompact.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/compact/SegmentsToCompact.java @@ -107,9 +107,9 @@ public class SegmentsToCompact return umbrellaInterval; } - public long getNumIntervals() + public CompactionStatistics getStats() { - return numIntervals; + return CompactionStatistics.create(totalBytes, size(), numIntervals); } @Override diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java index 27f6d17638d..01f3bc77e9e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java @@ -27,15 +27,18 @@ import com.google.common.base.Predicate; import com.google.inject.Inject; import org.apache.druid.client.indexing.ClientCompactionIOConfig; import org.apache.druid.client.indexing.ClientCompactionIntervalSpec; +import org.apache.druid.client.indexing.ClientCompactionRunnerInfo; import org.apache.druid.client.indexing.ClientCompactionTaskDimensionsSpec; import org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec; import org.apache.druid.client.indexing.ClientCompactionTaskQuery; import org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig; import org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec; +import org.apache.druid.client.indexing.ClientMSQContext; import org.apache.druid.client.indexing.ClientTaskQuery; import org.apache.druid.client.indexing.TaskPayloadResponse; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.common.utils.IdUtils; +import org.apache.druid.indexer.CompactionEngine; import org.apache.druid.indexer.TaskStatusPlus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.java.util.common.ISE; @@ -51,7 +54,6 @@ import org.apache.druid.server.coordinator.DataSourceCompactionConfig; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.compact.CompactionSegmentIterator; import org.apache.druid.server.coordinator.compact.CompactionSegmentSearchPolicy; -import org.apache.druid.server.coordinator.compact.CompactionStatistics; import org.apache.druid.server.coordinator.compact.SegmentsToCompact; import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; import org.apache.druid.server.coordinator.stats.Dimension; @@ -84,6 +86,7 @@ public class CompactSegments implements CoordinatorCustomDuty private static final Logger LOG = new Logger(CompactSegments.class); + private static final String TASK_ID_PREFIX = "coordinator-issued"; private static final Predicate IS_COMPACTION_TASK = status -> null != status && COMPACTION_TASK_TYPE.equals(status.getType()); @@ -167,10 +170,17 @@ public class CompactSegments implements CoordinatorCustomDuty final Interval interval = compactionTaskQuery.getIoConfig().getInputSpec().getInterval(); intervalsToSkipCompaction.computeIfAbsent(status.getDataSource(), k -> new ArrayList<>()) .add(interval); - - busyCompactionTaskSlots += findMaxNumTaskSlotsUsedByOneCompactionTask( - compactionTaskQuery.getTuningConfig() - ); + // Note: The default compactionRunnerType used here should match the default runner used in CompactionTask when + // no runner is provided there. 
+ CompactionEngine compactionRunnerType = compactionTaskQuery.getCompactionRunner() == null + ? CompactionEngine.NATIVE + : compactionTaskQuery.getCompactionRunner().getType(); + if (compactionRunnerType == CompactionEngine.NATIVE) { + busyCompactionTaskSlots += + findMaxNumTaskSlotsUsedByOneNativeCompactionTask(compactionTaskQuery.getTuningConfig()); + } else { + busyCompactionTaskSlots += findMaxNumTaskSlotsUsedByOneMsqCompactionTask(compactionTaskQuery.getContext()); + } } // Skip all the intervals locked by higher priority tasks for each datasource @@ -186,7 +196,7 @@ public class CompactSegments implements CoordinatorCustomDuty // Get iterator over segments to compact and submit compaction tasks Map dataSources = params.getUsedSegmentsTimelinesPerDataSource(); final CompactionSegmentIterator iterator = - policy.reset(compactionConfigs, dataSources, intervalsToSkipCompaction); + policy.createIterator(compactionConfigs, dataSources, intervalsToSkipCompaction); final int compactionTaskCapacity = getCompactionTaskCapacity(dynamicConfig); final int availableCompactionTaskSlots @@ -197,14 +207,15 @@ public class CompactSegments implements CoordinatorCustomDuty compactionConfigs, currentRunAutoCompactionSnapshotBuilders, availableCompactionTaskSlots, - iterator + iterator, + dynamicConfig.getEngine() ); final CoordinatorRunStats stats = params.getCoordinatorStats(); stats.add(Stats.Compaction.MAX_SLOTS, compactionTaskCapacity); stats.add(Stats.Compaction.AVAILABLE_SLOTS, availableCompactionTaskSlots); stats.add(Stats.Compaction.SUBMITTED_TASKS, numSubmittedCompactionTasks); - addCompactionSnapshotStats(currentRunAutoCompactionSnapshotBuilders, iterator, stats); + updateCompactionSnapshotStats(currentRunAutoCompactionSnapshotBuilders, iterator, stats); return params; } @@ -234,12 +245,12 @@ public class CompactSegments implements CoordinatorCustomDuty Granularity configuredSegmentGranularity = dataSourceCompactionConfig.getGranularitySpec() .getSegmentGranularity(); Granularity taskSegmentGranularity = compactionTaskQuery.getGranularitySpec().getSegmentGranularity(); - if (configuredSegmentGranularity.equals(taskSegmentGranularity)) { + if (configuredSegmentGranularity == null || configuredSegmentGranularity.equals(taskSegmentGranularity)) { return false; } LOG.info( - "Cancelling task [%s] as task segmentGranularity is [%s] but compaction config segmentGranularity is [%s]", + "Cancelling task[%s] as task segmentGranularity[%s] differs from compaction config segmentGranularity[%s].", compactionTaskQuery.getId(), taskSegmentGranularity, configuredSegmentGranularity ); overlordClient.cancelTask(compactionTaskQuery.getId()); @@ -280,11 +291,13 @@ public class CompactSegments implements CoordinatorCustomDuty } /** - * Returns the maximum number of task slots used by one compaction task at any time when the task is issued with - * the given tuningConfig. + * Returns the maximum number of task slots used by one native compaction task at any time when the task is + * issued with the given tuningConfig. 
*/ @VisibleForTesting - static int findMaxNumTaskSlotsUsedByOneCompactionTask(@Nullable ClientCompactionTaskQueryTuningConfig tuningConfig) + static int findMaxNumTaskSlotsUsedByOneNativeCompactionTask( + @Nullable ClientCompactionTaskQueryTuningConfig tuningConfig + ) { if (isParallelMode(tuningConfig)) { @Nullable @@ -296,6 +309,18 @@ public class CompactSegments implements CoordinatorCustomDuty } } + /** + * Returns the maximum number of task slots used by one MSQ compaction task at any time when the task is + * issued with the given context. + */ + static int findMaxNumTaskSlotsUsedByOneMsqCompactionTask(@Nullable Map context) + { + return context == null + ? ClientMSQContext.DEFAULT_MAX_NUM_TASKS + : (int) context.getOrDefault(ClientMSQContext.CTX_MAX_NUM_TASKS, ClientMSQContext.DEFAULT_MAX_NUM_TASKS); + } + + /** * Returns true if the compaction task can run in the parallel mode with the given tuningConfig. * This method should be synchronized with ParallelIndexSupervisorTask.isParallelMode(InputSource, ParallelIndexTuningConfig). @@ -354,7 +379,8 @@ public class CompactSegments implements CoordinatorCustomDuty Map compactionConfigs, Map currentRunAutoCompactionSnapshotBuilders, int numAvailableCompactionTaskSlots, - CompactionSegmentIterator iterator + CompactionSegmentIterator iterator, + CompactionEngine defaultEngine ) { if (numAvailableCompactionTaskSlots <= 0) { @@ -362,32 +388,23 @@ public class CompactSegments implements CoordinatorCustomDuty } int numSubmittedTasks = 0; - int numCompactionTasksAndSubtasks = 0; + int totalTaskSlotsAssigned = 0; - while (iterator.hasNext() && numCompactionTasksAndSubtasks < numAvailableCompactionTaskSlots) { + while (iterator.hasNext() && totalTaskSlotsAssigned < numAvailableCompactionTaskSlots) { final SegmentsToCompact entry = iterator.next(); - final List segmentsToCompact = entry.getSegments(); - if (segmentsToCompact.isEmpty()) { + if (entry.isEmpty()) { throw new ISE("segmentsToCompact is empty?"); } - final String dataSourceName = segmentsToCompact.get(0).getDataSource(); + final String dataSourceName = entry.getFirst().getDataSource(); // As these segments will be compacted, we will aggregate the statistic to the Compacted statistics - AutoCompactionSnapshot.Builder snapshotBuilder = currentRunAutoCompactionSnapshotBuilders.computeIfAbsent( - dataSourceName, - AutoCompactionSnapshot::builder - ); - snapshotBuilder - .incrementBytesCompacted( - segmentsToCompact.stream().mapToLong(DataSegment::getSize).sum() - ) - .incrementIntervalCountCompacted( - segmentsToCompact.stream().map(DataSegment::getInterval).distinct().count() - ) - .incrementSegmentCountCompacted(segmentsToCompact.size()); + currentRunAutoCompactionSnapshotBuilders + .computeIfAbsent(dataSourceName, AutoCompactionSnapshot::builder) + .incrementCompactedStats(entry.getStats()); final DataSourceCompactionConfig config = compactionConfigs.get(dataSourceName); + final List segmentsToCompact = entry.getSegments(); // Create granularitySpec to send to compaction task ClientCompactionTaskGranularitySpec granularitySpec; @@ -464,8 +481,30 @@ public class CompactSegments implements CoordinatorCustomDuty } } + final CompactionEngine compactionEngine = config.getEngine() == null ? 
defaultEngine : config.getEngine(); + final Map autoCompactionContext = newAutoCompactionContext(config.getTaskContext()); + int slotsRequiredForCurrentTask; + + if (compactionEngine == CompactionEngine.MSQ) { + if (autoCompactionContext.containsKey(ClientMSQContext.CTX_MAX_NUM_TASKS)) { + slotsRequiredForCurrentTask = (int) autoCompactionContext.get(ClientMSQContext.CTX_MAX_NUM_TASKS); + } else { + // Since MSQ needs all task slots for the calculated #tasks to be available upfront, allot all available + // compaction slots (upto a max of MAX_TASK_SLOTS_FOR_MSQ_COMPACTION) to current compaction task to avoid + // stalling. Setting "taskAssignment" to "auto" has the problem of not being able to determine the actual + // count, which is required for subsequent tasks. + slotsRequiredForCurrentTask = Math.min( + // Update the slots to 2 (min required for MSQ) if only 1 slot is available. + numAvailableCompactionTaskSlots == 1 ? 2 : numAvailableCompactionTaskSlots, + ClientMSQContext.MAX_TASK_SLOTS_FOR_MSQ_COMPACTION_TASK + ); + autoCompactionContext.put(ClientMSQContext.CTX_MAX_NUM_TASKS, slotsRequiredForCurrentTask); + } + } else { + slotsRequiredForCurrentTask = findMaxNumTaskSlotsUsedByOneNativeCompactionTask(config.getTuningConfig()); + } + final String taskId = compactSegments( - "coordinator-issued", segmentsToCompact, config.getTaskPriority(), ClientCompactionTaskQueryTuningConfig.from( @@ -478,7 +517,8 @@ public class CompactSegments implements CoordinatorCustomDuty config.getMetricsSpec(), transformSpec, dropExisting, - newAutoCompactionContext(config.getTaskContext()) + autoCompactionContext, + new ClientCompactionRunnerInfo(compactionEngine) ); LOG.info( @@ -486,9 +526,8 @@ public class CompactSegments implements CoordinatorCustomDuty taskId, segmentsToCompact.size(), dataSourceName, entry.getUmbrellaInterval() ); LOG.debugSegments(segmentsToCompact, "Compacting segments"); - // Count the compaction task itself + its sub tasks numSubmittedTasks++; - numCompactionTasksAndSubtasks += findMaxNumTaskSlotsUsedByOneCompactionTask(config.getTuningConfig()); + totalTaskSlotsAssigned += slotsRequiredForCurrentTask; } LOG.info("Submitted a total of [%d] compaction tasks.", numSubmittedTasks); @@ -504,7 +543,7 @@ public class CompactSegments implements CoordinatorCustomDuty return newContext; } - private void addCompactionSnapshotStats( + private void updateCompactionSnapshotStats( Map currentRunAutoCompactionSnapshotBuilders, CompactionSegmentIterator iterator, CoordinatorRunStats stats @@ -513,77 +552,45 @@ public class CompactSegments implements CoordinatorCustomDuty // Mark all the segments remaining in the iterator as "awaiting compaction" while (iterator.hasNext()) { final SegmentsToCompact entry = iterator.next(); - final List segmentsToCompact = entry.getSegments(); - if (!segmentsToCompact.isEmpty()) { - final String dataSourceName = segmentsToCompact.get(0).getDataSource(); - AutoCompactionSnapshot.Builder snapshotBuilder = currentRunAutoCompactionSnapshotBuilders.computeIfAbsent( - dataSourceName, - AutoCompactionSnapshot::builder - ); - snapshotBuilder - .incrementBytesAwaitingCompaction( - segmentsToCompact.stream().mapToLong(DataSegment::getSize).sum() - ) - .incrementIntervalCountAwaitingCompaction( - segmentsToCompact.stream().map(DataSegment::getInterval).distinct().count() - ) - .incrementSegmentCountAwaitingCompaction(segmentsToCompact.size()); + if (!entry.isEmpty()) { + final String dataSourceName = entry.getFirst().getDataSource(); + 
currentRunAutoCompactionSnapshotBuilders + .computeIfAbsent(dataSourceName, AutoCompactionSnapshot::builder) + .incrementWaitingStats(entry.getStats()); } } // Statistics of all segments considered compacted after this run - Map allCompactedStatistics = iterator.totalCompactedStatistics(); - for (Map.Entry compactionStatisticsEntry : allCompactedStatistics.entrySet()) { - final String dataSource = compactionStatisticsEntry.getKey(); - final CompactionStatistics dataSourceCompactedStatistics = compactionStatisticsEntry.getValue(); - AutoCompactionSnapshot.Builder builder = currentRunAutoCompactionSnapshotBuilders.computeIfAbsent( - dataSource, - AutoCompactionSnapshot::builder - ); - builder.incrementBytesCompacted(dataSourceCompactedStatistics.getTotalBytes()); - builder.incrementSegmentCountCompacted(dataSourceCompactedStatistics.getNumSegments()); - builder.incrementIntervalCountCompacted(dataSourceCompactedStatistics.getNumIntervals()); - } + iterator.totalCompactedStatistics().forEach((dataSource, compactedStats) -> { + currentRunAutoCompactionSnapshotBuilders + .computeIfAbsent(dataSource, AutoCompactionSnapshot::builder) + .incrementCompactedStats(compactedStats); + }); // Statistics of all segments considered skipped after this run - Map allSkippedStatistics = iterator.totalSkippedStatistics(); - for (Map.Entry compactionStatisticsEntry : allSkippedStatistics.entrySet()) { - final String dataSource = compactionStatisticsEntry.getKey(); - final CompactionStatistics dataSourceSkippedStatistics = compactionStatisticsEntry.getValue(); - AutoCompactionSnapshot.Builder builder = currentRunAutoCompactionSnapshotBuilders.computeIfAbsent( - dataSource, - AutoCompactionSnapshot::builder - ); - builder.incrementBytesSkipped(dataSourceSkippedStatistics.getTotalBytes()) - .incrementSegmentCountSkipped(dataSourceSkippedStatistics.getNumSegments()) - .incrementIntervalCountSkipped(dataSourceSkippedStatistics.getNumIntervals()); - } + iterator.totalSkippedStatistics().forEach((dataSource, dataSourceSkippedStatistics) -> { + currentRunAutoCompactionSnapshotBuilders + .computeIfAbsent(dataSource, AutoCompactionSnapshot::builder) + .incrementSkippedStats(dataSourceSkippedStatistics); + }); final Map currentAutoCompactionSnapshotPerDataSource = new HashMap<>(); - for (Map.Entry autoCompactionSnapshotBuilderEntry - : currentRunAutoCompactionSnapshotBuilders.entrySet()) { - final String dataSource = autoCompactionSnapshotBuilderEntry.getKey(); - final AutoCompactionSnapshot.Builder builder = autoCompactionSnapshotBuilderEntry.getValue(); - - // Build the complete snapshot for the datasource - AutoCompactionSnapshot autoCompactionSnapshot = builder.build(); + currentRunAutoCompactionSnapshotBuilders.forEach((dataSource, builder) -> { + final AutoCompactionSnapshot autoCompactionSnapshot = builder.build(); currentAutoCompactionSnapshotPerDataSource.put(dataSource, autoCompactionSnapshot); - - // Use the complete snapshot to emit metrics - addStatsForDatasource(dataSource, autoCompactionSnapshot, stats); - } + collectSnapshotStats(autoCompactionSnapshot, stats); + }); // Atomic update of autoCompactionSnapshotPerDataSource with the latest from this coordinator run autoCompactionSnapshotPerDataSource.set(currentAutoCompactionSnapshotPerDataSource); } - private void addStatsForDatasource( - String dataSource, + private void collectSnapshotStats( AutoCompactionSnapshot autoCompactionSnapshot, CoordinatorRunStats stats ) { - final RowKey rowKey = RowKey.of(Dimension.DATASOURCE, dataSource); + final RowKey 
rowKey = RowKey.of(Dimension.DATASOURCE, autoCompactionSnapshot.getDataSource()); stats.add(Stats.Compaction.PENDING_BYTES, rowKey, autoCompactionSnapshot.getBytesAwaitingCompaction()); stats.add(Stats.Compaction.PENDING_SEGMENTS, rowKey, autoCompactionSnapshot.getSegmentCountAwaitingCompaction()); @@ -618,7 +625,6 @@ public class CompactSegments implements CoordinatorCustomDuty } private String compactSegments( - String idPrefix, List segments, int compactionTaskPriority, @Nullable ClientCompactionTaskQueryTuningConfig tuningConfig, @@ -627,7 +633,8 @@ public class CompactSegments implements CoordinatorCustomDuty @Nullable AggregatorFactory[] metricsSpec, @Nullable ClientCompactionTaskTransformSpec transformSpec, @Nullable Boolean dropExisting, - @Nullable Map context + @Nullable Map context, + ClientCompactionRunnerInfo compactionRunner ) { Preconditions.checkArgument(!segments.isEmpty(), "Expect non-empty segments to compact"); @@ -641,7 +648,7 @@ public class CompactSegments implements CoordinatorCustomDuty context = context == null ? new HashMap<>() : context; context.put("priority", compactionTaskPriority); - final String taskId = IdUtils.newTaskId(idPrefix, ClientCompactionTaskQuery.TYPE, dataSource, null); + final String taskId = IdUtils.newTaskId(TASK_ID_PREFIX, ClientCompactionTaskQuery.TYPE, dataSource, null); final Granularity segmentGranularity = granularitySpec == null ? null : granularitySpec.getSegmentGranularity(); final ClientTaskQuery taskPayload = new ClientCompactionTaskQuery( taskId, @@ -655,7 +662,8 @@ public class CompactSegments implements CoordinatorCustomDuty dimensionsSpec, metricsSpec, transformSpec, - context + context, + compactionRunner ); FutureUtils.getUnchecked(overlordClient.runTask(taskId, taskPayload), true); return taskId; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index 1c2a867c4fa..9b5d38f198e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -69,6 +69,7 @@ public class StrategicSegmentAssigner implements SegmentActionHandler private final Map tierToHistoricalCount = new HashMap<>(); private final Map> segmentsToDelete = new HashMap<>(); private final Map> segmentsWithZeroRequiredReplicas = new HashMap<>(); + private final Set broadcastSegments = new HashSet<>(); public StrategicSegmentAssigner( SegmentLoadQueueManager loadQueueManager, @@ -361,6 +362,8 @@ public class StrategicSegmentAssigner implements SegmentActionHandler entry -> replicaCountMap.computeIfAbsent(segment.getId(), entry.getKey()) .setRequired(entry.getIntValue(), entry.getIntValue()) ); + + broadcastSegments.add(segment); } @Override @@ -398,6 +401,11 @@ public class StrategicSegmentAssigner implements SegmentActionHandler return false; } + public Set getBroadcastSegments() + { + return broadcastSegments; + } + /** * Drops the broadcast segment if it is loaded on the given server. * Returns true only if the segment was successfully queued for drop on the server. 
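The MSQ branch added to CompactSegments above computes the task slots to reserve for one compaction task: an explicit maxNumTasks in the auto-compaction context is used as-is; otherwise all available compaction slots are assigned up to the MSQ cap, bumped to 2 when only 1 slot is free, since that is the minimum an MSQ task requires. A minimal standalone sketch of that rule follows; the names (MsqSlotEstimate, estimateMsqTaskSlots, MSQ_SLOT_CAP) are hypothetical stand-ins and the cap value is illustrative, not the real ClientMSQContext constant.

public class MsqSlotEstimate
{
  // Illustrative stand-in for ClientMSQContext.MAX_TASK_SLOTS_FOR_MSQ_COMPACTION_TASK; the real value may differ.
  private static final int MSQ_SLOT_CAP = 5;

  static int estimateMsqTaskSlots(Integer maxNumTasksFromContext, int numAvailableCompactionTaskSlots)
  {
    if (maxNumTasksFromContext != null) {
      // An explicit maxNumTasks in the task context wins.
      return maxNumTasksFromContext;
    }
    // Otherwise take all available slots, bumped to 2 if only 1 is available, and capped for MSQ.
    return Math.min(
        numAvailableCompactionTaskSlots == 1 ? 2 : numAvailableCompactionTaskSlots,
        MSQ_SLOT_CAP
    );
  }

  public static void main(String[] args)
  {
    System.out.println(estimateMsqTaskSlots(null, 1));  // 2 -> bumped to the MSQ minimum
    System.out.println(estimateMsqTaskSlots(null, 3));  // 3 -> all available slots
    System.out.println(estimateMsqTaskSlots(null, 50)); // 5 -> capped
    System.out.println(estimateMsqTaskSlots(4, 50));    // 4 -> explicit context value wins
  }
}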
diff --git a/server/src/main/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResource.java b/server/src/main/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResource.java index 57833506f09..0bba5cf63fa 100644 --- a/server/src/main/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResource.java +++ b/server/src/main/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResource.java @@ -26,9 +26,14 @@ import com.sun.jersey.spi.container.ResourceFilters; import org.apache.druid.audit.AuditEntry; import org.apache.druid.audit.AuditInfo; import org.apache.druid.audit.AuditManager; +import org.apache.druid.client.indexing.ClientCompactionRunnerInfo; import org.apache.druid.common.config.ConfigManager.SetResult; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.InvalidInput; +import org.apache.druid.error.NotFound; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.server.coordinator.CompactionConfigValidationResult; import org.apache.druid.server.coordinator.CoordinatorCompactionConfig; import org.apache.druid.server.coordinator.CoordinatorConfigManager; import org.apache.druid.server.coordinator.DataSourceCompactionConfig; @@ -119,6 +124,12 @@ public class CoordinatorCompactionConfigsResource .getCompactionConfigs() .stream() .collect(Collectors.toMap(DataSourceCompactionConfig::getDataSource, Function.identity())); + CompactionConfigValidationResult validationResult = + ClientCompactionRunnerInfo.validateCompactionConfig(newConfig, current.getEngine()); + if (!validationResult.isValid()) { + throw InvalidInput.exception("Compaction config not supported. Reason[%s].", validationResult.getReason()); + } + // Don't persist config with the default engine if engine not specified, to enable update of the default. newConfigs.put(newConfig.getDataSource(), newConfig); newCompactionConfig = CoordinatorCompactionConfig.from(current, ImmutableList.copyOf(newConfigs.values())); @@ -206,7 +217,7 @@ public class CoordinatorCompactionConfigsResource final DataSourceCompactionConfig config = configs.remove(dataSource); if (config == null) { - throw new NoSuchElementException("datasource not found"); + throw NotFound.exception("datasource not found"); } return CoordinatorCompactionConfig.from(current, ImmutableList.copyOf(configs.values())); @@ -231,9 +242,8 @@ public class CoordinatorCompactionConfigsResource updateRetryDelay(); } } - catch (NoSuchElementException e) { - LOG.warn(e, "Update compaction config failed"); - return Response.status(Response.Status.NOT_FOUND).build(); + catch (DruidException e) { + return ServletResourceUtils.buildErrorResponseFrom(e); } catch (Exception e) { LOG.warn(e, "Update compaction config failed"); diff --git a/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java b/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java index 9841e09a1a7..bfda5cbf3ad 100644 --- a/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java +++ b/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java @@ -36,6 +36,8 @@ import java.util.Objects; *
<li>{@link DataSegmentPlus#createdDate} - The time when the segment was created.</li>
 * <li>{@link DataSegmentPlus#usedStatusLastUpdatedDate} - The time when the segment's
 * used status was last updated.</li>
+ * <li>{@link DataSegmentPlus#upgradedFromSegmentId} - The segment id to which the same load spec originally belonged.
+ * Load specs can be shared as a result of segment version upgrades.</li>
 *

    * This class closely resembles the row structure of the {@link MetadataStorageTablesConfig#getSegmentsTable()}. @@ -53,6 +55,9 @@ public class DataSegmentPlus private final String schemaFingerprint; private final Long numRows; + @Nullable + private final String upgradedFromSegmentId; + @JsonCreator public DataSegmentPlus( @JsonProperty("dataSegment") final DataSegment dataSegment, @@ -60,7 +65,8 @@ public class DataSegmentPlus @JsonProperty("usedStatusLastUpdatedDate") @Nullable final DateTime usedStatusLastUpdatedDate, @JsonProperty("used") @Nullable final Boolean used, @JsonProperty("schemaFingerprint") @Nullable final String schemaFingerprint, - @JsonProperty("numRows") @Nullable final Long numRows + @JsonProperty("numRows") @Nullable final Long numRows, + @JsonProperty("upgradedFromSegmentId") @Nullable final String upgradedFromSegmentId ) { this.dataSegment = dataSegment; @@ -69,6 +75,7 @@ public class DataSegmentPlus this.used = used; this.schemaFingerprint = schemaFingerprint; this.numRows = numRows; + this.upgradedFromSegmentId = upgradedFromSegmentId; } @Nullable @@ -112,6 +119,13 @@ public class DataSegmentPlus return numRows; } + @Nullable + @JsonProperty + public String getUpgradedFromSegmentId() + { + return upgradedFromSegmentId; + } + @Override public boolean equals(Object o) { @@ -127,7 +141,8 @@ public class DataSegmentPlus && Objects.equals(usedStatusLastUpdatedDate, that.getUsedStatusLastUpdatedDate()) && Objects.equals(used, that.getUsed()) && Objects.equals(schemaFingerprint, that.getSchemaFingerprint()) - && Objects.equals(numRows, that.getNumRows()); + && Objects.equals(numRows, that.getNumRows()) + && Objects.equals(upgradedFromSegmentId, that.getUpgradedFromSegmentId()); } @Override @@ -139,7 +154,8 @@ public class DataSegmentPlus usedStatusLastUpdatedDate, used, schemaFingerprint, - numRows + numRows, + upgradedFromSegmentId ); } @@ -153,6 +169,7 @@ public class DataSegmentPlus ", used=" + getUsed() + ", schemaFingerprint=" + getSchemaFingerprint() + ", numRows=" + getNumRows() + + ", upgradedFromSegmentId=" + getUpgradedFromSegmentId() + '}'; } } diff --git a/server/src/main/java/org/apache/druid/server/http/HistoricalResource.java b/server/src/main/java/org/apache/druid/server/http/HistoricalResource.java index 4bc48f444df..223e1a2dea2 100644 --- a/server/src/main/java/org/apache/druid/server/http/HistoricalResource.java +++ b/server/src/main/java/org/apache/druid/server/http/HistoricalResource.java @@ -21,7 +21,7 @@ package org.apache.druid.server.http; import com.google.common.collect.ImmutableMap; import com.sun.jersey.spi.container.ResourceFilters; -import org.apache.druid.server.coordination.SegmentLoadDropHandler; +import org.apache.druid.server.coordination.SegmentBootstrapper; import org.apache.druid.server.http.security.StateResourceFilter; import javax.inject.Inject; @@ -34,14 +34,14 @@ import javax.ws.rs.core.Response; @Path("/druid/historical/v1") public class HistoricalResource { - private final SegmentLoadDropHandler segmentLoadDropHandler; + private final SegmentBootstrapper segmentBootstrapper; @Inject public HistoricalResource( - SegmentLoadDropHandler segmentLoadDropHandler + SegmentBootstrapper segmentBootstrapper ) { - this.segmentLoadDropHandler = segmentLoadDropHandler; + this.segmentBootstrapper = segmentBootstrapper; } @GET @@ -50,14 +50,14 @@ public class HistoricalResource @Produces(MediaType.APPLICATION_JSON) public Response getLoadStatus() { - return Response.ok(ImmutableMap.of("cacheInitialized", 
segmentLoadDropHandler.isStarted())).build(); + return Response.ok(ImmutableMap.of("cacheInitialized", segmentBootstrapper.isBootstrappingComplete())).build(); } @GET @Path("/readiness") public Response getReadiness() { - if (segmentLoadDropHandler.isStarted()) { + if (segmentBootstrapper.isBootstrappingComplete()) { return Response.ok().build(); } else { return Response.status(Response.Status.SERVICE_UNAVAILABLE).build(); diff --git a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java index d8b00c318db..a1cccd2b784 100644 --- a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java +++ b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java @@ -471,4 +471,23 @@ public class MetadataResource ); return Response.status(Response.Status.OK).entity(authorizedDataSourceInformation).build(); } + + /** + * @return all bootstrap segments determined by the coordinator. + */ + @POST + @Path("/bootstrapSegments") + @Produces(MediaType.APPLICATION_JSON) + @ResourceFilters(DatasourceResourceFilter.class) + public Response getBootstrapSegments() + { + final Set broadcastSegments = coordinator.getBroadcastSegments(); + if (broadcastSegments == null) { + return Response.status(Response.Status.SERVICE_UNAVAILABLE) + .entity("Bootstrap segments are not initialized yet." + + " Please ensure that the Coordinator duties are running and try again.") + .build(); + } + return Response.status(Response.Status.OK).entity(broadcastSegments).build(); + } } diff --git a/server/src/test/java/org/apache/druid/client/CachingQueryRunnerTest.java b/server/src/test/java/org/apache/druid/client/CachingQueryRunnerTest.java index a4375a61900..7208ab2fc4b 100644 --- a/server/src/test/java/org/apache/druid/client/CachingQueryRunnerTest.java +++ b/server/src/test/java/org/apache/druid/client/CachingQueryRunnerTest.java @@ -68,6 +68,7 @@ import org.apache.druid.query.topn.TopNQueryBuilder; import org.apache.druid.query.topn.TopNQueryConfig; import org.apache.druid.query.topn.TopNQueryQueryToolChest; import org.apache.druid.query.topn.TopNResultValue; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.easymock.EasyMock; import org.joda.time.DateTime; import org.junit.Assert; @@ -90,7 +91,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @RunWith(Parameterized.class) -public class CachingQueryRunnerTest +public class CachingQueryRunnerTest extends InitializedNullHandlingTest { @Parameterized.Parameters(name = "numBackgroundThreads={0}") public static Iterable constructorFeeder() @@ -222,8 +223,8 @@ public class CachingQueryRunnerTest Cache cache = EasyMock.mock(Cache.class); EasyMock.replay(cache); CachingQueryRunner queryRunner = makeCachingQueryRunner(null, cache, toolchest, Sequences.empty()); - Assert.assertFalse(queryRunner.canPopulateCache(query, toolchest.getCacheStrategy(query))); - Assert.assertFalse(queryRunner.canUseCache(query, toolchest.getCacheStrategy(query))); + Assert.assertFalse(queryRunner.canPopulateCache(query, toolchest.getCacheStrategy(query, null))); + Assert.assertFalse(queryRunner.canUseCache(query, toolchest.getCacheStrategy(query, null))); queryRunner.run(QueryPlus.wrap(query)); EasyMock.verifyUnexpectedCalls(cache); } @@ -243,7 +244,7 @@ public class CachingQueryRunnerTest QueryToolChest toolchest = EasyMock.mock(QueryToolChest.class); Cache cache = EasyMock.mock(Cache.class); - 
EasyMock.expect(toolchest.getCacheStrategy(query)).andReturn(null); + EasyMock.expect(toolchest.getCacheStrategy(EasyMock.eq(query), EasyMock.anyObject())).andReturn(null); EasyMock.replay(cache, toolchest); CachingQueryRunner queryRunner = makeCachingQueryRunner(new byte[0], cache, toolchest, Sequences.empty()); Assert.assertFalse(queryRunner.canPopulateCache(query, null)); @@ -339,7 +340,7 @@ public class CachingQueryRunnerTest resultSeq ); - CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query); + CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query, null); Cache.NamedKey cacheKey = CacheUtil.computeSegmentCacheKey( CACHE_ID, SEGMENT_DESCRIPTOR, @@ -383,7 +384,7 @@ public class CachingQueryRunnerTest byte[] cacheKeyPrefix = RandomUtils.nextBytes(10); - CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query); + CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query, null); Cache.NamedKey cacheKey = CacheUtil.computeSegmentCacheKey( CACHE_ID, SEGMENT_DESCRIPTOR, @@ -399,7 +400,7 @@ public class CachingQueryRunnerTest toolchest, Sequences.empty() ); - Assert.assertTrue(runner.canUseCache(query, toolchest.getCacheStrategy(query))); + Assert.assertTrue(runner.canUseCache(query, toolchest.getCacheStrategy(query, null))); List results = runner.run(QueryPlus.wrap(query)).toList(); Assert.assertEquals(expectedResults.toString(), results.toString()); } diff --git a/server/src/test/java/org/apache/druid/client/coordinator/CoordinatorClientImplTest.java b/server/src/test/java/org/apache/druid/client/coordinator/CoordinatorClientImplTest.java index 62af96d4e0e..3dde6dda149 100644 --- a/server/src/test/java/org/apache/druid/client/coordinator/CoordinatorClientImplTest.java +++ b/server/src/test/java/org/apache/druid/client/coordinator/CoordinatorClientImplTest.java @@ -24,7 +24,12 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.inject.Injector; +import org.apache.druid.client.BootstrapSegmentsResponse; import org.apache.druid.client.ImmutableSegmentLoadInfo; +import org.apache.druid.guice.StartupInjectorBuilder; +import org.apache.druid.initialization.CoreInjectorBuilder; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; @@ -37,6 +42,7 @@ import org.apache.druid.segment.metadata.DataSourceInformation; import org.apache.druid.server.coordination.DruidServerMetadata; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponseStatus; @@ -58,6 +64,24 @@ public class CoordinatorClientImplTest private MockServiceClient serviceClient; private CoordinatorClient coordinatorClient; + private static final DataSegment SEGMENT1 = DataSegment.builder() + .dataSource("xyz") + .interval(Intervals.of("1000/2000")) + .version("1") + .loadSpec(ImmutableMap.of("type", "local", "loc", "foo")) + .shardSpec(new NumberedShardSpec(0, 1)) + .size(1) + .build(); + + private static final DataSegment SEGMENT2 = DataSegment.builder() + .dataSource("xyz") + .interval(Intervals.of("2000/3000")) + 
.version("1") + .loadSpec(ImmutableMap.of("type", "local", "loc", "bar")) + .shardSpec(new NumberedShardSpec(0, 1)) + .size(1) + .build(); + @Before public void setup() { @@ -181,6 +205,82 @@ public class CoordinatorClientImplTest ); } + @Test + public void test_fetchBootstrapSegments() throws Exception + { + final List expectedSegments = ImmutableList.of(SEGMENT1, SEGMENT2); + + serviceClient.expectAndRespond( + new RequestBuilder(HttpMethod.POST, "/druid/coordinator/v1/metadata/bootstrapSegments"), + HttpResponseStatus.OK, + ImmutableMap.of(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON), + jsonMapper.writeValueAsBytes(expectedSegments) + ); + + final ListenableFuture response = coordinatorClient.fetchBootstrapSegments(); + Assert.assertNotNull(response); + + final ImmutableList observedDataSegments = ImmutableList.copyOf(response.get().getIterator()); + for (int idx = 0; idx < expectedSegments.size(); idx++) { + Assert.assertEquals(expectedSegments.get(idx).getLoadSpec(), observedDataSegments.get(idx).getLoadSpec()); + } + } + + /** + * Set up a Guice injector with PruneLoadSpec set to true. This test verifies that the bootstrap segments API + * always return segments with load specs present, ensuring they can be loaded anywhere. + */ + @Test + public void test_fetchBootstrapSegmentsAreLoadableWhenPruneLoadSpecIsEnabled() throws Exception + { + final List expectedSegments = ImmutableList.of(SEGMENT1, SEGMENT2); + + // Set up a coordinator client with PruneLoadSpec set to true in the injector + final Injector injector = new CoreInjectorBuilder(new StartupInjectorBuilder().build()) + .addModule(binder -> binder.bindConstant().annotatedWith(PruneLoadSpec.class).to(true)) + .build(); + + final ObjectMapper objectMapper = injector.getInstance(ObjectMapper.class); + final CoordinatorClient coordinatorClient = new CoordinatorClientImpl(serviceClient, objectMapper); + + serviceClient.expectAndRespond( + new RequestBuilder(HttpMethod.POST, "/druid/coordinator/v1/metadata/bootstrapSegments"), + HttpResponseStatus.OK, + ImmutableMap.of(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON), + objectMapper.writeValueAsBytes(expectedSegments) + ); + + final ListenableFuture response = coordinatorClient.fetchBootstrapSegments(); + Assert.assertNotNull(response); + + final ImmutableList observedDataSegments = ImmutableList.copyOf(response.get().getIterator()); + Assert.assertEquals(expectedSegments, observedDataSegments); + for (int idx = 0; idx < expectedSegments.size(); idx++) { + Assert.assertEquals(expectedSegments.get(idx).getLoadSpec(), observedDataSegments.get(idx).getLoadSpec()); + } + } + + @Test + public void test_fetchEmptyBootstrapSegments() throws Exception + { + final List segments = ImmutableList.of(); + + serviceClient.expectAndRespond( + new RequestBuilder(HttpMethod.POST, "/druid/coordinator/v1/metadata/bootstrapSegments"), + HttpResponseStatus.OK, + ImmutableMap.of(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON), + jsonMapper.writeValueAsBytes(segments) + ); + + final ListenableFuture response = coordinatorClient.fetchBootstrapSegments(); + Assert.assertNotNull(response); + + Assert.assertEquals( + segments, + ImmutableList.copyOf(response.get().getIterator()) + ); + } + @Test public void test_fetchDataSourceInformation() throws Exception { diff --git a/server/src/test/java/org/apache/druid/client/coordinator/NoopCoordinatorClient.java b/server/src/test/java/org/apache/druid/client/coordinator/NoopCoordinatorClient.java index 7b7d253ef6d..58f5af58a3e 100644 --- 
a/server/src/test/java/org/apache/druid/client/coordinator/NoopCoordinatorClient.java +++ b/server/src/test/java/org/apache/druid/client/coordinator/NoopCoordinatorClient.java @@ -20,6 +20,7 @@ package org.apache.druid.client.coordinator; import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.client.BootstrapSegmentsResponse; import org.apache.druid.client.ImmutableSegmentLoadInfo; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.rpc.ServiceRetryPolicy; @@ -62,10 +63,22 @@ public class NoopCoordinatorClient implements CoordinatorClient throw new UnsupportedOperationException(); } + @Override + public ListenableFuture fetchBootstrapSegments() + { + throw new UnsupportedOperationException(); + } + @Override public CoordinatorClient withRetryPolicy(ServiceRetryPolicy retryPolicy) { // Ignore retryPolicy for the test client. return this; } + + @Override + public ListenableFuture> fetchDataSourcesWithUsedSegments() + { + throw new UnsupportedOperationException(); + } } diff --git a/server/src/test/java/org/apache/druid/client/indexing/ClientCompactionRunnerInfoTest.java b/server/src/test/java/org/apache/druid/client/indexing/ClientCompactionRunnerInfoTest.java new file mode 100644 index 00000000000..f6d4a2b6e58 --- /dev/null +++ b/server/src/test/java/org/apache/druid/client/indexing/ClientCompactionRunnerInfoTest.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.client.indexing; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.data.input.SegmentsSplitHintSpec; +import org.apache.druid.indexer.CompactionEngine; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.CompressionStrategy; +import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; +import org.apache.druid.server.coordinator.CompactionConfigValidationResult; +import org.apache.druid.server.coordinator.DataSourceCompactionConfig; +import org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig; +import org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig; +import org.joda.time.Duration; +import org.joda.time.Period; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Map; + +public class ClientCompactionRunnerInfoTest +{ + @Test + public void testMSQEngineWithHashedPartitionsSpecIsInvalid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new HashedPartitionsSpec(100, null, null), + Collections.emptyMap(), + null, + null + ); + CompactionConfigValidationResult validationResult = ClientCompactionRunnerInfo.validateCompactionConfig( + compactionConfig, + CompactionEngine.NATIVE + ); + Assert.assertFalse(validationResult.isValid()); + Assert.assertEquals( + "Invalid partitionsSpec type[HashedPartitionsSpec] for MSQ engine." 
+ + " Type must be either 'dynamic' or 'range'.", + validationResult.getReason() + ); + } + + @Test + public void testMSQEngineWithMaxTotalRowsIsInvalid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DynamicPartitionsSpec(100, 100L), + Collections.emptyMap(), + null, + null + ); + CompactionConfigValidationResult validationResult = ClientCompactionRunnerInfo.validateCompactionConfig( + compactionConfig, + CompactionEngine.NATIVE + ); + Assert.assertFalse(validationResult.isValid()); + Assert.assertEquals( + "maxTotalRows[100] in DynamicPartitionsSpec not supported for MSQ engine.", + validationResult.getReason() + ); + } + + @Test + public void testMSQEngineWithDynamicPartitionsSpecIsValid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DynamicPartitionsSpec(100, null), + Collections.emptyMap(), + null, + null + ); + Assert.assertTrue(ClientCompactionRunnerInfo.validateCompactionConfig(compactionConfig, CompactionEngine.NATIVE) + .isValid()); + } + + @Test + public void testMSQEngineWithDimensionRangePartitionsSpecIsValid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DimensionRangePartitionsSpec(100, null, ImmutableList.of("partitionDim"), false), + Collections.emptyMap(), + null, + null + ); + Assert.assertTrue(ClientCompactionRunnerInfo.validateCompactionConfig(compactionConfig, CompactionEngine.NATIVE) + .isValid()); + } + + @Test + public void testMSQEngineWithQueryGranularityAllIsValid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DynamicPartitionsSpec(3, null), + Collections.emptyMap(), + new UserCompactionTaskGranularityConfig(Granularities.ALL, Granularities.ALL, false), + null + ); + Assert.assertTrue(ClientCompactionRunnerInfo.validateCompactionConfig(compactionConfig, CompactionEngine.NATIVE) + .isValid()); + } + + @Test + public void testMSQEngineWithRollupFalseWithMetricsSpecIsInValid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DynamicPartitionsSpec(3, null), + Collections.emptyMap(), + new UserCompactionTaskGranularityConfig(null, null, false), + new AggregatorFactory[]{new LongSumAggregatorFactory("sum", "sum")} + ); + CompactionConfigValidationResult validationResult = ClientCompactionRunnerInfo.validateCompactionConfig( + compactionConfig, + CompactionEngine.NATIVE + ); + Assert.assertFalse(validationResult.isValid()); + Assert.assertEquals( + "rollup in granularitySpec must be set to True if metricsSpec is specifed for MSQ engine.", + validationResult.getReason() + ); + } + + @Test + public void testMSQEngineWithUnsupportedMetricsSpecIsInValid() + { + // Aggregators having different input and ouput column names are unsupported. 
+ final String inputColName = "added"; + final String outputColName = "sum_added"; + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DynamicPartitionsSpec(3, null), + Collections.emptyMap(), + new UserCompactionTaskGranularityConfig(null, null, null), + new AggregatorFactory[]{new LongSumAggregatorFactory(outputColName, inputColName)} + ); + CompactionConfigValidationResult validationResult = ClientCompactionRunnerInfo.validateCompactionConfig( + compactionConfig, + CompactionEngine.NATIVE + ); + Assert.assertFalse(validationResult.isValid()); + Assert.assertEquals( + "Different name[sum_added] and fieldName(s)[[added]] for aggregator unsupported for MSQ engine.", + validationResult.getReason() + ); + } + + @Test + public void testMSQEngineWithRollupNullWithMetricsSpecIsValid() + { + DataSourceCompactionConfig compactionConfig = createCompactionConfig( + new DynamicPartitionsSpec(3, null), + Collections.emptyMap(), + new UserCompactionTaskGranularityConfig(null, null, null), + new AggregatorFactory[]{new LongSumAggregatorFactory("sum", "sum")} + ); + Assert.assertTrue(ClientCompactionRunnerInfo.validateCompactionConfig(compactionConfig, CompactionEngine.NATIVE) + .isValid()); + } + + private static DataSourceCompactionConfig createCompactionConfig( + PartitionsSpec partitionsSpec, + Map context, + @Nullable UserCompactionTaskGranularityConfig granularitySpec, + @Nullable AggregatorFactory[] metricsSpec + ) + { + final DataSourceCompactionConfig config = new DataSourceCompactionConfig( + "dataSource", + null, + 500L, + 10000, + new Period(3600), + createTuningConfig(partitionsSpec), + granularitySpec, + null, + metricsSpec, + null, + null, + CompactionEngine.MSQ, + context + ); + return config; + } + + private static UserCompactionTaskQueryTuningConfig createTuningConfig(PartitionsSpec partitionsSpec) + { + final UserCompactionTaskQueryTuningConfig tuningConfig = new UserCompactionTaskQueryTuningConfig( + 40000, + null, + 2000L, + null, + new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), null), + partitionsSpec, + IndexSpec.builder() + .withDimensionCompression(CompressionStrategy.LZ4) + .withMetricCompression(CompressionStrategy.LZF) + .withLongEncoding(CompressionFactory.LongEncodingStrategy.LONGS) + .build(), + IndexSpec.builder() + .withDimensionCompression(CompressionStrategy.LZ4) + .withMetricCompression(CompressionStrategy.UNCOMPRESSED) + .withLongEncoding(CompressionFactory.LongEncodingStrategy.AUTO) + .build(), + 2, + 1000L, + TmpFileSegmentWriteOutMediumFactory.instance(), + 100, + 5, + 1000L, + new Duration(3000L), + 7, + 1000, + 100, + 2 + ); + return tuningConfig; + } +} diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 20a74e6c026..6eccbccaa84 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -138,8 +138,10 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata final String v1 = "2023-01-01"; final String v2 = "2023-01-02"; final String v3 = "2023-01-03"; + final String alreadyUpgradedVersion = "2023-02-01"; final String lockVersion = "2024-01-01"; + final String taskAllocatorId = "appendTask"; final String replaceTaskId = "replaceTask1"; final ReplaceTaskLock replaceLock = new ReplaceTaskLock( 
replaceTaskId, @@ -148,6 +150,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata ); final Set appendSegments = new HashSet<>(); + final List pendingSegmentsForTask = new ArrayList<>(); final Set expectedSegmentsToUpgrade = new HashSet<>(); for (int i = 0; i < 10; i++) { final DataSegment segment = createSegment( @@ -157,6 +160,31 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata ); appendSegments.add(segment); expectedSegmentsToUpgrade.add(segment); + // Add the same segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + SegmentIdWithShardSpec.fromDataSegment(segment), + v1, + segment.getId().toString(), + null, + taskAllocatorId + ) + ); + // Add upgraded pending segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + new SegmentIdWithShardSpec( + DS.WIKI, + Intervals.of("2023-01-01/2023-02-01"), + alreadyUpgradedVersion, + new NumberedShardSpec(i, 0) + ), + alreadyUpgradedVersion, + segment.getId().toString(), + segment.getId().toString(), + taskAllocatorId + ) + ); } for (int i = 0; i < 10; i++) { @@ -167,6 +195,31 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata ); appendSegments.add(segment); expectedSegmentsToUpgrade.add(segment); + // Add the same segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + SegmentIdWithShardSpec.fromDataSegment(segment), + v2, + segment.getId().toString(), + null, + taskAllocatorId + ) + ); + // Add upgraded pending segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + new SegmentIdWithShardSpec( + DS.WIKI, + Intervals.of("2023-01-01/2023-02-01"), + alreadyUpgradedVersion, + new NumberedShardSpec(10 + i, 0) + ), + alreadyUpgradedVersion, + segment.getId().toString(), + segment.getId().toString(), + taskAllocatorId + ) + ); } for (int i = 0; i < 10; i++) { @@ -176,23 +229,78 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata new LinearShardSpec(i) ); appendSegments.add(segment); + // Add the same segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + SegmentIdWithShardSpec.fromDataSegment(segment), + v3, + segment.getId().toString(), + null, + taskAllocatorId + ) + ); + // Add upgraded pending segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + new SegmentIdWithShardSpec( + DS.WIKI, + Intervals.of("2023-01-01/2023-02-01"), + alreadyUpgradedVersion, + new NumberedShardSpec(20 + i, 0) + ), + alreadyUpgradedVersion, + segment.getId().toString(), + segment.getId().toString(), + taskAllocatorId + ) + ); } + derbyConnector.retryWithHandle( + handle -> coordinator.insertPendingSegmentsIntoMetastore(handle, pendingSegmentsForTask, DS.WIKI, false) + ); + final Map segmentToReplaceLock = expectedSegmentsToUpgrade.stream() .collect(Collectors.toMap(s -> s, s -> replaceLock)); // Commit the segment and verify the results SegmentPublishResult commitResult - = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, "append", null); + = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, taskAllocatorId, null); Assert.assertTrue(commitResult.isSuccess()); - Assert.assertEquals(appendSegments, commitResult.getSegments()); - // Verify the segments present in the metadata store - Assert.assertEquals( - appendSegments, - ImmutableSet.copyOf(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())) + Set allCommittedSegments + = new 
HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); + Map upgradedFromSegmentIdMap = coordinator.retrieveUpgradedFromSegmentIds( + DS.WIKI, + allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) ); + // Verify the segments present in the metadata store + Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); + for (DataSegment segment : appendSegments) { + Assert.assertNull(upgradedFromSegmentIdMap.get(segment.getId().toString())); + } + allCommittedSegments.removeAll(appendSegments); + + // Verify the commit of upgraded pending segments + Assert.assertEquals(appendSegments.size(), allCommittedSegments.size()); + Map segmentMap = new HashMap<>(); + for (DataSegment segment : appendSegments) { + segmentMap.put(segment.getId().toString(), segment); + } + for (DataSegment segment : allCommittedSegments) { + for (PendingSegmentRecord pendingSegmentRecord : pendingSegmentsForTask) { + if (pendingSegmentRecord.getId().asSegmentId().toString().equals(segment.getId().toString())) { + DataSegment upgradedFromSegment = segmentMap.get(pendingSegmentRecord.getUpgradedFromSegmentId()); + Assert.assertNotNull(upgradedFromSegment); + Assert.assertEquals(segment.getLoadSpec(), upgradedFromSegment.getLoadSpec()); + Assert.assertEquals( + pendingSegmentRecord.getUpgradedFromSegmentId(), + upgradedFromSegmentIdMap.get(segment.getId().toString()) + ); + } + } + } // Verify entries in the segment task lock table final Set expectedUpgradeSegmentIds @@ -290,12 +398,24 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()).size() ); - final Set usedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); + final Set usedSegments + = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); + + final Map upgradedFromSegmentIdMap = coordinator.retrieveUpgradedFromSegmentIds( + "foo", + usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) + ); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); + for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { + Assert.assertNull(upgradedFromSegmentIdMap.get(appendSegment.getId().toString())); + } usedSegments.removeAll(segmentsAppendedWithReplaceLock); Assert.assertTrue(usedSegments.containsAll(replacingSegments)); + for (DataSegment replaceSegment : replacingSegments) { + Assert.assertNull(upgradedFromSegmentIdMap.get(replaceSegment.getId().toString())); + } usedSegments.removeAll(replacingSegments); Assert.assertEquals(segmentsAppendedWithReplaceLock.size(), usedSegments.size()); @@ -303,6 +423,10 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata boolean hasBeenCarriedForward = false; for (DataSegment appendedSegment : segmentsAppendedWithReplaceLock) { if (appendedSegment.getLoadSpec().equals(segmentReplicaWithNewVersion.getLoadSpec())) { + Assert.assertEquals( + appendedSegment.getId().toString(), + upgradedFromSegmentIdMap.get(segmentReplicaWithNewVersion.getId().toString()) + ); hasBeenCarriedForward = true; break; } @@ -523,7 +647,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata { metadataUpdateCounter.getAndIncrement(); if (attemptCounter.getAndIncrement() == 0) { - return new DataStoreMetadataUpdateResult(true, true, 
null); + return DataStoreMetadataUpdateResult.retryableFailure(null); } else { return super.updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata); } @@ -582,7 +706,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testTransactionalAnnounceFailDbNullWantNotNull() throws IOException + public void testTransactionalAnnounceFailDbNullWantNotNull() { final SegmentPublishResult result1 = coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -593,7 +717,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata Assert.assertEquals( SegmentPublishResult.fail( InvalidInput.exception( - "The new start metadata state[ObjectMetadata{theObject={foo=bar}}] is ahead of the last commited" + "The new start metadata state[ObjectMetadata{theObject={foo=bar}}] is ahead of the last committed" + " end state[null]. Try resetting the supervisor." ).toString()), result1 @@ -604,7 +728,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testTransactionalAnnounceFailDbNotNullWantNull() throws IOException + public void testTransactionalAnnounceFailDbNotNullWantNull() { final SegmentPublishResult result1 = coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -682,7 +806,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testTransactionalAnnounceFailDbNotNullWantDifferent() throws IOException + public void testTransactionalAnnounceFailDbNotNullWantDifferent() { final SegmentPublishResult result1 = coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -712,7 +836,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testSimpleUsedList() throws IOException + public void testSimpleUsedList() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Assert.assertEquals( @@ -728,7 +852,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testMultiIntervalUsedList() throws IOException + public void testMultiIntervalUsedList() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); coordinator.commitSegments(ImmutableSet.of(defaultSegment3), new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); @@ -771,7 +895,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUsedSegmentsUsingMultipleIntervals() throws IOException + public void testRetrieveUsedSegmentsUsingMultipleIntervals() { final List segments = createAndGetUsedYearSegments(1900, 2133); final List intervals = segments.stream().map(DataSegment::getInterval).collect(Collectors.toList()); @@ -787,7 +911,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveAllUsedSegmentsUsingIntervalsOutOfRange() throws IOException + public void testRetrieveAllUsedSegmentsUsingIntervalsOutOfRange() { final List segments = createAndGetUsedYearSegments(1905, 1910); @@ -805,7 +929,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveAllUsedSegmentsUsingNoIntervals() throws IOException + public void testRetrieveAllUsedSegmentsUsingNoIntervals() { final List segments 
= createAndGetUsedYearSegments(1900, 2133); @@ -819,7 +943,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingSingleIntervalAndNoLimit() throws IOException + public void testRetrieveUnusedSegmentsUsingSingleIntervalAndNoLimit() { final List segments = createAndGetUsedYearSegments(1900, 2133); markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); @@ -836,7 +960,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitAtRange() throws IOException + public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitAtRange() { final List segments = createAndGetUsedYearSegments(1900, 2133); markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); @@ -854,7 +978,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitInRange() throws IOException + public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitInRange() { final List segments = createAndGetUsedYearSegments(1900, 2133); markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); @@ -872,26 +996,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingSingleIntervalVersionAndLimitInRange() throws IOException - { - final List segments = createAndGetUsedYearSegments(1900, 2133); - markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); - - final int requestedLimit = 10; - final List actualUnusedSegments = coordinator.retrieveUnusedSegmentsForInterval( - DS.WIKI, - Intervals.of("1900/3000"), - ImmutableList.of("version"), - requestedLimit, - null - ); - - Assert.assertEquals(requestedLimit, actualUnusedSegments.size()); - Assert.assertTrue(actualUnusedSegments.containsAll(segments.stream().limit(requestedLimit).collect(Collectors.toList()))); - } - - @Test - public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitOutOfRange() throws IOException + public void testRetrieveUnusedSegmentsUsingSingleIntervalAndLimitOutOfRange() { final List segments = createAndGetUsedYearSegments(1900, 2133); markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); @@ -908,7 +1013,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingSingleIntervalOutOfRange() throws IOException + public void testRetrieveUnusedSegmentsUsingSingleIntervalOutOfRange() { final List segments = createAndGetUsedYearSegments(1905, 1910); markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); @@ -928,7 +1033,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndNoLimit() throws IOException + public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndNoLimit() { final List segments = createAndGetUsedYearSegments(1900, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -958,7 +1063,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingNoIntervalsNoLimitAndNoLastSegmentId() throws IOException + public void testRetrieveUnusedSegmentsUsingNoIntervalsNoLimitAndNoLastSegmentId() { final List segments = createAndGetUsedYearSegments(1900, 2133); 
DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -988,7 +1093,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingNoIntervalsAndNoLimitAndNoLastSegmentId() throws IOException + public void testRetrieveUnusedSegmentsUsingNoIntervalsAndNoLimitAndNoLastSegmentId() { final List segments = createAndGetUsedYearSegments(2033, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1071,7 +1176,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitAtRange() throws IOException + public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitAtRange() { final List segments = createAndGetUsedYearSegments(1900, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1101,7 +1206,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitInRange() throws IOException + public void testRetrieveUnusedSegmentsUsingMultipleIntervalsAndLimitInRange() { final List segments = createAndGetUsedYearSegments(1900, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1133,7 +1238,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingMultipleIntervalsInSingleBatchLimitAndLastSegmentId() throws IOException + public void testRetrieveUnusedSegmentsUsingMultipleIntervalsInSingleBatchLimitAndLastSegmentId() { final List segments = createAndGetUsedYearSegments(2034, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1169,7 +1274,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingMultipleIntervalsLimitAndLastSegmentId() throws IOException + public void testRetrieveUnusedSegmentsUsingMultipleIntervalsLimitAndLastSegmentId() { final List segments = createAndGetUsedYearSegments(1900, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1205,7 +1310,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingMultipleIntervals() throws IOException + public void testRetrieveUnusedSegmentsUsingMultipleIntervals() { final List segments = createAndGetUsedYearSegments(1900, 2133); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1235,7 +1340,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsUsingIntervalOutOfRange() throws IOException + public void testRetrieveUnusedSegmentsUsingIntervalOutOfRange() { final List segments = createAndGetUsedYearSegments(1905, 1910); markAllSegmentsUnused(new HashSet<>(segments), DateTimes.nowUtc()); @@ -1266,7 +1371,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsWithMaxUsedStatusLastUpdatedTime() throws IOException + public void testRetrieveUnusedSegmentsWithMaxUsedStatusLastUpdatedTime() { final List segments = createAndGetUsedYearSegments(1905, 1910); DateTime usedStatusLastUpdatedTime = DateTimes.nowUtc(); @@ -1316,7 +1421,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsWithMaxUsedStatusLastUpdatedTime2() 
throws IOException + public void testRetrieveUnusedSegmentsWithMaxUsedStatusLastUpdatedTime2() { final List segments = createAndGetUsedYearSegments(1900, 1950); final List evenYearSegments = new ArrayList<>(); @@ -1381,7 +1486,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testSimpleUnusedList() throws IOException + public void testSimpleUnusedList() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1399,7 +1504,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsWithVersions() throws IOException + public void testRetrieveUnusedSegmentsWithVersions() { final DateTime now = DateTimes.nowUtc(); final String v1 = now.toString(); @@ -1476,7 +1581,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testSimpleUnusedListWithLimit() throws IOException + public void testSimpleUnusedListWithLimit() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1495,7 +1600,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedOverlapLow() throws IOException + public void testUsedOverlapLow() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Set actualSegments = ImmutableSet.copyOf( @@ -1513,7 +1618,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata @Test - public void testUsedOverlapHigh() throws IOException + public void testUsedOverlapHigh() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Assert.assertEquals( @@ -1529,7 +1634,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedOutOfBoundsLow() throws IOException + public void testUsedOutOfBoundsLow() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Assert.assertTrue( @@ -1543,7 +1648,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata @Test - public void testUsedOutOfBoundsHigh() throws IOException + public void testUsedOutOfBoundsHigh() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Assert.assertTrue( @@ -1556,7 +1661,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedWithinBoundsEnd() throws IOException + public void testUsedWithinBoundsEnd() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Assert.assertEquals( @@ -1572,7 +1677,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedOverlapEnd() throws IOException + public void testUsedOverlapEnd() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); Assert.assertEquals( @@ -1588,7 +1693,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUnusedOverlapLow() throws IOException + public void testUnusedOverlapLow() { coordinator.commitSegments(SEGMENTS, new 
SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1606,7 +1711,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUnusedUnderlapLow() throws IOException + public void testUnusedUnderlapLow() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1622,7 +1727,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata @Test - public void testUnusedUnderlapHigh() throws IOException + public void testUnusedUnderlapHigh() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1637,7 +1742,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUnusedOverlapHigh() throws IOException + public void testUnusedOverlapHigh() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1652,7 +1757,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUnusedBigOverlap() throws IOException + public void testUnusedBigOverlap() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1670,7 +1775,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUnusedLowRange() throws IOException + public void testUnusedLowRange() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1699,7 +1804,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUnusedHighRange() throws IOException + public void testUnusedHighRange() { coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); markAllSegmentsUnused(); @@ -1728,7 +1833,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedHugeTimeRangeEternityFilter() throws IOException + public void testUsedHugeTimeRangeEternityFilter() { coordinator.commitSegments( ImmutableSet.of( @@ -1752,7 +1857,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedHugeTimeRangeTrickyFilter1() throws IOException + public void testUsedHugeTimeRangeTrickyFilter1() { coordinator.commitSegments( ImmutableSet.of( @@ -1776,7 +1881,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUsedHugeTimeRangeTrickyFilter2() throws IOException + public void testUsedHugeTimeRangeTrickyFilter2() { coordinator.commitSegments( ImmutableSet.of( @@ -1800,7 +1905,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testEternitySegmentWithStringComparison() throws IOException + public void testEternitySegmentWithStringComparison() { coordinator.commitSegments( ImmutableSet.of( @@ -1822,7 +1927,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testEternityMultipleSegmentWithStringComparison() throws IOException + public void testEternityMultipleSegmentWithStringComparison() 
{ coordinator.commitSegments( ImmutableSet.of( @@ -1845,7 +1950,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testFirstHalfEternitySegmentWithStringComparison() throws IOException + public void testFirstHalfEternitySegmentWithStringComparison() { coordinator.commitSegments( ImmutableSet.of( @@ -1867,7 +1972,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testFirstHalfEternityMultipleSegmentWithStringComparison() throws IOException + public void testFirstHalfEternityMultipleSegmentWithStringComparison() { coordinator.commitSegments( ImmutableSet.of( @@ -1890,7 +1995,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testSecondHalfEternitySegmentWithStringComparison() throws IOException + public void testSecondHalfEternitySegmentWithStringComparison() { coordinator.commitSegments( ImmutableSet.of( @@ -1914,7 +2019,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata // Known Issue: https://github.com/apache/druid/issues/12860 @Ignore @Test - public void testLargeIntervalWithStringComparison() throws IOException + public void testLargeIntervalWithStringComparison() { coordinator.commitSegments( ImmutableSet.of( @@ -1936,7 +2041,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testSecondHalfEternityMultipleSegmentWithStringComparison() throws IOException + public void testSecondHalfEternityMultipleSegmentWithStringComparison() { coordinator.commitSegments( ImmutableSet.of( @@ -1959,7 +2064,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testDeleteDataSourceMetadata() throws IOException + public void testDeleteDataSourceMetadata() { coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -1980,7 +2085,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testDeleteSegmentsInMetaDataStorage() throws IOException + public void testDeleteSegmentsInMetaDataStorage() { // Published segments to MetaDataStorage coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); @@ -2013,7 +2118,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testUpdateSegmentsInMetaDataStorage() throws IOException + public void testUpdateSegmentsInMetaDataStorage() { // Published segments to MetaDataStorage coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); @@ -2646,7 +2751,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testAllocatePendingSegmentsWithOvershadowingSegments() throws IOException + public void testAllocatePendingSegmentsWithOvershadowingSegments() { final String dataSource = "ds"; final Interval interval = Intervals.of("2017-01-01/2017-02-01"); @@ -2716,7 +2821,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IOException + public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() { final PartialShardSpec partialShardSpec = new HashBasedNumberedPartialShardSpec(null, 2, 5, null); final String dataSource = "ds"; @@ -2806,7 +2911,7 @@ public class 
IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testAddNumberedShardSpecAfterMultiDimensionsShardSpecWithUnknownCorePartitionSize() throws IOException + public void testAddNumberedShardSpecAfterMultiDimensionsShardSpecWithUnknownCorePartitionSize() { final String datasource = "datasource"; final Interval interval = Intervals.of("2020-01-01/P1D"); @@ -2850,7 +2955,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testAddNumberedShardSpecAfterSingleDimensionsShardSpecWithUnknownCorePartitionSize() throws IOException + public void testAddNumberedShardSpecAfterSingleDimensionsShardSpecWithUnknownCorePartitionSize() { final String datasource = "datasource"; final Interval interval = Intervals.of("2020-01-01/P1D"); @@ -2896,7 +3001,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRemoveDataSourceMetadataOlderThanDatasourceActiveShouldNotBeDeleted() throws Exception + public void testRemoveDataSourceMetadataOlderThanDatasourceActiveShouldNotBeDeleted() { coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -2925,7 +3030,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRemoveDataSourceMetadataOlderThanDatasourceNotActiveAndOlderThanTimeShouldBeDeleted() throws Exception + public void testRemoveDataSourceMetadataOlderThanDatasourceNotActiveAndOlderThanTimeShouldBeDeleted() { coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -2951,7 +3056,6 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata @Test public void testRemoveDataSourceMetadataOlderThanDatasourceNotActiveButNotOlderThanTimeShouldNotBeDeleted() - throws Exception { coordinator.commitSegmentsAndMetadata( ImmutableSet.of(defaultSegment), @@ -2981,7 +3085,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testMarkSegmentsAsUnusedWithinIntervalOneYear() throws IOException + public void testMarkSegmentsAsUnusedWithinIntervalOneYear() { coordinator.commitSegments(ImmutableSet.of(existingSegment1, existingSegment2), new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); @@ -3018,7 +3122,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException + public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() { coordinator.commitSegments(ImmutableSet.of(existingSegment1, existingSegment2), new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); @@ -3116,7 +3220,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testTimelineVisibilityWith0CorePartitionTombstone() throws IOException + public void testTimelineVisibilityWith0CorePartitionTombstone() { final Interval interval = Intervals.of("2020/2021"); // Create and commit a tombstone segment @@ -3168,7 +3272,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testTimelineWith1CorePartitionTombstone() throws IOException + public void testTimelineWith1CorePartitionTombstone() { // Register the old generation tombstone spec for this test. 
mapper.registerSubtypes(TombstoneShardSpecWith1CorePartition.class); @@ -3222,7 +3326,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testSegmentIdShouldNotBeReallocated() throws IOException + public void testSegmentIdShouldNotBeReallocated() { final SegmentIdWithShardSpec idWithNullTaskAllocator = coordinator.allocatePendingSegment( DS.WIKI, @@ -3277,7 +3381,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata } @Test - public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() throws Exception + public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() { DataSegment unusedForDifferentVersion = createSegment( Intervals.of("2024/2025"), @@ -3320,4 +3424,156 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata unusedSegmentIdsForIntervalAndVersion.get(0) ); } + + @Test + public void testRetrieveUpgradedFromSegmentIds() + { + final String datasource = defaultSegment.getDataSource(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); + upgradedFromSegmentIdMap.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); + upgradedFromSegmentIdMap.clear(); + upgradedFromSegmentIdMap.put(defaultSegment3.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + + Map expected = new HashMap<>(); + expected.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); + expected.put(defaultSegment3.getId().toString(), defaultSegment.getId().toString()); + + Set segmentIds = new HashSet<>(); + segmentIds.add(defaultSegment.getId().toString()); + segmentIds.add(defaultSegment2.getId().toString()); + segmentIds.add(defaultSegment3.getId().toString()); + segmentIds.add(defaultSegment4.getId().toString()); + Assert.assertEquals( + expected, + coordinator.retrieveUpgradedFromSegmentIds(datasource, segmentIds) + ); + } + + @Test + public void testRetrieveUpgradedFromSegmentIdsInBatches() + { + final int size = 500; + final int batchSize = 100; + + List segments = new ArrayList<>(); + for (int i = 0; i < size; i++) { + segments.add( + new DataSegment( + "DS", + Intervals.ETERNITY, + "v " + (i % 5), + ImmutableMap.of("num", i / 5), + ImmutableList.of("dim"), + ImmutableList.of("agg"), + new NumberedShardSpec(i / 5, 0), + 0, + 100L + ) + ); + } + Map expected = new HashMap<>(); + for (int i = 0; i < batchSize; i++) { + for (int j = 1; j < 5; j++) { + expected.put( + segments.get(5 * i + j).getId().toString(), + segments.get(5 * i).getId().toString() + ); + } + } + insertUsedSegments(ImmutableSet.copyOf(segments), expected); + + Map actual = coordinator.retrieveUpgradedFromSegmentIds( + "DS", + segments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) + ); + + Assert.assertEquals(400, actual.size()); + Assert.assertEquals(expected, actual); + } + + @Test + public void testRetrieveUpgradedToSegmentIds() + { + final String datasource = defaultSegment.getDataSource(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); + upgradedFromSegmentIdMap.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + 
coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); + upgradedFromSegmentIdMap.clear(); + upgradedFromSegmentIdMap.put(defaultSegment3.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + + Map> expected = new HashMap<>(); + expected.put(defaultSegment.getId().toString(), new HashSet<>()); + expected.get(defaultSegment.getId().toString()).add(defaultSegment.getId().toString()); + expected.get(defaultSegment.getId().toString()).add(defaultSegment2.getId().toString()); + expected.get(defaultSegment.getId().toString()).add(defaultSegment3.getId().toString()); + + Set upgradedIds = new HashSet<>(); + upgradedIds.add(defaultSegment.getId().toString()); + Assert.assertEquals( + expected, + coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds) + ); + } + + @Test + public void testRetrieveUpgradedToSegmentIdsInBatches() + { + final int size = 500; + final int batchSize = 100; + + List segments = new ArrayList<>(); + for (int i = 0; i < size; i++) { + segments.add( + new DataSegment( + "DS", + Intervals.ETERNITY, + "v " + (i % 5), + ImmutableMap.of("num", i / 5), + ImmutableList.of("dim"), + ImmutableList.of("agg"), + new NumberedShardSpec(i / 5, 0), + 0, + 100L + ) + ); + } + + Map> expected = new HashMap<>(); + for (DataSegment segment : segments) { + final String id = segment.getId().toString(); + expected.put(id, new HashSet<>()); + expected.get(id).add(id); + } + Map upgradeMap = new HashMap<>(); + for (int i = 0; i < batchSize; i++) { + for (int j = 1; j < 5; j++) { + upgradeMap.put( + segments.get(5 * i + j).getId().toString(), + segments.get(5 * i).getId().toString() + ); + expected.get(segments.get(5 * i).getId().toString()) + .add(segments.get(5 * i + j).getId().toString()); + } + } + insertUsedSegments(ImmutableSet.copyOf(segments), upgradeMap); + + Map> actual = coordinator.retrieveUpgradedToSegmentIds( + "DS", + segments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) + ); + + Assert.assertEquals(500, actual.size()); + Assert.assertEquals(expected, actual); + } + + private void insertUsedSegments(Set segments, Map upgradedFromSegmentIdMap) + { + final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); + insertUsedSegments(segments, upgradedFromSegmentIdMap, derbyConnector, table, mapper); + } } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java index a2f91378bb7..2076e5ffa46 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java @@ -52,13 +52,13 @@ import org.skife.jdbi.v2.PreparedBatch; import org.skife.jdbi.v2.ResultIterator; import org.skife.jdbi.v2.util.StringMapper; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; @@ -323,10 +323,12 @@ public class IndexerSqlMetadataStorageCoordinatorTestBase .version(version) .shardSpec(shardSpec) .size(100) + // hash to get a unique load spec as segmentId has 
not yet been generated + .loadSpec(ImmutableMap.of("hash", Objects.hash(interval, version, shardSpec))) .build(); } - protected List createAndGetUsedYearSegments(final int startYear, final int endYear) throws IOException + protected List createAndGetUsedYearSegments(final int startYear, final int endYear) { final List segments = new ArrayList<>(); @@ -560,4 +562,50 @@ public class IndexerSqlMetadataStorageCoordinatorTestBase } ); } + + public static void insertUsedSegments( + Set dataSegments, + Map upgradedFromSegmentIdMap, + SQLMetadataConnector connector, + String table, + ObjectMapper jsonMapper + ) + { + connector.retryWithHandle( + handle -> { + PreparedBatch preparedBatch = handle.prepareBatch( + StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version," + + " used, payload, used_status_last_updated, upgraded_from_segment_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version," + + " :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)", + table, + connector.getQuoteString() + ) + ); + for (DataSegment segment : dataSegments) { + String id = segment.getId().toString(); + preparedBatch.add() + .bind("id", id) + .bind("dataSource", segment.getDataSource()) + .bind("created_date", DateTimes.nowUtc().toString()) + .bind("start", segment.getInterval().getStart().toString()) + .bind("end", segment.getInterval().getEnd().toString()) + .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) + .bind("version", segment.getVersion()) + .bind("used", true) + .bind("payload", jsonMapper.writeValueAsBytes(segment)) + .bind("used_status_last_updated", DateTimes.nowUtc().toString()) + .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId().toString())); + } + + final int[] affectedRows = preparedBatch.execute(); + final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); + if (!succeeded) { + throw new ISE("Failed to publish segments to DB"); + } + return true; + } + ); + } } diff --git a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java index 8cdf32bddf9..efeadbdc04e 100644 --- a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java @@ -62,7 +62,6 @@ public class SQLMetadataRuleManagerTest private MetadataRuleManagerConfig managerConfig; private SQLMetadataRuleManager ruleManager; private AuditManager auditManager; - private SQLMetadataSegmentPublisher publisher; private final ObjectMapper mapper = new DefaultObjectMapper(); @Before @@ -87,11 +86,6 @@ public class SQLMetadataRuleManagerTest ruleManager = new SQLMetadataRuleManager(mapper, managerConfig, tablesConfig, connector, auditManager); connector.createSegmentSchemasTable(); connector.createSegmentTable(); - publisher = new SQLMetadataSegmentPublisher( - mapper, - derbyConnectorRule.metadataTablesConfigSupplier().get(), - connector - ); } @Test @@ -321,7 +315,7 @@ public class SQLMetadataRuleManagerTest 1, 1234L ); - publisher.publishSegment(dataSegment); + SqlSegmentsMetadataManagerTestBase.publishSegment(connector, tablesConfig, mapper, dataSegment); // This will not delete the rule as the datasource has segment in the segment metadata table ruleManager.removeRulesForEmptyDatasourcesOlderThan(System.currentTimeMillis()); diff --git 
a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java index 8595f3e7194..18095305ad9 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java @@ -73,18 +73,13 @@ public class SqlSegmentsMetadataManagerSchemaPollTest extends SqlSegmentsMetadat CentralizedDatasourceSchemaConfig.create() ); sqlSegmentsMetadataManager.start(); - - publisher = new SQLMetadataSegmentPublisher( - jsonMapper, - derbyConnectorRule.metadataTablesConfigSupplier().get(), - connector - ); + storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get(); connector.createSegmentSchemasTable(); connector.createSegmentTable(); - publisher.publishSegment(segment1); - publisher.publishSegment(segment2); + publishSegment(segment1); + publishSegment(segment2); } @After diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java index 486e58e2a2a..d101a7a74b4 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java @@ -19,7 +19,6 @@ package org.apache.druid.metadata; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; @@ -34,7 +33,6 @@ import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; import org.apache.druid.segment.metadata.SegmentSchemaManager; @@ -89,10 +87,6 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); - private SqlSegmentsMetadataManager sqlSegmentsMetadataManager; - private SQLMetadataSegmentPublisher publisher; - private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); - private final DataSegment wikiSegment1 = CreateDataSegments.ofDatasource(DS.WIKI).startingAt("2012-03-15").eachOfSizeInMb(500).get(0); private final DataSegment wikiSegment2 = @@ -101,7 +95,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe private void publishUnusedSegments(DataSegment... 
segments) throws IOException { for (DataSegment segment : segments) { - publisher.publishSegment(segment); + publishSegment(segment); sqlSegmentsMetadataManager.markSegmentAsUnused(segment.getId()); } } @@ -109,8 +103,8 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe private void publishWikiSegments() { try { - publisher.publishSegment(wikiSegment1); - publisher.publishSegment(wikiSegment2); + publishSegment(wikiSegment1); + publishSegment(wikiSegment2); } catch (Exception e) { throw new RuntimeException(e); @@ -123,6 +117,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe connector = derbyConnectorRule.getConnector(); SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); config.setPollDuration(Period.seconds(3)); + storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get(); segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); segmentSchemaManager = new SegmentSchemaManager( @@ -131,10 +126,8 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe connector ); - final TestDerbyConnector connector = derbyConnectorRule.getConnector(); - sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( - JSON_MAPPER, + jsonMapper, Suppliers.ofInstance(config), derbyConnectorRule.metadataTablesConfigSupplier(), connector, @@ -143,12 +136,6 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe ); sqlSegmentsMetadataManager.start(); - publisher = new SQLMetadataSegmentPublisher( - JSON_MAPPER, - derbyConnectorRule.metadataTablesConfigSupplier().get(), - connector - ); - connector.createSegmentSchemasTable(); connector.createSegmentTable(); } @@ -271,7 +258,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe .map(ImmutableDruidDataSource::getName) .collect(Collectors.toList()) ); - publisher.publishSegment(createNewSegment1(DS.KOALA)); + publishSegment(createNewSegment1(DS.KOALA)); // This call will force on demand poll sqlSegmentsMetadataManager.forceOrWaitOngoingDatabasePoll(); @@ -288,7 +275,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe ); final String newDataSource3 = "wikipedia3"; - publisher.publishSegment(createNewSegment1(newDataSource3)); + publishSegment(createNewSegment1(newDataSource3)); // This time wait for periodic poll (not doing on demand poll so we have to wait a bit...) 
while (sqlSegmentsMetadataManager.getDataSourcesSnapshot().getDataSource(newDataSource3) == null) { @@ -367,10 +354,11 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe sqlSegmentsMetadataManager.retrieveAllDataSourceNames() ); final DataSegment koalaSegment = createNewSegment1(DS.KOALA); - publisher.publishSegment(koalaSegment); + publishSegment(koalaSegment); sqlSegmentsMetadataManager.startPollingDatabasePeriodically(); return koalaSegment; } + /** * Create a corrupted segment entry in the segments table to test * whether the overall loading of segments from the database continues to work @@ -382,7 +370,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe publishWikiSegments(); final DataSegment corruptSegment = DataSegment.builder(wikiSegment1).dataSource("corrupt-datasource").build(); - publisher.publishSegment(corruptSegment); + publishSegment(corruptSegment); updateSegmentPayload(corruptSegment, StringUtils.toUtf8("corrupt-payload")); EmittingLogger.registerEmitter(new NoopServiceEmitter()); @@ -444,26 +432,56 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe Assert.assertEquals( ImmutableList.of(wikiSegment2.getInterval()), - sqlSegmentsMetadataManager.getUnusedSegmentIntervals(DS.WIKI, null, DateTimes.of("3000"), 1, DateTimes.COMPARE_DATE_AS_STRING_MAX) + sqlSegmentsMetadataManager.getUnusedSegmentIntervals( + DS.WIKI, + null, + DateTimes.of("3000"), + 1, + DateTimes.COMPARE_DATE_AS_STRING_MAX + ) ); // Test the DateTime maxEndTime argument of getUnusedSegmentIntervals Assert.assertEquals( ImmutableList.of(wikiSegment2.getInterval()), - sqlSegmentsMetadataManager.getUnusedSegmentIntervals(DS.WIKI, null, DateTimes.of(2012, 1, 7, 0, 0), 1, DateTimes.COMPARE_DATE_AS_STRING_MAX) + sqlSegmentsMetadataManager.getUnusedSegmentIntervals( + DS.WIKI, + null, + DateTimes.of(2012, 1, 7, 0, 0), + 1, + DateTimes.COMPARE_DATE_AS_STRING_MAX + ) ); Assert.assertEquals( ImmutableList.of(wikiSegment1.getInterval()), - sqlSegmentsMetadataManager.getUnusedSegmentIntervals(DS.WIKI, DateTimes.of(2012, 1, 7, 0, 0), DateTimes.of(2012, 4, 7, 0, 0), 1, DateTimes.COMPARE_DATE_AS_STRING_MAX) + sqlSegmentsMetadataManager.getUnusedSegmentIntervals( + DS.WIKI, + DateTimes.of(2012, 1, 7, 0, 0), + DateTimes.of(2012, 4, 7, 0, 0), + 1, + DateTimes.COMPARE_DATE_AS_STRING_MAX + ) ); Assert.assertEquals( ImmutableList.of(), - sqlSegmentsMetadataManager.getUnusedSegmentIntervals(DS.WIKI, DateTimes.of(2012, 1, 7, 0, 0), DateTimes.of(2012, 1, 7, 0, 0), 1, DateTimes.COMPARE_DATE_AS_STRING_MAX) + sqlSegmentsMetadataManager.getUnusedSegmentIntervals( + DS.WIKI, + DateTimes.of(2012, 1, 7, 0, 0), + DateTimes.of(2012, 1, 7, 0, 0), + 1, + DateTimes.COMPARE_DATE_AS_STRING_MAX + ) ); Assert.assertEquals( ImmutableList.of(wikiSegment2.getInterval(), wikiSegment1.getInterval()), - sqlSegmentsMetadataManager.getUnusedSegmentIntervals(DS.WIKI, null, DateTimes.of("3000"), 5, DateTimes.COMPARE_DATE_AS_STRING_MAX) + sqlSegmentsMetadataManager.getUnusedSegmentIntervals( + DS.WIKI, + null, + DateTimes.of("3000"), + 5, + DateTimes.COMPARE_DATE_AS_STRING_MAX + ) ); // Test a buffer period that should exclude some segments @@ -471,7 +489,13 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe // The wikipedia datasource has segments generated with last used time equal to roughly the time of test run. 
None of these segments should be selected with a bufer period of 1 day Assert.assertEquals( ImmutableList.of(), - sqlSegmentsMetadataManager.getUnusedSegmentIntervals(DS.WIKI, DateTimes.COMPARE_DATE_AS_STRING_MIN, DateTimes.of("3000"), 5, DateTimes.nowUtc().minus(Duration.parse("PT86400S"))) + sqlSegmentsMetadataManager.getUnusedSegmentIntervals( + DS.WIKI, + DateTimes.COMPARE_DATE_AS_STRING_MIN, + DateTimes.of("3000"), + 5, + DateTimes.nowUtc().minus(Duration.parse("PT86400S")) + ) ); // koalaSegment3 has a null used_status_last_updated which should mean getUnusedSegmentIntervals never returns it @@ -496,7 +520,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe sqlSegmentsMetadataManager.poll(); Assert.assertTrue(sqlSegmentsMetadataManager.isPollingDatabasePeriodically()); - publisher.publishSegment(createNewSegment1(DS.KOALA)); + publishSegment(createNewSegment1(DS.KOALA)); awaitDataSourceAppeared(DS.KOALA); int numChangedSegments = sqlSegmentsMetadataManager.markAsUnusedAllSegmentsInDataSource(DS.KOALA); @@ -536,7 +560,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe "2017-10-15T20:19:12.565Z" ); - publisher.publishSegment(koalaSegment); + publishSegment(koalaSegment); awaitDataSourceAppeared(DS.KOALA); Assert.assertNotNull(sqlSegmentsMetadataManager.getImmutableDataSourceWithUsedSegments(DS.KOALA)); @@ -1018,8 +1042,8 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe final DataSegment koalaSegment1 = createNewSegment1(DS.KOALA); final DataSegment koalaSegment2 = createNewSegment1(DS.KOALA); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); final ImmutableSet segmentIds = ImmutableSet.of(koalaSegment1.getId(), koalaSegment1.getId()); @@ -1048,9 +1072,9 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe "2017-10-15T20:19:12.565Z" ); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); - publisher.publishSegment(koalaSegment3); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); + publishSegment(koalaSegment3); final Interval theInterval = Intervals.of("2017-10-15T00:00:00.000/2017-10-18T00:00:00.000"); // 2 out of 3 segments match the interval @@ -1091,9 +1115,9 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe v2 ); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); - publisher.publishSegment(koalaSegment3); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); + publishSegment(koalaSegment3); final Interval theInterval = Intervals.of("2017-10-15/2017-10-18"); Assert.assertEquals( @@ -1140,9 +1164,9 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe v2 ); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); - publisher.publishSegment(koalaSegment3); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); + publishSegment(koalaSegment3); final Interval theInterval = Intervals.of("2017-10-15/2017-10-18"); Assert.assertEquals( @@ -1189,9 +1213,9 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe v2 ); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); - publisher.publishSegment(koalaSegment3); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); + 
publishSegment(koalaSegment3); final Interval theInterval = Intervals.of("2017-10-15/2017-10-18"); Assert.assertEquals( @@ -1238,9 +1262,9 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe v2 ); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); - publisher.publishSegment(koalaSegment3); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); + publishSegment(koalaSegment3); final Interval theInterval = Intervals.of("2017-10-15/2017-10-18"); Assert.assertEquals( @@ -1279,9 +1303,9 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe "2017-10-15T20:19:12.565Z" ); - publisher.publishSegment(koalaSegment1); - publisher.publishSegment(koalaSegment2); - publisher.publishSegment(koalaSegment3); + publishSegment(koalaSegment1); + publishSegment(koalaSegment2); + publishSegment(koalaSegment3); final Interval theInterval = Intervals.of("2017-10-16T00:00:00.000/2017-10-20T00:00:00.000"); // 1 out of 3 segments match the interval, other 2 overlap, only the segment fully contained will be marked unused @@ -1314,7 +1338,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe final SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); config.setPollDuration(Period.seconds(1)); sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( - JSON_MAPPER, + jsonMapper, Suppliers.ofInstance(config), derbyConnectorRule.metadataTablesConfigSupplier(), derbyConnectorRule.getConnector(), @@ -1335,7 +1359,7 @@ public class SqlSegmentsMetadataManagerTest extends SqlSegmentsMetadataManagerTe "2012-03-16T00:00:00.000/2012-03-17T00:00:00.000", "2017-10-15T20:19:12.565Z" ); - publisher.publishSegment(wikiSegment3); + publishSegment(wikiSegment3); // New segment is not returned since we call without force poll segments = sqlSegmentsMetadataManager diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTestBase.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTestBase.java index f166befde73..18112c29b4c 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTestBase.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTestBase.java @@ -22,23 +22,53 @@ package org.apache.druid.metadata; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.SegmentSchemaCache; import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.NoneShardSpec; +import org.skife.jdbi.v2.DBI; +import org.skife.jdbi.v2.tweak.HandleCallback; + +import java.io.IOException; +import java.util.List; +import java.util.Map; public class SqlSegmentsMetadataManagerTestBase { protected SqlSegmentsMetadataManager sqlSegmentsMetadataManager; - protected SQLMetadataSegmentPublisher publisher; + protected SegmentSchemaCache segmentSchemaCache; protected SegmentSchemaManager segmentSchemaManager; protected TestDerbyConnector connector; protected SegmentsMetadataManagerConfig config; + protected MetadataStorageTablesConfig storageConfig; protected final 
ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); + protected final DataSegment segment1 = createSegment( + "wikipedia", + "2012-03-15T00:00:00.000/2012-03-16T00:00:00.000", + "2012-03-16T00:36:30.848Z", + "index/y=2012/m=03/d=15/2012-03-16T00:36:30.848Z/0/index.zip", + 0 + ); + + protected final DataSegment segment2 = createSegment( + "wikipedia", + "2012-01-05T00:00:00.000/2012-01-06T00:00:00.000", + "2012-01-06T22:19:12.565Z", + "wikipedia/index/y=2012/m=01/d=05/2012-01-06T22:19:12.565Z/0/index.zip", + 0 + ); + + protected void publishSegment(final DataSegment segment) throws IOException + { + publishSegment(connector, storageConfig, jsonMapper, segment); + } + protected static DataSegment createSegment( String dataSource, String interval, @@ -64,19 +94,86 @@ public class SqlSegmentsMetadataManagerTestBase ); } - protected final DataSegment segment1 = createSegment( - "wikipedia", - "2012-03-15T00:00:00.000/2012-03-16T00:00:00.000", - "2012-03-16T00:36:30.848Z", - "index/y=2012/m=03/d=15/2012-03-16T00:36:30.848Z/0/index.zip", - 0 - ); + public static void publishSegment( + final SQLMetadataConnector connector, + final MetadataStorageTablesConfig config, + final ObjectMapper jsonMapper, + final DataSegment segment + ) throws IOException + { + String now = DateTimes.nowUtc().toString(); + publishSegment( + connector, + config, + segment.getId().toString(), + segment.getDataSource(), + now, + segment.getInterval().getStart().toString(), + segment.getInterval().getEnd().toString(), + (segment.getShardSpec() instanceof NoneShardSpec) ? false : true, + segment.getVersion(), + true, + jsonMapper.writeValueAsBytes(segment), + now + ); + } - protected final DataSegment segment2 = createSegment( - "wikipedia", - "2012-01-05T00:00:00.000/2012-01-06T00:00:00.000", - "2012-01-06T22:19:12.565Z", - "wikipedia/index/y=2012/m=01/d=05/2012-01-06T22:19:12.565Z/0/index.zip", - 0 - ); + private static void publishSegment( + final SQLMetadataConnector connector, + final MetadataStorageTablesConfig config, + final String segmentId, + final String dataSource, + final String createdDate, + final String start, + final String end, + final boolean partitioned, + final String version, + final boolean used, + final byte[] payload, + final String usedFlagLastUpdated + ) + { + try { + final DBI dbi = connector.getDBI(); + List> exists = dbi.withHandle( + handle -> + handle.createQuery(StringUtils.format("SELECT id FROM %s WHERE id=:id", config.getSegmentsTable())) + .bind("id", segmentId) + .list() + ); + + if (!exists.isEmpty()) { + return; + } + + final String publishStatement = StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload, used_status_last_updated) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload, :used_status_last_updated)", + config.getSegmentsTable(), + connector.getQuoteString() + ); + + dbi.withHandle( + (HandleCallback) handle -> { + handle.createStatement(publishStatement) + .bind("id", segmentId) + .bind("dataSource", dataSource) + .bind("created_date", createdDate) + .bind("start", start) + .bind("end", end) + .bind("partitioned", partitioned) + .bind("version", version) + .bind("used", used) + .bind("payload", payload) + .bind("used_status_last_updated", usedFlagLastUpdated) + .execute(); + + return null; + } + ); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } } diff --git 
a/server/src/test/java/org/apache/druid/segment/indexing/RealtimeTuningConfigTest.java b/server/src/test/java/org/apache/druid/segment/indexing/RealtimeTuningConfigTest.java deleted file mode 100644 index abc95719707..00000000000 --- a/server/src/test/java/org/apache/druid/segment/indexing/RealtimeTuningConfigTest.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.indexing; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.data.CompressionStrategy; -import org.apache.druid.segment.incremental.OnheapIncrementalIndex; -import org.apache.druid.timeline.partition.NumberedShardSpec; -import org.hamcrest.CoreMatchers; -import org.joda.time.Duration; -import org.joda.time.Period; -import org.junit.Assert; -import org.junit.Test; - -import java.io.File; -import java.util.UUID; - -public class RealtimeTuningConfigTest -{ - @Test - public void testErrorMessageIsMeaningfulWhenUnableToCreateTemporaryDirectory() - { - String propertyName = "java.io.tmpdir"; - String originalValue = System.getProperty(propertyName); - String nonExistedDirectory = "/tmp/" + UUID.randomUUID(); - try { - System.setProperty(propertyName, nonExistedDirectory); - RealtimeTuningConfig.makeDefaultTuningConfig(null); - } - catch (IllegalStateException e) { - Assert.assertThat( - e.getMessage(), - CoreMatchers.startsWith("java.io.tmpdir (" + nonExistedDirectory + ") does not exist") - ); - } - finally { - System.setProperty(propertyName, originalValue); - } - } - - @Test - public void testSpecificBasePersistDirectory() - { - final RealtimeTuningConfig tuningConfig = RealtimeTuningConfig.makeDefaultTuningConfig( - new File("/tmp/nonexistent") - ); - Assert.assertEquals(new File("/tmp/nonexistent"), tuningConfig.getBasePersistDirectory()); - } - - @Test - public void testSerdeWithDefaults() throws Exception - { - String jsonStr = "{\"type\":\"realtime\"}"; - - ObjectMapper mapper = TestHelper.makeJsonMapper(); - RealtimeTuningConfig config = (RealtimeTuningConfig) mapper.readValue( - mapper.writeValueAsString( - mapper.readValue( - jsonStr, - TuningConfig.class - ) - ), - TuningConfig.class - ); - - Assert.assertEquals(new OnheapIncrementalIndex.Spec(), config.getAppendableIndexSpec()); - Assert.assertEquals(Duration.standardMinutes(15).getMillis(), config.getHandoffConditionTimeout()); - Assert.assertEquals(0, config.getAlertTimeout()); - Assert.assertEquals(IndexSpec.DEFAULT, config.getIndexSpec()); - Assert.assertEquals(IndexSpec.DEFAULT, config.getIndexSpecForIntermediatePersists()); - Assert.assertEquals(new Period("PT10M"), config.getIntermediatePersistPeriod()); - 
Assert.assertEquals(new NumberedShardSpec(0, 1), config.getShardSpec()); - Assert.assertEquals(0, config.getMaxPendingPersists()); - Assert.assertEquals(150000, config.getMaxRowsInMemory()); - Assert.assertEquals(0, config.getMergeThreadPriority()); - Assert.assertEquals(0, config.getPersistThreadPriority()); - Assert.assertEquals(new Period("PT10M"), config.getWindowPeriod()); - Assert.assertFalse(config.isReportParseExceptions()); - } - - @Test - public void testSerdeWithNonDefaults() throws Exception - { - String jsonStr = "{\n" - + " \"type\": \"realtime\",\n" - + " \"maxRowsInMemory\": 100,\n" - + " \"intermediatePersistPeriod\": \"PT1H\",\n" - + " \"windowPeriod\": \"PT1H\",\n" - + " \"maxPendingPersists\": 100,\n" - + " \"persistThreadPriority\": 100,\n" - + " \"mergeThreadPriority\": 100,\n" - + " \"reportParseExceptions\": true,\n" - + " \"handoffConditionTimeout\": 100,\n" - + " \"alertTimeout\": 70,\n" - + " \"indexSpec\": { \"metricCompression\" : \"NONE\" },\n" - + " \"indexSpecForIntermediatePersists\": { \"dimensionCompression\" : \"uncompressed\" },\n" - + " \"appendableIndexSpec\": { \"type\" : \"onheap\" }\n" - + "}"; - - ObjectMapper mapper = TestHelper.makeJsonMapper(); - RealtimeTuningConfig config = (RealtimeTuningConfig) mapper.readValue( - mapper.writeValueAsString( - mapper.readValue( - jsonStr, - TuningConfig.class - ) - ), - TuningConfig.class - ); - - Assert.assertEquals(new OnheapIncrementalIndex.Spec(), config.getAppendableIndexSpec()); - Assert.assertEquals(100, config.getHandoffConditionTimeout()); - Assert.assertEquals(70, config.getAlertTimeout()); - Assert.assertEquals(new Period("PT1H"), config.getIntermediatePersistPeriod()); - Assert.assertEquals(new NumberedShardSpec(0, 1), config.getShardSpec()); - Assert.assertEquals(100, config.getMaxPendingPersists()); - Assert.assertEquals(100, config.getMaxRowsInMemory()); - Assert.assertEquals(100, config.getMergeThreadPriority()); - Assert.assertEquals(100, config.getPersistThreadPriority()); - Assert.assertEquals(new Period("PT1H"), config.getWindowPeriod()); - Assert.assertEquals(true, config.isReportParseExceptions()); - Assert.assertEquals( - IndexSpec.builder().withMetricCompression(CompressionStrategy.NONE).build(), - config.getIndexSpec() - ); - Assert.assertEquals( - IndexSpec.builder().withDimensionCompression(CompressionStrategy.UNCOMPRESSED).build(), - config.getIndexSpecForIntermediatePersists() - ); - - } -} diff --git a/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentDataCacheConcurrencyTest.java b/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentDataCacheConcurrencyTest.java index 81f65acf84a..4cc4ac38184 100644 --- a/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentDataCacheConcurrencyTest.java +++ b/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentDataCacheConcurrencyTest.java @@ -20,6 +20,8 @@ package org.apache.druid.segment.metadata; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; import org.apache.druid.client.BrokerServerView; @@ -39,6 +41,8 @@ import org.apache.druid.java.util.common.NonnullPair; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import 
org.apache.druid.metadata.SegmentsMetadataManagerConfig; +import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.query.QueryRunner; import org.apache.druid.query.TableDataSource; @@ -61,16 +65,19 @@ import org.apache.druid.timeline.DataSegment.PruneSpecsHolder; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.easymock.EasyMock; +import org.joda.time.Period; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; +import org.mockito.Mockito; import javax.annotation.Nullable; import java.io.File; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -101,6 +108,8 @@ public class CoordinatorSegmentDataCacheConcurrencyTest extends SegmentMetadataC private TestSegmentMetadataQueryWalker walker; private SegmentSchemaCache segmentSchemaCache; private SegmentSchemaBackFillQueue backFillQueue; + private SqlSegmentsMetadataManager sqlSegmentsMetadataManager; + private Supplier segmentsMetadataManagerConfigSupplier; private final ObjectMapper mapper = TestHelper.makeJsonMapper(); @Before @@ -190,6 +199,12 @@ public class CoordinatorSegmentDataCacheConcurrencyTest extends SegmentMetadataC } ); + sqlSegmentsMetadataManager = Mockito.mock(SqlSegmentsMetadataManager.class); + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()).thenReturn(Collections.emptyList()); + SegmentsMetadataManagerConfig metadataManagerConfig = Mockito.mock(SegmentsMetadataManagerConfig.class); + Mockito.when(metadataManagerConfig.getPollDuration()).thenReturn(Period.millis(1000)); + segmentsMetadataManagerConfigSupplier = Suppliers.ofInstance(metadataManagerConfig); + inventoryView.init(); initLatch.await(); exec = Execs.multiThreaded(4, "DruidSchemaConcurrencyTest-%d"); @@ -227,7 +242,9 @@ public class CoordinatorSegmentDataCacheConcurrencyTest extends SegmentMetadataC new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -341,7 +358,9 @@ public class CoordinatorSegmentDataCacheConcurrencyTest extends SegmentMetadataC new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override diff --git a/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java b/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java index e5b6db1d42d..ef1fb1e8edd 100644 --- a/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java +++ b/server/src/test/java/org/apache/druid/segment/metadata/CoordinatorSegmentMetadataCacheTest.java @@ -22,11 +22,14 @@ package org.apache.druid.segment.metadata; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import 
org.apache.druid.client.DruidServer; +import org.apache.druid.client.ImmutableDruidDataSource; import org.apache.druid.client.InternalQueryConfig; import org.apache.druid.data.input.InputRow; import org.apache.druid.java.util.common.Intervals; @@ -37,6 +40,8 @@ import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.SegmentsMetadataManagerConfig; +import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.query.DruidMetrics; import org.apache.druid.query.QueryContexts; @@ -66,6 +71,8 @@ import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.server.QueryResponse; import org.apache.druid.server.coordination.DruidServerMetadata; import org.apache.druid.server.coordination.ServerType; +import org.apache.druid.server.coordinator.loading.SegmentReplicaCount; +import org.apache.druid.server.coordinator.loading.SegmentReplicationStatus; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.apache.druid.server.security.Access; import org.apache.druid.server.security.AllowAllAuthenticator; @@ -74,18 +81,23 @@ import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.LinearShardSpec; import org.easymock.EasyMock; +import org.joda.time.Period; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; import org.skife.jdbi.v2.StatementContext; import java.io.File; import java.io.IOException; import java.sql.ResultSet; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -106,12 +118,19 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad private CoordinatorSegmentMetadataCache runningSchema; private CountDownLatch buildTableLatch = new CountDownLatch(1); private CountDownLatch markDataSourceLatch = new CountDownLatch(1); + private SqlSegmentsMetadataManager sqlSegmentsMetadataManager; + private Supplier segmentsMetadataManagerConfigSupplier; @Before @Override public void setUp() throws Exception { super.setUp(); + sqlSegmentsMetadataManager = Mockito.mock(SqlSegmentsMetadataManager.class); + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()).thenReturn(Collections.emptyList()); + SegmentsMetadataManagerConfig metadataManagerConfig = Mockito.mock(SegmentsMetadataManagerConfig.class); + Mockito.when(metadataManagerConfig.getPollDuration()).thenReturn(Period.millis(1000)); + segmentsMetadataManagerConfigSupplier = Suppliers.ofInstance(metadataManagerConfig); } @After @@ -132,6 +151,7 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad public CoordinatorSegmentMetadataCache buildSchemaMarkAndTableLatch(SegmentMetadataCacheConfig config) throws InterruptedException { Preconditions.checkState(runningSchema == null); + runningSchema = new CoordinatorSegmentMetadataCache( getQueryLifecycleFactory(walker), serverView, @@ -140,7 +160,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new 
InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -178,7 +200,7 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad public void testGetTableMapFoo() throws InterruptedException { CoordinatorSegmentMetadataCache schema = buildSchemaMarkAndTableLatch(); - verifyFooDSSchema(schema); + verifyFooDSSchema(schema, 6); } @Test @@ -312,7 +334,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -523,7 +547,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -558,6 +584,11 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad { String datasource = "newSegmentAddTest"; CountDownLatch addSegmentLatch = new CountDownLatch(2); + SqlSegmentsMetadataManager sqlSegmentsMetadataManager = Mockito.mock(SqlSegmentsMetadataManager.class); + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()).thenReturn(Collections.emptyList()); + SegmentsMetadataManagerConfig metadataManagerConfig = Mockito.mock(SegmentsMetadataManagerConfig.class); + Mockito.when(metadataManagerConfig.getPollDuration()).thenReturn(Period.millis(1000)); + Supplier segmentsMetadataManagerConfigSupplier = Suppliers.ofInstance(metadataManagerConfig); CoordinatorSegmentMetadataCache schema = new CoordinatorSegmentMetadataCache( getQueryLifecycleFactory(walker), serverView, @@ -566,7 +597,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -605,6 +638,11 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad { String datasource = "newSegmentAddTest"; CountDownLatch addSegmentLatch = new CountDownLatch(1); + SqlSegmentsMetadataManager sqlSegmentsMetadataManager = Mockito.mock(SqlSegmentsMetadataManager.class); + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()).thenReturn(Collections.emptyList()); + SegmentsMetadataManagerConfig metadataManagerConfig = Mockito.mock(SegmentsMetadataManagerConfig.class); + Mockito.when(metadataManagerConfig.getPollDuration()).thenReturn(Period.millis(1000)); + Supplier segmentsMetadataManagerConfigSupplier = Suppliers.ofInstance(metadataManagerConfig); CoordinatorSegmentMetadataCache schema = new CoordinatorSegmentMetadataCache( getQueryLifecycleFactory(walker), serverView, @@ -613,7 +651,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -649,6 +689,11 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad { String datasource = "newSegmentAddTest"; CountDownLatch addSegmentLatch = new CountDownLatch(1); + 
SqlSegmentsMetadataManager sqlSegmentsMetadataManager = Mockito.mock(SqlSegmentsMetadataManager.class); + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()).thenReturn(Collections.emptyList()); + SegmentsMetadataManagerConfig metadataManagerConfig = Mockito.mock(SegmentsMetadataManagerConfig.class); + Mockito.when(metadataManagerConfig.getPollDuration()).thenReturn(Period.millis(1000)); + Supplier segmentsMetadataManagerConfigSupplier = Suppliers.ofInstance(metadataManagerConfig); CoordinatorSegmentMetadataCache schema = new CoordinatorSegmentMetadataCache( getQueryLifecycleFactory(walker), serverView, @@ -657,7 +702,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -698,7 +745,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -756,7 +805,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -817,7 +868,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -852,7 +905,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -900,7 +955,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -972,7 +1029,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad internalQueryConfig, new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ); Map queryContext = ImmutableMap.of( @@ -1141,7 +1200,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), emitter, segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override @@ -1306,7 +1367,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override void updateSchemaForRealtimeSegments(SegmentSchemas segmentSchemas) @@ -1385,7 +1448,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + 
sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override public void refresh(Set segmentsToRefresh, Set dataSourcesToRebuild) @@ -1565,7 +1630,9 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad new InternalQueryConfig(), new NoopServiceEmitter(), segmentSchemaCache, - backFillQueue + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier ) { @Override public Set refreshSegmentsForDataSource(String dataSource, Set segments) @@ -1594,7 +1661,7 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad Assert.assertEquals(0, refreshCount.get()); // verify that datasource schema is built - verifyFooDSSchema(schema); + verifyFooDSSchema(schema, 6); serverView.addSegment(segment3, ServerType.HISTORICAL); @@ -1721,12 +1788,384 @@ public class CoordinatorSegmentMetadataCacheTest extends CoordinatorSegmentMetad Assert.assertEquals(existingMetadata.getNumReplicas(), currentMetadata.getNumReplicas()); } - private void verifyFooDSSchema(CoordinatorSegmentMetadataCache schema) + private CoordinatorSegmentMetadataCache setupForColdDatasourceSchemaTest() + { + // foo has both hot and cold segments + DataSegment coldSegment = + DataSegment.builder() + .dataSource(DATASOURCE1) + .interval(Intervals.of("1998/P2Y")) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + // cold has only cold segments + DataSegment singleColdSegment = + DataSegment.builder() + .dataSource("cold") + .interval(Intervals.of("2000/P2Y")) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + ImmutableMap.Builder segmentStatsMap = new ImmutableMap.Builder<>(); + segmentStatsMap.put(coldSegment.getId(), new SegmentMetadata(20L, "foo-fingerprint")); + segmentStatsMap.put(singleColdSegment.getId(), new SegmentMetadata(20L, "cold-fingerprint")); + ImmutableMap.Builder schemaPayloadMap = new ImmutableMap.Builder<>(); + schemaPayloadMap.put( + "foo-fingerprint", + new SchemaPayload(RowSignature.builder() + .add("dim1", ColumnType.STRING) + .add("c1", ColumnType.STRING) + .add("c2", ColumnType.LONG) + .build()) + ); + schemaPayloadMap.put( + "cold-fingerprint", + new SchemaPayload( + RowSignature.builder() + .add("f1", ColumnType.STRING) + .add("f2", ColumnType.DOUBLE) + .build() + ) + ); + + segmentSchemaCache.updateFinalizedSegmentSchema( + new SegmentSchemaCache.FinalizedSegmentSchemaInfo(segmentStatsMap.build(), schemaPayloadMap.build()) + ); + + List druidDataSources = new ArrayList<>(); + Map segmentMap = new HashMap<>(); + segmentMap.put(coldSegment.getId(), coldSegment); + segmentMap.put(segment1.getId(), segment1); + segmentMap.put(segment2.getId(), segment2); + druidDataSources.add(new ImmutableDruidDataSource( + coldSegment.getDataSource(), + Collections.emptyMap(), + segmentMap + )); + druidDataSources.add(new ImmutableDruidDataSource( + singleColdSegment.getDataSource(), + Collections.emptyMap(), + Collections.singletonMap(singleColdSegment.getId(), singleColdSegment) + )); + + Mockito.when( + sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()) + .thenReturn(druidDataSources); + + CoordinatorSegmentMetadataCache schema = new CoordinatorSegmentMetadataCache( + getQueryLifecycleFactory(walker), + serverView, + SEGMENT_CACHE_CONFIG_DEFAULT, + new NoopEscalator(), + new InternalQueryConfig(), + new NoopServiceEmitter(), + segmentSchemaCache, + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier + ); + + 
SegmentReplicaCount zeroSegmentReplicaCount = Mockito.mock(SegmentReplicaCount.class); + SegmentReplicaCount nonZeroSegmentReplicaCount = Mockito.mock(SegmentReplicaCount.class); + Mockito.when(zeroSegmentReplicaCount.required()).thenReturn(0); + Mockito.when(nonZeroSegmentReplicaCount.required()).thenReturn(1); + SegmentReplicationStatus segmentReplicationStatus = Mockito.mock(SegmentReplicationStatus.class); + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(coldSegment.getId()))) + .thenReturn(zeroSegmentReplicaCount); + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(singleColdSegment.getId()))) + .thenReturn(zeroSegmentReplicaCount); + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(segment1.getId()))) + .thenReturn(nonZeroSegmentReplicaCount); + + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(segment2.getId()))) + .thenReturn(nonZeroSegmentReplicaCount); + + schema.updateSegmentReplicationStatus(segmentReplicationStatus); + schema.updateSegmentReplicationStatus(segmentReplicationStatus); + + return schema; + } + + @Test + public void testColdDatasourceSchema_refreshAfterColdSchemaExec() throws IOException + { + CoordinatorSegmentMetadataCache schema = setupForColdDatasourceSchemaTest(); + + schema.coldDatasourceSchemaExec(); + + Assert.assertEquals(new HashSet<>(Arrays.asList("foo", "cold")), schema.getDataSourceInformationMap().keySet()); + + // verify that cold schema for both foo and cold is present + RowSignature fooSignature = schema.getDatasource("foo").getRowSignature(); + List columnNames = fooSignature.getColumnNames(); + + // verify that foo schema doesn't contain columns from hot segments + Assert.assertEquals(3, columnNames.size()); + + Assert.assertEquals("dim1", columnNames.get(0)); + Assert.assertEquals(ColumnType.STRING, fooSignature.getColumnType(columnNames.get(0)).get()); + + Assert.assertEquals("c1", columnNames.get(1)); + Assert.assertEquals(ColumnType.STRING, fooSignature.getColumnType(columnNames.get(1)).get()); + + Assert.assertEquals("c2", columnNames.get(2)); + Assert.assertEquals(ColumnType.LONG, fooSignature.getColumnType(columnNames.get(2)).get()); + + RowSignature coldSignature = schema.getDatasource("cold").getRowSignature(); + columnNames = coldSignature.getColumnNames(); + Assert.assertEquals("f1", columnNames.get(0)); + Assert.assertEquals(ColumnType.STRING, coldSignature.getColumnType(columnNames.get(0)).get()); + + Assert.assertEquals("f2", columnNames.get(1)); + Assert.assertEquals(ColumnType.DOUBLE, coldSignature.getColumnType(columnNames.get(1)).get()); + + Set segmentIds = new HashSet<>(); + segmentIds.add(segment1.getId()); + segmentIds.add(segment2.getId()); + + schema.refresh(segmentIds, new HashSet<>()); + + Assert.assertEquals(new HashSet<>(Arrays.asList("foo", "cold")), schema.getDataSourceInformationMap().keySet()); + + coldSignature = schema.getDatasource("cold").getRowSignature(); + columnNames = coldSignature.getColumnNames(); + Assert.assertEquals("f1", columnNames.get(0)); + Assert.assertEquals(ColumnType.STRING, coldSignature.getColumnType(columnNames.get(0)).get()); + + Assert.assertEquals("f2", columnNames.get(1)); + Assert.assertEquals(ColumnType.DOUBLE, coldSignature.getColumnType(columnNames.get(1)).get()); + + // foo now contains schema from both hot and cold segments + verifyFooDSSchema(schema, 8); + RowSignature rowSignature = schema.getDatasource("foo").getRowSignature(); + + // 
cold columns should be present at the end + columnNames = rowSignature.getColumnNames(); + Assert.assertEquals("c1", columnNames.get(6)); + Assert.assertEquals(ColumnType.STRING, rowSignature.getColumnType(columnNames.get(6)).get()); + + Assert.assertEquals("c2", columnNames.get(7)); + Assert.assertEquals(ColumnType.LONG, rowSignature.getColumnType(columnNames.get(7)).get()); + } + + @Test + public void testColdDatasourceSchema_coldSchemaExecAfterRefresh() throws IOException + { + CoordinatorSegmentMetadataCache schema = setupForColdDatasourceSchemaTest(); + + Set segmentIds = new HashSet<>(); + segmentIds.add(segment1.getId()); + segmentIds.add(segment2.getId()); + + schema.refresh(segmentIds, new HashSet<>()); + // cold datasource shouldn't be present + Assert.assertEquals(Collections.singleton("foo"), schema.getDataSourceInformationMap().keySet()); + + // cold columns shouldn't be present + verifyFooDSSchema(schema, 6); + Assert.assertNull(schema.getDatasource("cold")); + + schema.coldDatasourceSchemaExec(); + + // cold datasource should be present now + Assert.assertEquals(new HashSet<>(Arrays.asList("foo", "cold")), schema.getDataSourceInformationMap().keySet()); + + RowSignature coldSignature = schema.getDatasource("cold").getRowSignature(); + List columnNames = coldSignature.getColumnNames(); + Assert.assertEquals("f1", columnNames.get(0)); + Assert.assertEquals(ColumnType.STRING, coldSignature.getColumnType(columnNames.get(0)).get()); + + Assert.assertEquals("f2", columnNames.get(1)); + Assert.assertEquals(ColumnType.DOUBLE, coldSignature.getColumnType(columnNames.get(1)).get()); + + // columns from cold datasource should be present + verifyFooDSSchema(schema, 8); + RowSignature rowSignature = schema.getDatasource("foo").getRowSignature(); + + columnNames = rowSignature.getColumnNames(); + Assert.assertEquals("c1", columnNames.get(6)); + Assert.assertEquals(ColumnType.STRING, rowSignature.getColumnType(columnNames.get(6)).get()); + + Assert.assertEquals("c2", columnNames.get(7)); + Assert.assertEquals(ColumnType.LONG, rowSignature.getColumnType(columnNames.get(7)).get()); + } + + @Test + public void testColdDatasourceSchema_verifyStaleDatasourceRemoved() + { + DataSegment coldSegmentAlpha = + DataSegment.builder() + .dataSource("alpha") + .interval(Intervals.of("2000/P2Y")) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + DataSegment coldSegmentBeta = + DataSegment.builder() + .dataSource("beta") + .interval(Intervals.of("2000/P2Y")) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + DataSegment coldSegmentGamma = + DataSegment.builder() + .dataSource("gamma") + .interval(Intervals.of("2000/P2Y")) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + DataSegment hotSegmentGamma = + DataSegment.builder() + .dataSource("gamma") + .interval(Intervals.of("2001/P2Y")) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + + ImmutableMap.Builder segmentStatsMap = new ImmutableMap.Builder<>(); + segmentStatsMap.put(coldSegmentAlpha.getId(), new SegmentMetadata(20L, "cold")); + segmentStatsMap.put(coldSegmentBeta.getId(), new SegmentMetadata(20L, "cold")); + segmentStatsMap.put(hotSegmentGamma.getId(), new SegmentMetadata(20L, "hot")); + segmentStatsMap.put(coldSegmentGamma.getId(), new SegmentMetadata(20L, "cold")); + + ImmutableMap.Builder schemaPayloadMap = new ImmutableMap.Builder<>(); + schemaPayloadMap.put( + "cold", + new SchemaPayload(RowSignature.builder() + .add("dim1", 
ColumnType.STRING) + .add("c1", ColumnType.STRING) + .add("c2", ColumnType.LONG) + .build()) + ); + schemaPayloadMap.put( + "hot", + new SchemaPayload(RowSignature.builder() + .add("c3", ColumnType.STRING) + .add("c4", ColumnType.STRING) + .build()) + ); + segmentSchemaCache.updateFinalizedSegmentSchema( + new SegmentSchemaCache.FinalizedSegmentSchemaInfo(segmentStatsMap.build(), schemaPayloadMap.build()) + ); + + List druidDataSources = new ArrayList<>(); + druidDataSources.add( + new ImmutableDruidDataSource( + "alpha", + Collections.emptyMap(), + Collections.singletonMap(coldSegmentAlpha.getId(), coldSegmentAlpha) + ) + ); + + Map gammaSegments = new HashMap<>(); + gammaSegments.put(hotSegmentGamma.getId(), hotSegmentGamma); + gammaSegments.put(coldSegmentGamma.getId(), coldSegmentGamma); + + druidDataSources.add( + new ImmutableDruidDataSource( + "gamma", + Collections.emptyMap(), + gammaSegments + ) + ); + + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()) + .thenReturn(druidDataSources); + + CoordinatorSegmentMetadataCache schema = new CoordinatorSegmentMetadataCache( + getQueryLifecycleFactory(walker), + serverView, + SEGMENT_CACHE_CONFIG_DEFAULT, + new NoopEscalator(), + new InternalQueryConfig(), + new NoopServiceEmitter(), + segmentSchemaCache, + backFillQueue, + sqlSegmentsMetadataManager, + segmentsMetadataManagerConfigSupplier + ); + + SegmentReplicaCount zeroSegmentReplicaCount = Mockito.mock(SegmentReplicaCount.class); + SegmentReplicaCount nonZeroSegmentReplicaCount = Mockito.mock(SegmentReplicaCount.class); + Mockito.when(zeroSegmentReplicaCount.required()).thenReturn(0); + Mockito.when(nonZeroSegmentReplicaCount.required()).thenReturn(1); + SegmentReplicationStatus segmentReplicationStatus = Mockito.mock(SegmentReplicationStatus.class); + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(coldSegmentAlpha.getId()))) + .thenReturn(zeroSegmentReplicaCount); + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(coldSegmentBeta.getId()))) + .thenReturn(zeroSegmentReplicaCount); + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(coldSegmentGamma.getId()))) + .thenReturn(zeroSegmentReplicaCount); + + Mockito.when(segmentReplicationStatus.getReplicaCountsInCluster(ArgumentMatchers.eq(hotSegmentGamma.getId()))) + .thenReturn(nonZeroSegmentReplicaCount); + + schema.updateSegmentReplicationStatus(segmentReplicationStatus); + + schema.coldDatasourceSchemaExec(); + // alpha has only 1 cold segment + Assert.assertNotNull(schema.getDatasource("alpha")); + // gamma has both hot and cold segments + Assert.assertNotNull(schema.getDatasource("gamma")); + // assert that cold schema for gamma doesn't contain any columns from the hot segment + RowSignature rowSignature = schema.getDatasource("gamma").getRowSignature(); + Assert.assertTrue(rowSignature.contains("dim1")); + Assert.assertTrue(rowSignature.contains("c1")); + Assert.assertTrue(rowSignature.contains("c2")); + Assert.assertFalse(rowSignature.contains("c3")); + Assert.assertFalse(rowSignature.contains("c4")); + + Assert.assertEquals(new HashSet<>(Arrays.asList("alpha", "gamma")), schema.getDataSourceInformationMap().keySet()); + + druidDataSources.clear(); + druidDataSources.add( + new ImmutableDruidDataSource( + "beta", + Collections.emptyMap(), + Collections.singletonMap(coldSegmentBeta.getId(), coldSegmentBeta) + ) + ); + + druidDataSources.add( + new ImmutableDruidDataSource( + "gamma", + 
Collections.emptyMap(), + Collections.singletonMap(hotSegmentGamma.getId(), hotSegmentGamma) + ) + ); + + Mockito.when(sqlSegmentsMetadataManager.getImmutableDataSourcesWithAllUsedSegments()) + .thenReturn(druidDataSources); + + schema.coldDatasourceSchemaExec(); + Assert.assertNotNull(schema.getDatasource("beta")); + // alpha doesn't have any segments + Assert.assertNull(schema.getDatasource("alpha")); + // gamma just has 1 hot segment + Assert.assertNull(schema.getDatasource("gamma")); + + Assert.assertNull(schema.getDatasource("doesnotexist")); + + Assert.assertEquals(Collections.singleton("beta"), schema.getDataSourceInformationMap().keySet()); + } + + private void verifyFooDSSchema(CoordinatorSegmentMetadataCache schema, int columns) { final DataSourceInformation fooDs = schema.getDatasource("foo"); final RowSignature fooRowSignature = fooDs.getRowSignature(); List columnNames = fooRowSignature.getColumnNames(); - Assert.assertEquals(6, columnNames.size()); + Assert.assertEquals(columns, columnNames.size()); Assert.assertEquals("__time", columnNames.get(0)); Assert.assertEquals(ColumnType.LONG, fooRowSignature.getColumnType(columnNames.get(0)).get()); diff --git a/server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaCacheTest.java b/server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaCacheTest.java index f89c305b9db..c17fa59d6ba 100644 --- a/server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaCacheTest.java +++ b/server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaCacheTest.java @@ -63,28 +63,52 @@ public class SegmentSchemaCacheTest RowSignature rowSignature = RowSignature.builder().add("cx", ColumnType.FLOAT).build(); SchemaPayloadPlus expected = new SchemaPayloadPlus(new SchemaPayload(rowSignature, Collections.emptyMap()), 20L); SegmentId id = SegmentId.dummy("ds"); + SegmentId id2 = SegmentId.dummy("ds2"); // this call shouldn't result in any error cache.markMetadataQueryResultPublished(id); cache.addTemporaryMetadataQueryResult(id, rowSignature, Collections.emptyMap(), 20); + cache.addTemporaryMetadataQueryResult(id2, rowSignature, Collections.emptyMap(), 20); Assert.assertTrue(cache.isSchemaCached(id)); + Assert.assertTrue(cache.isSchemaCached(id2)); Optional schema = cache.getSchemaForSegment(id); Assert.assertTrue(schema.isPresent()); Assert.assertEquals(expected, schema.get()); + Optional schema2 = cache.getSchemaForSegment(id); + Assert.assertTrue(schema2.isPresent()); + Assert.assertEquals(expected, schema2.get()); cache.markMetadataQueryResultPublished(id); + cache.markMetadataQueryResultPublished(id2); schema = cache.getSchemaForSegment(id); Assert.assertTrue(schema.isPresent()); Assert.assertEquals(expected, schema.get()); - cache.resetTemporaryPublishedMetadataQueryResultOnDBPoll(); + // simulate call after segment polling - Assert.assertFalse(cache.isSchemaCached(id)); + ImmutableMap.Builder segmentMetadataBuilder = ImmutableMap.builder(); + segmentMetadataBuilder.put(id, new SegmentMetadata(5L, "fp")); + + ImmutableMap.Builder schemaPayloadBuilder = ImmutableMap.builder(); + schemaPayloadBuilder.put("fp", new SchemaPayload(rowSignature)); + + SegmentSchemaCache.FinalizedSegmentSchemaInfo finalizedSegmentSchemaInfo = + new SegmentSchemaCache.FinalizedSegmentSchemaInfo(segmentMetadataBuilder.build(), schemaPayloadBuilder.build()); + + cache.updateFinalizedSegmentSchema(finalizedSegmentSchemaInfo); + + Assert.assertNull(cache.getTemporaryPublishedMetadataQueryResults(id)); + 
Assert.assertNotNull(cache.getTemporaryPublishedMetadataQueryResults(id2)); + Assert.assertTrue(cache.isSchemaCached(id)); + Assert.assertTrue(cache.isSchemaCached(id2)); schema = cache.getSchemaForSegment(id); - Assert.assertFalse(schema.isPresent()); + Assert.assertTrue(schema.isPresent()); + + schema2 = cache.getSchemaForSegment(id2); + Assert.assertTrue(schema2.isPresent()); } @Test diff --git a/server/src/test/java/org/apache/druid/segment/realtime/FireDepartmentTest.java b/server/src/test/java/org/apache/druid/segment/realtime/FireDepartmentTest.java deleted file mode 100644 index 9c37fe7a37c..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/FireDepartmentTest.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime; - -import com.fasterxml.jackson.databind.InjectableValues; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.client.cache.CacheConfig; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.client.cache.MapCache; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.JSONParseSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.CountAggregatorFactory; -import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeIOConfig; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; -import org.apache.druid.segment.join.NoopJoinableFactory; -import org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool; -import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; -import org.junit.Assert; -import org.junit.Test; - -import java.io.File; -import java.util.Arrays; -import java.util.Map; - -/** - */ -public class FireDepartmentTest -{ - - public static final CacheConfig NO_CACHE_CONFIG = new CacheConfig() - { - @Override - public boolean isPopulateCache() - { - return false; - } - - @Override - public boolean isUseCache() - { - return false; - } - }; - - @Test - public void testSerde() throws Exception - { - ObjectMapper jsonMapper = new DefaultObjectMapper(); - jsonMapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper)); - - FireDepartment schema = new FireDepartment( - new DataSchema( - "foo", 
- jsonMapper.convertValue( - new StringInputRowParser( - new JSONParseSpec( - new TimestampSpec( - "timestamp", - "auto", - null - ), - new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2")) - ), - null, - null, - null - ), - null - ), - Map.class - ), - new AggregatorFactory[]{ - new CountAggregatorFactory("count") - }, - new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null), - null, - jsonMapper - ), - new RealtimeIOConfig( - null, - new RealtimePlumberSchool( - null, - null, - null, - null, - null, - null, - null, - NoopJoinableFactory.INSTANCE, - TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()), - TestHelper.getTestIndexIO(), - MapCache.create(0), - NO_CACHE_CONFIG, - new CachePopulatorStats(), - TestHelper.makeJsonMapper() - - ) - ), - RealtimeTuningConfig.makeDefaultTuningConfig(new File("/tmp/nonexistent")) - ); - - String json = jsonMapper.writeValueAsString(schema); - - FireDepartment newSchema = jsonMapper.readValue(json, FireDepartment.class); - - Assert.assertEquals(schema.getDataSchema().getDataSource(), newSchema.getDataSchema().getDataSource()); - Assert.assertEquals("/tmp/nonexistent", schema.getTuningConfig().getBasePersistDirectory().toString()); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/RealtimeMetricsMonitorTest.java b/server/src/test/java/org/apache/druid/segment/realtime/RealtimeMetricsMonitorTest.java deleted file mode 100644 index cca07bc9708..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/RealtimeMetricsMonitorTest.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime; - -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.metrics.StubServiceEmitter; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeIOConfig; -import org.junit.Before; -import org.junit.Test; - -import java.util.Collections; -import java.util.Random; - -public class RealtimeMetricsMonitorTest -{ - - private StubServiceEmitter emitter; - private Random random; - - @Before - public void setup() - { - random = new Random(100); - emitter = new StubServiceEmitter("test", "localhost"); - } - - @Test - public void testDoMonitor() - { - FireDepartment fireDepartment = new FireDepartment( - new DataSchema("wiki", null, null, null, null, null, null, new DefaultObjectMapper()), - new RealtimeIOConfig(null, null), - null - ); - - // Add some metrics and invoke monitoring - final FireDepartmentMetrics metrics = fireDepartment.getMetrics(); - invokeRandomTimes(metrics::incrementThrownAway); - invokeRandomTimes(metrics::incrementUnparseable); - invokeRandomTimes(metrics::incrementProcessed); - invokeRandomTimes(metrics::incrementDedup); - invokeRandomTimes(metrics::incrementFailedHandoffs); - invokeRandomTimes(metrics::incrementFailedPersists); - invokeRandomTimes(metrics::incrementHandOffCount); - invokeRandomTimes(metrics::incrementNumPersists); - - metrics.incrementPushedRows(random.nextInt()); - metrics.incrementRowOutputCount(random.nextInt()); - metrics.incrementMergedRows(random.nextInt()); - metrics.incrementMergeCpuTime(random.nextInt()); - metrics.setSinkCount(random.nextInt()); - - RealtimeMetricsMonitor monitor = new RealtimeMetricsMonitor(Collections.singletonList(fireDepartment)); - monitor.doMonitor(emitter); - - // Verify the metrics - emitter.verifyValue("ingest/events/thrownAway", metrics.thrownAway()); - emitter.verifyValue("ingest/events/unparseable", metrics.unparseable()); - - emitter.verifyValue("ingest/events/duplicate", metrics.dedup()); - emitter.verifyValue("ingest/events/processed", metrics.processed()); - emitter.verifyValue("ingest/rows/output", metrics.rowOutput()); - emitter.verifyValue("ingest/persists/count", metrics.numPersists()); - emitter.verifyValue("ingest/persists/time", metrics.persistTimeMillis()); - emitter.verifyValue("ingest/persists/cpu", metrics.persistCpuTime()); - emitter.verifyValue("ingest/persists/backPressure", metrics.persistBackPressureMillis()); - emitter.verifyValue("ingest/persists/failed", metrics.failedPersists()); - emitter.verifyValue("ingest/handoff/failed", metrics.failedHandoffs()); - emitter.verifyValue("ingest/merge/time", metrics.mergeTimeMillis()); - emitter.verifyValue("ingest/merge/cpu", metrics.mergeCpuTime()); - emitter.verifyValue("ingest/handoff/count", metrics.handOffCount()); - emitter.verifyValue("ingest/sink/count", metrics.sinkCount()); - } - - private void invokeRandomTimes(Action action) - { - int limit = random.nextInt(20); - for (int i = 0; i < limit; ++i) { - action.perform(); - } - } - - @FunctionalInterface - private interface Action - { - void perform(); - } - -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/FireDepartmentMetricsTest.java b/server/src/test/java/org/apache/druid/segment/realtime/SegmentGenerationMetricsTest.java similarity index 85% rename from server/src/test/java/org/apache/druid/segment/realtime/FireDepartmentMetricsTest.java rename to server/src/test/java/org/apache/druid/segment/realtime/SegmentGenerationMetricsTest.java index 
d9c1c461abe..1928c08b2ac 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/FireDepartmentMetricsTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/SegmentGenerationMetricsTest.java @@ -23,20 +23,20 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -public class FireDepartmentMetricsTest +public class SegmentGenerationMetricsTest { - private FireDepartmentMetrics metrics; + private SegmentGenerationMetrics metrics; @Before public void setup() { - metrics = new FireDepartmentMetrics(); + metrics = new SegmentGenerationMetrics(); } @Test public void testSnapshotBeforeProcessing() { - FireDepartmentMetrics snapshot = metrics.snapshot(); + SegmentGenerationMetrics snapshot = metrics.snapshot(); Assert.assertEquals(0L, snapshot.messageGap()); // invalid value Assert.assertTrue(0 > snapshot.maxSegmentHandoffTime()); @@ -47,7 +47,7 @@ public class FireDepartmentMetricsTest { metrics.reportMessageMaxTimestamp(System.currentTimeMillis() - 20L); metrics.reportMaxSegmentHandoffTime(7L); - FireDepartmentMetrics snapshot = metrics.snapshot(); + SegmentGenerationMetrics snapshot = metrics.snapshot(); Assert.assertTrue(snapshot.messageGap() >= 20L); Assert.assertEquals(7, snapshot.maxSegmentHandoffTime()); } @@ -60,7 +60,7 @@ public class FireDepartmentMetricsTest // Should reset to invalid value metrics.snapshot(); metrics.markProcessingDone(); - FireDepartmentMetrics snapshot = metrics.snapshot(); + SegmentGenerationMetrics snapshot = metrics.snapshot(); // Message gap must be invalid after processing is done Assert.assertTrue(0 > snapshot.messageGap()); // value must be invalid diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java deleted file mode 100644 index f795492819a..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorPlumberTest.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.appenderator; - -import org.apache.druid.data.input.InputRow; -import org.apache.druid.segment.handoff.SegmentHandoffNotifier; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.realtime.SegmentPublisher; -import org.apache.druid.segment.realtime.plumber.IntervalStartVersioningPolicy; -import org.apache.druid.segment.realtime.plumber.NoopRejectionPolicyFactory; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.easymock.EasyMock; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class AppenderatorPlumberTest -{ - private AppenderatorPlumber plumber; - private StreamAppenderatorTester streamAppenderatorTester; - - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - @Before - public void setUp() throws Exception - { - this.streamAppenderatorTester = - new StreamAppenderatorTester.Builder() - .maxRowsInMemory(10) - .basePersistDirectory(temporaryFolder.newFolder()) - .build(); - DataSegmentAnnouncer segmentAnnouncer = EasyMock - .createMock(DataSegmentAnnouncer.class); - segmentAnnouncer.announceSegment(EasyMock.anyObject()); - EasyMock.expectLastCall().anyTimes(); - - SegmentPublisher segmentPublisher = EasyMock - .createNiceMock(SegmentPublisher.class); - SegmentHandoffNotifierFactory handoffNotifierFactory = EasyMock - .createNiceMock(SegmentHandoffNotifierFactory.class); - SegmentHandoffNotifier handoffNotifier = EasyMock - .createNiceMock(SegmentHandoffNotifier.class); - EasyMock - .expect( - handoffNotifierFactory.createSegmentHandoffNotifier(EasyMock - .anyString())).andReturn(handoffNotifier).anyTimes(); - EasyMock - .expect( - handoffNotifier.registerSegmentHandoffCallback( - EasyMock.anyObject(), - EasyMock.anyObject(), - EasyMock.anyObject())).andReturn(true).anyTimes(); - - RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig( - null, - 1, - null, - null, - null, - null, - temporaryFolder.newFolder(), - new IntervalStartVersioningPolicy(), - new NoopRejectionPolicyFactory(), - null, - null, - null, - null, - 0, - 0, - false, - null, - null, - null, - null, - null - ); - - this.plumber = new AppenderatorPlumber(streamAppenderatorTester.getSchema(), - tuningConfig, streamAppenderatorTester.getMetrics(), - segmentAnnouncer, segmentPublisher, handoffNotifier, - streamAppenderatorTester.getAppenderator()); - } - - @Test - public void testSimpleIngestion() throws Exception - { - Appenderator appenderator = streamAppenderatorTester.getAppenderator(); - - // startJob - Assert.assertEquals(null, plumber.startJob()); - - // getDataSource - Assert.assertEquals(StreamAppenderatorTester.DATASOURCE, appenderator.getDataSource()); - - InputRow[] rows = new InputRow[] { - StreamAppenderatorTest.ir("2000", "foo", 1), - StreamAppenderatorTest.ir("2000", "bar", 2), StreamAppenderatorTest.ir("2000", "qux", 4)}; - // add - Assert.assertEquals(1, plumber.add(rows[0], null).getRowCount()); - - Assert.assertEquals(2, plumber.add(rows[1], null).getRowCount()); - - Assert.assertEquals(3, plumber.add(rows[2], null).getRowCount()); - - - Assert.assertEquals(1, plumber.getSegmentsView().size()); - - SegmentIdWithShardSpec si = plumber.getSegmentsView().values().toArray(new SegmentIdWithShardSpec[0])[0]; - - Assert.assertEquals(3, appenderator.getRowCount(si)); - - appenderator.clear(); 
- Assert.assertTrue(appenderator.getSegments().isEmpty()); - - plumber.dropSegment(si); - plumber.finishJob(); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsTest.java new file mode 100644 index 00000000000..66b2281d33c --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/AppenderatorsTest.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.realtime.appenderator; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.JSONParseSpec; +import org.apache.druid.data.input.impl.MapInputRowParser; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.FileUtils; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.emitter.core.NoopEmitter; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexMerger; +import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.incremental.ParseExceptionHandler; +import org.apache.druid.segment.incremental.RowIngestionMeters; +import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.TuningConfig; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.segment.loading.NoopDataSegmentPusher; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; +import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.Nullable; +import java.io.File; +import java.util.Map; + + +public class AppenderatorsTest +{ + @Test + public void testOpenSegmentsOfflineAppenderator() throws Exception + { + try (final AppenderatorTester tester = new AppenderatorTester("OPEN_SEGMENTS")) { + 
Assert.assertTrue(tester.appenderator instanceof AppenderatorImpl); + AppenderatorImpl appenderator = (AppenderatorImpl) tester.appenderator; + Assert.assertTrue(appenderator.isOpenSegments()); + } + } + + @Test + public void testClosedSegmentsOfflineAppenderator() throws Exception + { + try (final AppenderatorTester tester = new AppenderatorTester("CLOSED_SEGMENTS")) { + Assert.assertTrue(tester.appenderator instanceof AppenderatorImpl); + AppenderatorImpl appenderator = (AppenderatorImpl) tester.appenderator; + Assert.assertFalse(appenderator.isOpenSegments()); + } + } + + @Test + public void testClosedSegmentsSinksOfflineAppenderator() throws Exception + { + try (final AppenderatorTester tester = new AppenderatorTester("CLOSED_SEGMENTS_SINKS")) { + Assert.assertTrue(tester.appenderator instanceof BatchAppenderator); + } + } + + private static class AppenderatorTester implements AutoCloseable + { + public static final String DATASOURCE = "foo"; + + private final AppenderatorConfig tuningConfig; + private final Appenderator appenderator; + private final ServiceEmitter emitter; + + public AppenderatorTester(final String batchMode) + { + this(100, 100, null, new SimpleRowIngestionMeters(), false, batchMode); + } + + public AppenderatorTester( + final int maxRowsInMemory, + final long maxSizeInBytes, + @Nullable final File basePersistDirectory, + final RowIngestionMeters rowIngestionMeters, + final boolean skipBytesInMemoryOverheadCheck, + String batchMode + ) + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + objectMapper.registerSubtypes(LinearShardSpec.class); + + final Map parserMap = objectMapper.convertValue( + new MapInputRowParser( + new JSONParseSpec( + new TimestampSpec("ts", "auto", null), + DimensionsSpec.EMPTY, + null, + null, + null + ) + ), + Map.class + ); + + DataSchema schema = new DataSchema( + DATASOURCE, + null, + null, + new AggregatorFactory[]{ + new CountAggregatorFactory("count"), + new LongSumAggregatorFactory("met", "met") + }, + new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null), + null, + parserMap, + objectMapper + ); + + tuningConfig = new TestAppenderatorConfig( + TuningConfig.DEFAULT_APPENDABLE_INDEX, + maxRowsInMemory, + maxSizeInBytes == 0L ? getDefaultMaxBytesInMemory() : maxSizeInBytes, + skipBytesInMemoryOverheadCheck, + IndexSpec.DEFAULT, + 0, + false, + 0L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, + basePersistDirectory == null ? 
createNewBasePersistDirectory() : basePersistDirectory + ); + SegmentGenerationMetrics metrics = new SegmentGenerationMetrics(); + + IndexIO indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); + IndexMergerV9 indexMerger = new IndexMergerV9( + objectMapper, + indexIO, + OffHeapMemorySegmentWriteOutMediumFactory.instance() + ); + + emitter = new ServiceEmitter( + "test", + "test", + new NoopEmitter() + ); + emitter.start(); + EmittingLogger.registerEmitter(emitter); + + switch (batchMode) { + case "OPEN_SEGMENTS": + appenderator = Appenderators.createOpenSegmentsOffline( + schema.getDataSource(), + schema, + tuningConfig, + metrics, + new NoopDataSegmentPusher(), + objectMapper, + indexIO, + indexMerger, + rowIngestionMeters, + new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), + false, + CentralizedDatasourceSchemaConfig.create() + ); + break; + case "CLOSED_SEGMENTS": + appenderator = Appenderators.createClosedSegmentsOffline( + schema.getDataSource(), + schema, + tuningConfig, + metrics, + new NoopDataSegmentPusher(), + objectMapper, + indexIO, + indexMerger, + rowIngestionMeters, + new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), + false, + CentralizedDatasourceSchemaConfig.create() + ); + + break; + case "CLOSED_SEGMENTS_SINKS": + appenderator = Appenderators.createOffline( + schema.getDataSource(), + schema, + tuningConfig, + metrics, + new NoopDataSegmentPusher(), + objectMapper, + indexIO, + indexMerger, + rowIngestionMeters, + new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), + false, + CentralizedDatasourceSchemaConfig.create() + ); + break; + default: + throw new IllegalArgumentException("Unrecognized batchMode: " + batchMode); + } + } + + private long getDefaultMaxBytesInMemory() + { + return (Runtime.getRuntime().totalMemory()) / 3; + } + + @Override + public void close() throws Exception + { + appenderator.close(); + emitter.close(); + FileUtils.deleteDirectory(tuningConfig.getBasePersistDirectory()); + } + + private static File createNewBasePersistDirectory() + { + return FileUtils.createTempDir("druid-batch-persist"); + } + } +} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java index f5ae98dd6a0..cf2d7f79898 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java @@ -24,7 +24,6 @@ import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.JSONParseSpec; import org.apache.druid.data.input.impl.MapInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.granularity.Granularities; @@ -39,7 +38,6 @@ import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnConfig; -import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.incremental.ParseExceptionHandler; import 
org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; @@ -48,12 +46,10 @@ import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; -import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; -import org.joda.time.Period; import javax.annotation.Nullable; import java.io.File; @@ -61,7 +57,6 @@ import java.io.IOException; import java.net.URI; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.concurrent.CopyOnWriteArrayList; public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable @@ -70,7 +65,7 @@ public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable private final DataSchema schema; private final AppenderatorConfig tuningConfig; - private final FireDepartmentMetrics metrics; + private final SegmentGenerationMetrics metrics; private final ObjectMapper objectMapper; private final Appenderator appenderator; private final ServiceEmitter emitter; @@ -170,7 +165,7 @@ public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable objectMapper ); - tuningConfig = new TestIndexTuningConfig( + tuningConfig = new TestAppenderatorConfig( TuningConfig.DEFAULT_APPENDABLE_INDEX, maxRowsInMemory, maxSizeInBytes == 0L ? getDefaultMaxBytesInMemory() : maxSizeInBytes, @@ -181,10 +176,9 @@ public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable 0L, OffHeapMemorySegmentWriteOutMediumFactory.instance(), IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, - basePersistDirectory == null ? createNewBasePersistDirectory() : basePersistDirectory, - null + basePersistDirectory == null ? 
createNewBasePersistDirectory() : basePersistDirectory ); - metrics = new FireDepartmentMetrics(); + metrics = new SegmentGenerationMetrics(); IndexIO indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); IndexMergerV9 indexMerger = new IndexMergerV9( @@ -267,7 +261,7 @@ public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable return tuningConfig; } - public FireDepartmentMetrics getMetrics() + public SegmentGenerationMetrics getMetrics() { return metrics; } @@ -299,216 +293,4 @@ public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable { return FileUtils.createTempDir("druid-batch-persist"); } - - - static class TestIndexTuningConfig implements AppenderatorConfig - { - private final AppendableIndexSpec appendableIndexSpec; - private final int maxRowsInMemory; - private final long maxBytesInMemory; - private final boolean skipBytesInMemoryOverheadCheck; - private final int maxColumnsToMerge; - private final PartitionsSpec partitionsSpec; - private final IndexSpec indexSpec; - private final File basePersistDirectory; - private final int maxPendingPersists; - private final boolean reportParseExceptions; - private final long pushTimeout; - private final IndexSpec indexSpecForIntermediatePersists; - @Nullable - private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; - private final int numPersistThreads; - - public TestIndexTuningConfig( - AppendableIndexSpec appendableIndexSpec, - Integer maxRowsInMemory, - Long maxBytesInMemory, - Boolean skipBytesInMemoryOverheadCheck, - IndexSpec indexSpec, - Integer maxPendingPersists, - Boolean reportParseExceptions, - Long pushTimeout, - @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, - Integer maxColumnsToMerge, - File basePersistDirectory, - @Nullable Integer numPersistThreads - ) - { - this.appendableIndexSpec = appendableIndexSpec; - this.maxRowsInMemory = maxRowsInMemory; - this.maxBytesInMemory = maxBytesInMemory; - this.skipBytesInMemoryOverheadCheck = skipBytesInMemoryOverheadCheck; - this.indexSpec = indexSpec; - this.maxPendingPersists = maxPendingPersists; - this.reportParseExceptions = reportParseExceptions; - this.pushTimeout = pushTimeout; - this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; - this.maxColumnsToMerge = maxColumnsToMerge; - this.basePersistDirectory = basePersistDirectory; - - this.partitionsSpec = null; - this.indexSpecForIntermediatePersists = this.indexSpec; - this.numPersistThreads = numPersistThreads == null ? 
DEFAULT_NUM_PERSIST_THREADS : numPersistThreads; - } - - @Override - public TestIndexTuningConfig withBasePersistDirectory(File dir) - { - throw new UnsupportedOperationException(); - } - - @Override - public AppendableIndexSpec getAppendableIndexSpec() - { - return appendableIndexSpec; - } - - @Override - public int getMaxRowsInMemory() - { - return maxRowsInMemory; - } - - @Override - public long getMaxBytesInMemory() - { - return maxBytesInMemory; - } - - @Override - public boolean isSkipBytesInMemoryOverheadCheck() - { - return skipBytesInMemoryOverheadCheck; - } - - @Nullable - @Override - public PartitionsSpec getPartitionsSpec() - { - return partitionsSpec; - } - - @Override - public IndexSpec getIndexSpec() - { - return indexSpec; - } - - @Override - public IndexSpec getIndexSpecForIntermediatePersists() - { - return indexSpecForIntermediatePersists; - } - - @Override - public int getMaxPendingPersists() - { - return maxPendingPersists; - } - - @Override - public boolean isReportParseExceptions() - { - return reportParseExceptions; - } - - @Nullable - @Override - public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() - { - return segmentWriteOutMediumFactory; - } - - @Override - public int getMaxColumnsToMerge() - { - return maxColumnsToMerge; - } - - @Override - public File getBasePersistDirectory() - { - return basePersistDirectory; - } - - @Override - public Period getIntermediatePersistPeriod() - { - return new Period(Integer.MAX_VALUE); // intermediate persist doesn't make much sense for batch jobs - } - - @Override - public int getNumPersistThreads() - { - return numPersistThreads; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - TestIndexTuningConfig that = (TestIndexTuningConfig) o; - return Objects.equals(appendableIndexSpec, that.appendableIndexSpec) && - maxRowsInMemory == that.maxRowsInMemory && - maxBytesInMemory == that.maxBytesInMemory && - skipBytesInMemoryOverheadCheck == that.skipBytesInMemoryOverheadCheck && - maxColumnsToMerge == that.maxColumnsToMerge && - maxPendingPersists == that.maxPendingPersists && - reportParseExceptions == that.reportParseExceptions && - pushTimeout == that.pushTimeout && - numPersistThreads == that.numPersistThreads && - Objects.equals(partitionsSpec, that.partitionsSpec) && - Objects.equals(indexSpec, that.indexSpec) && - Objects.equals(indexSpecForIntermediatePersists, that.indexSpecForIntermediatePersists) && - Objects.equals(basePersistDirectory, that.basePersistDirectory) && - Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory); - } - - @Override - public int hashCode() - { - return Objects.hash( - appendableIndexSpec, - maxRowsInMemory, - maxBytesInMemory, - skipBytesInMemoryOverheadCheck, - maxColumnsToMerge, - partitionsSpec, - indexSpec, - indexSpecForIntermediatePersists, - basePersistDirectory, - maxPendingPersists, - reportParseExceptions, - pushTimeout, - segmentWriteOutMediumFactory, - numPersistThreads - ); - } - - @Override - public String toString() - { - return "IndexTuningConfig{" + - "maxRowsInMemory=" + maxRowsInMemory + - ", maxBytesInMemory=" + maxBytesInMemory + - ", skipBytesInMemoryOverheadCheck=" + skipBytesInMemoryOverheadCheck + - ", maxColumnsToMerge=" + maxColumnsToMerge + - ", partitionsSpec=" + partitionsSpec + - ", indexSpec=" + indexSpec + - ", indexSpecForIntermediatePersists=" + indexSpecForIntermediatePersists + - ", basePersistDirectory=" + 
basePersistDirectory + - ", maxPendingPersists=" + maxPendingPersists + - ", reportParseExceptions=" + reportParseExceptions + - ", pushTimeout=" + pushTimeout + - ", segmentWriteOutMediumFactory=" + segmentWriteOutMediumFactory + - ", numPersistThreads=" + numPersistThreads + - '}'; - } - } - } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmentsSinksBatchAppenderatorDriverTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmentsSinksBatchAppenderatorDriverTest.java index cc5a7f282eb..269aeaca7c4 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmentsSinksBatchAppenderatorDriverTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmentsSinksBatchAppenderatorDriverTest.java @@ -95,7 +95,7 @@ public class ClosedSegmentsSinksBatchAppenderatorDriverTest extends EasyMockSupp driver = new BatchAppenderatorDriver( appenderatorTester.getAppenderator(), allocator, - new TestUsedSegmentChecker(appenderatorTester.getPushedSegments()), + new TestPublishedSegmentRetriever(appenderatorTester.getPushedSegments()), dataSegmentKiller ); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java deleted file mode 100644 index 97578426812..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.appenderator; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableList; -import com.google.inject.Binder; -import com.google.inject.Injector; -import com.google.inject.Module; -import com.google.inject.name.Names; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.JSONParseSpec; -import org.apache.druid.data.input.impl.MapInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.guice.GuiceInjectors; -import org.apache.druid.initialization.Initialization; -import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.query.DruidProcessingConfig; -import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.CountAggregatorFactory; -import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.segment.column.ColumnConfig; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.timeline.partition.LinearShardSpec; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import java.io.IOException; -import java.util.Map; - -public class DefaultOfflineAppenderatorFactoryTest -{ - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - @Test - public void testBuild() throws IOException, SegmentNotWritableException - { - Injector injector = Initialization.makeInjectorWithModules( - GuiceInjectors.makeStartupInjector(), - ImmutableList.of( - new Module() - { - @Override - public void configure(Binder binder) - { - binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/tool"); - binder.bindConstant().annotatedWith(Names.named("servicePort")).to(9999); - binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1); - binder.bind(DruidProcessingConfig.class).toInstance( - new DruidProcessingConfig() - { - @Override - public String getFormatString() - { - return "processing-%s"; - } - - @Override - public int intermediateComputeSizeBytes() - { - return 100 * 1024 * 1024; - } - - @Override - public int getNumThreads() - { - return 1; - } - - } - ); - binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class); - } - } - ) - ); - ObjectMapper objectMapper = injector.getInstance(ObjectMapper.class); - AppenderatorFactory defaultOfflineAppenderatorFactory = objectMapper.readerFor(AppenderatorFactory.class) - .readValue("{\"type\":\"offline\"}"); - - final Map parserMap = objectMapper.convertValue( - new MapInputRowParser( - new JSONParseSpec( - new TimestampSpec("ts", "auto", null), - DimensionsSpec.EMPTY, - null, - null, - null - ) - ), - Map.class - ); - DataSchema schema = new DataSchema( - "dataSourceName", - parserMap, - new AggregatorFactory[]{ - new CountAggregatorFactory("count"), - new LongSumAggregatorFactory("met", "met") - }, - new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null), - null, - objectMapper - ); - - RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig( - null, - 75000, - null, - null, - null, - null, - temporaryFolder.newFolder(), - null, - null, - null, - null, - null, - null, 
- 0, - 0, - null, - null, - null, - null, - null, - null - ); - - Appenderator appenderator = defaultOfflineAppenderatorFactory.build( - schema, - tuningConfig, - new FireDepartmentMetrics() - ); - try { - Assert.assertEquals("dataSourceName", appenderator.getDataSource()); - Assert.assertEquals(null, appenderator.startJob()); - SegmentIdWithShardSpec identifier = new SegmentIdWithShardSpec( - "dataSourceName", - Intervals.of("2000/2001"), - "A", - new LinearShardSpec(0) - ); - Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory()); - appenderator.add(identifier, StreamAppenderatorTest.ir("2000", "bar", 1), null); - Assert.assertEquals(1, ((AppenderatorImpl) appenderator).getRowsInMemory()); - appenderator.add(identifier, StreamAppenderatorTest.ir("2000", "baz", 1), null); - Assert.assertEquals(2, ((AppenderatorImpl) appenderator).getRowsInMemory()); - appenderator.close(); - Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory()); - } - finally { - appenderator.close(); - } - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java index 0d210f21a2c..33a0ed2f8a4 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java @@ -46,7 +46,7 @@ import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; @@ -63,7 +63,7 @@ public class OpenAndClosedSegmentsAppenderatorTester implements AutoCloseable public static final String DATASOURCE = "foo"; private final DataSchema schema; - private final FireDepartmentMetrics metrics; + private final SegmentGenerationMetrics metrics; private final DataSegmentPusher dataSegmentPusher; private final ObjectMapper objectMapper; private final Appenderator appenderator; @@ -139,23 +139,21 @@ public class OpenAndClosedSegmentsAppenderatorTester implements AutoCloseable null, objectMapper ); - tuningConfig = - new ClosedSegmensSinksBatchAppenderatorTester.TestIndexTuningConfig( - TuningConfig.DEFAULT_APPENDABLE_INDEX, - maxRowsInMemory, - maxSizeInBytes == 0L ? getDefaultMaxBytesInMemory() : maxSizeInBytes, - skipBytesInMemoryOverheadCheck, - IndexSpec.DEFAULT, - 0, - false, - 0L, - OffHeapMemorySegmentWriteOutMediumFactory.instance(), - IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, - basePersistDirectory == null ? createNewBasePersistDirectory() : basePersistDirectory, - null - ); + tuningConfig = new TestAppenderatorConfig( + TuningConfig.DEFAULT_APPENDABLE_INDEX, + maxRowsInMemory, + maxSizeInBytes == 0L ? 
getDefaultMaxBytesInMemory() : maxSizeInBytes, + skipBytesInMemoryOverheadCheck, + IndexSpec.DEFAULT, + 0, + false, + 0L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, + basePersistDirectory == null ? createNewBasePersistDirectory() : basePersistDirectory + ); - metrics = new FireDepartmentMetrics(); + metrics = new SegmentGenerationMetrics(); indexIO = new IndexIO( objectMapper, @@ -251,7 +249,7 @@ public class OpenAndClosedSegmentsAppenderatorTester implements AutoCloseable return tuningConfig; } - public FireDepartmentMetrics getMetrics() + public SegmentGenerationMetrics getMetrics() { return metrics; } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsBatchAppenderatorDriverTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsBatchAppenderatorDriverTest.java index ba8f097b669..0c6fb552a4d 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsBatchAppenderatorDriverTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsBatchAppenderatorDriverTest.java @@ -93,7 +93,7 @@ public class OpenAndClosedSegmentsBatchAppenderatorDriverTest extends EasyMockSu driver = new BatchAppenderatorDriver( openAndClosedSegmentsAppenderatorTester.getAppenderator(), allocator, - new TestUsedSegmentChecker(openAndClosedSegmentsAppenderatorTester.getPushedSegments()), + new TestPublishedSegmentRetriever(openAndClosedSegmentsAppenderatorTester.getPushedSegments()), dataSegmentKiller ); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java index 5a21a4331fe..ed3fe97d6cc 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java @@ -41,11 +41,12 @@ import org.apache.druid.query.Query; import org.apache.druid.query.QueryRunner; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.loading.DataSegmentKiller; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriverTest.TestCommitterSupplier; import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriverTest.TestSegmentAllocator; import org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriverTest.TestSegmentHandoffNotifierFactory; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.easymock.EasyMock; import org.easymock.EasyMockSupport; @@ -132,10 +133,10 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport createPersistFailAppenderator(), allocator, segmentHandoffNotifierFactory, - new NoopUsedSegmentChecker(), + new NoopPublishedSegmentRetriever(), dataSegmentKiller, OBJECT_MAPPER, - new FireDepartmentMetrics() + new SegmentGenerationMetrics() ); driver.startJob(null); @@ -170,10 +171,10 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport createPushFailAppenderator(), allocator, segmentHandoffNotifierFactory, - new 
NoopUsedSegmentChecker(), + new NoopPublishedSegmentRetriever(), dataSegmentKiller, OBJECT_MAPPER, - new FireDepartmentMetrics() + new SegmentGenerationMetrics() ); driver.startJob(null); @@ -208,10 +209,10 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport createDropFailAppenderator(), allocator, segmentHandoffNotifierFactory, - new NoopUsedSegmentChecker(), + new NoopPublishedSegmentRetriever(), dataSegmentKiller, OBJECT_MAPPER, - new FireDepartmentMetrics() + new SegmentGenerationMetrics() ); driver.startJob(null); @@ -259,10 +260,10 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport new FailableAppenderator(), allocator, segmentHandoffNotifierFactory, - new NoopUsedSegmentChecker(), + new NoopPublishedSegmentRetriever(), dataSegmentKiller, OBJECT_MAPPER, - new FireDepartmentMetrics() + new SegmentGenerationMetrics() ); driver.startJob(null); @@ -323,10 +324,10 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport } } - private static class NoopUsedSegmentChecker implements UsedSegmentChecker + private static class NoopPublishedSegmentRetriever implements PublishedSegmentRetriever { @Override - public Set findUsedSegments(Set identifiers) + public Set findPublishedSegments(Set identifiers) { return ImmutableSet.of(); } @@ -337,11 +338,6 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport return new FailableAppenderator().disablePush(); } - static Appenderator createPushInterruptAppenderator() - { - return new FailableAppenderator().interruptPush(); - } - static Appenderator createPersistFailAppenderator() { return new FailableAppenderator().disablePersist(); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java index 63775e2dc3b..69528b339ff 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java @@ -41,7 +41,7 @@ import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; import org.apache.druid.segment.loading.DataSegmentKiller; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.NumberedShardSpec; @@ -127,10 +127,10 @@ public class StreamAppenderatorDriverTest extends EasyMockSupport streamAppenderatorTester.getAppenderator(), allocator, segmentHandoffNotifierFactory, - new TestUsedSegmentChecker(streamAppenderatorTester.getPushedSegments()), + new TestPublishedSegmentRetriever(streamAppenderatorTester.getPushedSegments()), dataSegmentKiller, OBJECT_MAPPER, - new FireDepartmentMetrics() + new SegmentGenerationMetrics() ); EasyMock.replay(dataSegmentKiller); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java index 4e057c5c897..538784a8853 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java +++ 
b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java @@ -48,9 +48,8 @@ import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.plumber.Committers; +import org.apache.druid.segment.realtime.sink.Committers; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; @@ -909,7 +908,7 @@ public class StreamAppenderatorTest extends InitializedNullHandlingTest @Test public void testRestoreFromDisk() throws Exception { - final RealtimeTuningConfig tuningConfig; + final AppenderatorConfig tuningConfig; try ( final StreamAppenderatorTester tester = new StreamAppenderatorTester.Builder().maxRowsInMemory(2) diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java index da4a76f1a10..2a39718667c 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java @@ -53,18 +53,20 @@ import org.apache.druid.query.timeseries.TimeseriesQueryEngine; import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory; import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.incremental.SimpleRowIngestionMeters; import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; +import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.server.coordination.DataSegmentAnnouncer; import org.apache.druid.server.coordination.NoopDataSegmentAnnouncer; @@ -84,8 +86,8 @@ public class StreamAppenderatorTester implements AutoCloseable public static final String DATASOURCE = "foo"; private final DataSchema schema; - private final RealtimeTuningConfig tuningConfig; - private final FireDepartmentMetrics metrics; + private final AppenderatorConfig tuningConfig; + private final SegmentGenerationMetrics metrics; private final DataSegmentPusher dataSegmentPusher; private final ObjectMapper objectMapper; private final Appenderator appenderator; @@ -132,31 
+134,21 @@ public class StreamAppenderatorTester implements AutoCloseable null, objectMapper ); - tuningConfig = new RealtimeTuningConfig( - null, - maxRowsInMemory, - maxSizeInBytes == 0L ? getDefaultMaxBytesInMemory() : maxSizeInBytes, - skipBytesInMemoryOverheadCheck, - null, - null, - basePersistDirectory, - null, - null, - null, - null, - null, - null, - 0, - 0, - null, - null, - null, - null, - null, - null - ); + tuningConfig = new TestAppenderatorConfig( + TuningConfig.DEFAULT_APPENDABLE_INDEX, + maxRowsInMemory, + maxSizeInBytes == 0L ? getDefaultMaxBytesInMemory() : maxSizeInBytes, + skipBytesInMemoryOverheadCheck, + IndexSpec.DEFAULT, + 0, + false, + 0L, + OffHeapMemorySegmentWriteOutMediumFactory.instance(), + IndexMerger.UNLIMITED_MAX_COLUMNS_TO_MERGE, + basePersistDirectory + ); - metrics = new FireDepartmentMetrics(); + metrics = new SegmentGenerationMetrics(); queryExecutor = Execs.singleThreaded("queryExecutor(%d)"); IndexIO indexIO = new IndexIO( @@ -310,12 +302,12 @@ public class StreamAppenderatorTester implements AutoCloseable return schema; } - public RealtimeTuningConfig getTuningConfig() + public AppenderatorConfig getTuningConfig() { return tuningConfig; } - public FireDepartmentMetrics getMetrics() + public SegmentGenerationMetrics getMetrics() { return metrics; } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestAppenderatorConfig.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestAppenderatorConfig.java new file mode 100644 index 00000000000..5dba99de500 --- /dev/null +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestAppenderatorConfig.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.realtime.appenderator; + +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.incremental.AppendableIndexSpec; +import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; +import org.joda.time.Period; + +import javax.annotation.Nullable; +import java.io.File; +import java.util.Objects; + +public class TestAppenderatorConfig implements AppenderatorConfig +{ + private final AppendableIndexSpec appendableIndexSpec; + private final int maxRowsInMemory; + private final long maxBytesInMemory; + private final boolean skipBytesInMemoryOverheadCheck; + private final int maxColumnsToMerge; + private final PartitionsSpec partitionsSpec; + private final IndexSpec indexSpec; + private final File basePersistDirectory; + private final int maxPendingPersists; + private final boolean reportParseExceptions; + private final long pushTimeout; + private final IndexSpec indexSpecForIntermediatePersists; + @Nullable + private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; + + public TestAppenderatorConfig( + AppendableIndexSpec appendableIndexSpec, + Integer maxRowsInMemory, + Long maxBytesInMemory, + Boolean skipBytesInMemoryOverheadCheck, + IndexSpec indexSpec, + Integer maxPendingPersists, + Boolean reportParseExceptions, + Long pushTimeout, + @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + Integer maxColumnsToMerge, + File basePersistDirectory + ) + { + this.appendableIndexSpec = appendableIndexSpec; + this.maxRowsInMemory = maxRowsInMemory; + this.maxBytesInMemory = maxBytesInMemory; + this.skipBytesInMemoryOverheadCheck = skipBytesInMemoryOverheadCheck; + this.indexSpec = indexSpec; + this.maxPendingPersists = maxPendingPersists; + this.reportParseExceptions = reportParseExceptions; + this.pushTimeout = pushTimeout; + this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; + this.maxColumnsToMerge = maxColumnsToMerge; + this.basePersistDirectory = basePersistDirectory; + + this.partitionsSpec = null; + this.indexSpecForIntermediatePersists = this.indexSpec; + } + + @Override + public TestAppenderatorConfig withBasePersistDirectory(File dir) + { + throw new UnsupportedOperationException(); + } + + @Override + public AppendableIndexSpec getAppendableIndexSpec() + { + return appendableIndexSpec; + } + + @Override + public int getMaxRowsInMemory() + { + return maxRowsInMemory; + } + + @Override + public long getMaxBytesInMemory() + { + return maxBytesInMemory; + } + + @Override + public boolean isSkipBytesInMemoryOverheadCheck() + { + return skipBytesInMemoryOverheadCheck; + } + + @Nullable + @Override + public PartitionsSpec getPartitionsSpec() + { + return partitionsSpec; + } + + @Override + public IndexSpec getIndexSpec() + { + return indexSpec; + } + + @Override + public IndexSpec getIndexSpecForIntermediatePersists() + { + return indexSpecForIntermediatePersists; + } + + @Override + public int getMaxPendingPersists() + { + return maxPendingPersists; + } + + @Override + public boolean isReportParseExceptions() + { + return reportParseExceptions; + } + + @Nullable + @Override + public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() + { + return segmentWriteOutMediumFactory; + } + + @Override + public int getMaxColumnsToMerge() + { + return maxColumnsToMerge; + } + + @Override + public File getBasePersistDirectory() + { + return basePersistDirectory; + } + + @Override + public Period 
getIntermediatePersistPeriod() + { + return new Period(Integer.MAX_VALUE); // intermediate persist doesn't make much sense for batch jobs + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TestAppenderatorConfig that = (TestAppenderatorConfig) o; + return Objects.equals(appendableIndexSpec, that.appendableIndexSpec) && + maxRowsInMemory == that.maxRowsInMemory && + maxBytesInMemory == that.maxBytesInMemory && + skipBytesInMemoryOverheadCheck == that.skipBytesInMemoryOverheadCheck && + maxColumnsToMerge == that.maxColumnsToMerge && + maxPendingPersists == that.maxPendingPersists && + reportParseExceptions == that.reportParseExceptions && + pushTimeout == that.pushTimeout && + Objects.equals(partitionsSpec, that.partitionsSpec) && + Objects.equals(indexSpec, that.indexSpec) && + Objects.equals(indexSpecForIntermediatePersists, that.indexSpecForIntermediatePersists) && + Objects.equals(basePersistDirectory, that.basePersistDirectory) && + Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory); + } + + @Override + public int hashCode() + { + return Objects.hash( + appendableIndexSpec, + maxRowsInMemory, + maxBytesInMemory, + skipBytesInMemoryOverheadCheck, + maxColumnsToMerge, + partitionsSpec, + indexSpec, + indexSpecForIntermediatePersists, + basePersistDirectory, + maxPendingPersists, + reportParseExceptions, + pushTimeout, + segmentWriteOutMediumFactory + ); + } + + @Override + public String toString() + { + return "TestAppenderatorConfig{" + + "maxRowsInMemory=" + maxRowsInMemory + + ", maxBytesInMemory=" + maxBytesInMemory + + ", skipBytesInMemoryOverheadCheck=" + skipBytesInMemoryOverheadCheck + + ", maxColumnsToMerge=" + maxColumnsToMerge + + ", partitionsSpec=" + partitionsSpec + + ", indexSpec=" + indexSpec + + ", indexSpecForIntermediatePersists=" + indexSpecForIntermediatePersists + + ", basePersistDirectory=" + basePersistDirectory + + ", maxPendingPersists=" + maxPendingPersists + + ", reportParseExceptions=" + reportParseExceptions + + ", pushTimeout=" + pushTimeout + + ", segmentWriteOutMediumFactory=" + segmentWriteOutMediumFactory + + '}'; + } +} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestUsedSegmentChecker.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestPublishedSegmentRetriever.java similarity index 79% rename from server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestUsedSegmentChecker.java rename to server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestPublishedSegmentRetriever.java index 91ee44d1ce1..8042c798a87 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestUsedSegmentChecker.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/TestPublishedSegmentRetriever.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.realtime.appenderator; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.TimelineObjectHolder; import org.apache.druid.timeline.partition.PartitionChunk; @@ -28,24 +29,24 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -public class TestUsedSegmentChecker implements UsedSegmentChecker +public class TestPublishedSegmentRetriever implements PublishedSegmentRetriever { private final List pushedSegments; - public 
TestUsedSegmentChecker(List pushedSegments) + public TestPublishedSegmentRetriever(List pushedSegments) { this.pushedSegments = pushedSegments; } @Override - public Set findUsedSegments(Set identifiers) + public Set findPublishedSegments(Set identifiers) { final SegmentTimeline timeline = SegmentTimeline.forSegments(pushedSegments); final Set retVal = new HashSet<>(); - for (SegmentIdWithShardSpec identifier : identifiers) { - for (TimelineObjectHolder holder : timeline.lookup(identifier.getInterval())) { + for (SegmentId segmentId : identifiers) { + for (TimelineObjectHolder holder : timeline.lookup(segmentId.getInterval())) { for (PartitionChunk chunk : holder.getObject()) { - if (identifiers.contains(SegmentIdWithShardSpec.fromDataSegment(chunk.getObject()))) { + if (identifiers.contains(chunk.getObject().getId())) { retVal.add(chunk.getObject()); } } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManagerTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManagerTest.java index ce77db1d7cf..9bf629931b5 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManagerTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/UnifiedIndexerAppenderatorsManagerTest.java @@ -49,7 +49,7 @@ import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.join.JoinableFactoryWrapperTest; import org.apache.druid.segment.loading.NoopDataSegmentPusher; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; +import org.apache.druid.segment.realtime.SegmentGenerationMetrics; import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -107,7 +107,7 @@ public class UnifiedIndexerAppenderatorsManagerTest extends InitializedNullHandl null ), appenderatorConfig, - new FireDepartmentMetrics(), + new SegmentGenerationMetrics(), new NoopDataSegmentPusher(), TestHelper.makeJsonMapper(), TestHelper.getTestIndexIO(), diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/CustomVersioningPolicyTest.java b/server/src/test/java/org/apache/druid/segment/realtime/plumber/CustomVersioningPolicyTest.java deleted file mode 100644 index f145b0e1d79..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/CustomVersioningPolicyTest.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.segment.TestHelper; -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; -import org.joda.time.Interval; -import org.junit.Assert; -import org.junit.Test; - -public class CustomVersioningPolicyTest -{ - - @Test - public void testSerialization() throws Exception - { - Interval interval = new Interval(DateTime.now(DateTimeZone.UTC), DateTime.now(DateTimeZone.UTC)); - String version = "someversion"; - - CustomVersioningPolicy policy = new CustomVersioningPolicy(version); - - final ObjectMapper mapper = TestHelper.makeJsonMapper(); - CustomVersioningPolicy serialized = mapper.readValue( - mapper.writeValueAsBytes(policy), - CustomVersioningPolicy.class - ); - - Assert.assertEquals(version, policy.getVersion(interval)); - Assert.assertEquals(version, serialized.getVersion(interval)); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/MessageTimeRejectionPolicyFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/plumber/MessageTimeRejectionPolicyFactoryTest.java deleted file mode 100644 index 36e73dc60bc..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/MessageTimeRejectionPolicyFactoryTest.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import org.apache.druid.java.util.common.DateTimes; -import org.joda.time.DateTime; -import org.joda.time.Period; -import org.junit.Assert; -import org.junit.Test; - -/** - */ -public class MessageTimeRejectionPolicyFactoryTest -{ - @Test - public void testAccept() - { - Period period = new Period("PT10M"); - RejectionPolicy rejectionPolicy = new MessageTimeRejectionPolicyFactory().create(period); - - DateTime now = DateTimes.nowUtc(); - DateTime past = now.minus(period).minus(1); - DateTime future = now.plus(period).plus(1); - - Assert.assertTrue(rejectionPolicy.accept(now.getMillis())); - Assert.assertFalse(rejectionPolicy.accept(past.getMillis())); - Assert.assertTrue(rejectionPolicy.accept(future.getMillis())); - Assert.assertFalse(rejectionPolicy.accept(now.getMillis())); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java b/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java deleted file mode 100644 index 74a1217ed27..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/RealtimePlumberSchoolTest.java +++ /dev/null @@ -1,714 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.realtime.plumber; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Supplier; -import com.google.common.base.Suppliers; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import org.apache.druid.client.cache.CachePopulatorStats; -import org.apache.druid.client.cache.MapCache; -import org.apache.druid.data.input.Committer; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.Row; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.JSONParseSpec; -import org.apache.druid.data.input.impl.StringInputRowParser; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.granularity.Granularities; -import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate; -import org.apache.druid.query.DirectQueryProcessingPool; -import org.apache.druid.query.aggregation.AggregatorFactory; -import org.apache.druid.query.aggregation.CountAggregatorFactory; -import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.ReferenceCountingSegment; -import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.handoff.SegmentHandoffNotifier; -import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; -import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; -import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; -import org.apache.druid.segment.join.NoopJoinableFactory; -import org.apache.druid.segment.loading.DataSegmentPusher; -import org.apache.druid.segment.realtime.FireDepartmentMetrics; -import org.apache.druid.segment.realtime.FireDepartmentTest; -import org.apache.druid.segment.realtime.FireHydrant; -import org.apache.druid.segment.realtime.SegmentPublisher; -import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; -import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; -import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; -import org.apache.druid.server.coordination.DataSegmentAnnouncer; -import org.apache.druid.testing.InitializedNullHandlingTest; -import org.easymock.EasyMock; -import org.joda.time.DateTime; -import org.joda.time.Interval; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import 
org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.io.File; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; - -/** - * - */ -@RunWith(Parameterized.class) -public class RealtimePlumberSchoolTest extends InitializedNullHandlingTest -{ - @Parameterized.Parameters(name = "rejectionPolicy = {0}, segmentWriteOutMediumFactory = {1}") - public static Collection constructorFeeder() - { - final RejectionPolicyFactory[] rejectionPolicies = new RejectionPolicyFactory[]{ - new NoopRejectionPolicyFactory(), - new MessageTimeRejectionPolicyFactory() - }; - - final List constructors = new ArrayList<>(); - for (RejectionPolicyFactory rejectionPolicy : rejectionPolicies) { - constructors.add(new Object[]{rejectionPolicy, OffHeapMemorySegmentWriteOutMediumFactory.instance()}); - constructors.add(new Object[]{rejectionPolicy, TmpFileSegmentWriteOutMediumFactory.instance()}); - } - return constructors; - } - - private final RejectionPolicyFactory rejectionPolicy; - private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; - private RealtimePlumber plumber; - private RealtimePlumberSchool realtimePlumberSchool; - private DataSegmentAnnouncer announcer; - private SegmentPublisher segmentPublisher; - private DataSegmentPusher dataSegmentPusher; - private SegmentHandoffNotifier handoffNotifier; - private SegmentHandoffNotifierFactory handoffNotifierFactory; - private ServiceEmitter emitter; - private RealtimeTuningConfig tuningConfig; - private DataSchema schema; - private DataSchema schema2; - private FireDepartmentMetrics metrics; - private File tmpDir; - - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - public RealtimePlumberSchoolTest( - RejectionPolicyFactory rejectionPolicy, - SegmentWriteOutMediumFactory segmentWriteOutMediumFactory - ) - { - this.rejectionPolicy = rejectionPolicy; - this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; - } - - @Before - public void setUp() throws Exception - { - tmpDir = FileUtils.createTempDir(); - - ObjectMapper jsonMapper = new DefaultObjectMapper(); - - schema = new DataSchema( - "test", - jsonMapper.convertValue( - new StringInputRowParser( - new JSONParseSpec( - new TimestampSpec("timestamp", "auto", null), - DimensionsSpec.EMPTY, - null, - null, - null - ), - null - ), - Map.class - ), - new AggregatorFactory[]{new CountAggregatorFactory("rows")}, - new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), - null, - jsonMapper - ); - - schema2 = new DataSchema( - "test", - jsonMapper.convertValue( - new StringInputRowParser( - new JSONParseSpec( - new TimestampSpec("timestamp", "auto", null), - DimensionsSpec.EMPTY, - null, - null, - null - ), - null - ), - Map.class - ), - new AggregatorFactory[]{new CountAggregatorFactory("rows")}, - new UniformGranularitySpec(Granularities.YEAR, Granularities.NONE, null), - null, - jsonMapper - ); - - announcer = EasyMock.createMock(DataSegmentAnnouncer.class); - announcer.announceSegment(EasyMock.anyObject()); - EasyMock.expectLastCall().anyTimes(); - - segmentPublisher = EasyMock.createNiceMock(SegmentPublisher.class); - dataSegmentPusher = EasyMock.createNiceMock(DataSegmentPusher.class); - handoffNotifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class); - handoffNotifier = EasyMock.createNiceMock(SegmentHandoffNotifier.class); - 
EasyMock.expect(handoffNotifierFactory.createSegmentHandoffNotifier(EasyMock.anyString())) - .andReturn(handoffNotifier) - .anyTimes(); - EasyMock.expect( - handoffNotifier.registerSegmentHandoffCallback( - EasyMock.anyObject(), - EasyMock.anyObject(), - EasyMock.anyObject() - ) - ).andReturn(true).anyTimes(); - - emitter = EasyMock.createMock(ServiceEmitter.class); - - EasyMock.replay(announcer, segmentPublisher, dataSegmentPusher, handoffNotifierFactory, handoffNotifier, emitter); - - tuningConfig = new RealtimeTuningConfig( - null, - 1, - null, - null, - null, - null, - temporaryFolder.newFolder(), - new IntervalStartVersioningPolicy(), - rejectionPolicy, - null, - null, - null, - null, - 0, - 0, - false, - null, - null, - null, - null, - null - ); - - realtimePlumberSchool = new RealtimePlumberSchool( - emitter, - new DefaultQueryRunnerFactoryConglomerate(new HashMap<>()), - dataSegmentPusher, - announcer, - segmentPublisher, - handoffNotifierFactory, - DirectQueryProcessingPool.INSTANCE, - NoopJoinableFactory.INSTANCE, - TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory), - TestHelper.getTestIndexIO(), - MapCache.create(0), - FireDepartmentTest.NO_CACHE_CONFIG, - new CachePopulatorStats(), - TestHelper.makeJsonMapper() - ); - - metrics = new FireDepartmentMetrics(); - plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema, tuningConfig, metrics); - } - - @After - public void tearDown() throws Exception - { - EasyMock.verify(announcer, segmentPublisher, dataSegmentPusher, handoffNotifierFactory, handoffNotifier, emitter); - FileUtils.deleteDirectory( - new File( - tuningConfig.getBasePersistDirectory(), - schema.getDataSource() - ) - ); - FileUtils.deleteDirectory(tmpDir); - } - - @Test(timeout = 60_000L) - public void testPersist() throws Exception - { - testPersist(null); - } - - @Test(timeout = 60_000L) - public void testPersistWithCommitMetadata() throws Exception - { - final Object commitMetadata = "dummyCommitMetadata"; - testPersist(commitMetadata); - - plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema, tuningConfig, metrics); - Assert.assertEquals(commitMetadata, plumber.startJob()); - } - - private void testPersist(final Object commitMetadata) throws Exception - { - Sink sink = new Sink( - Intervals.utc(0, TimeUnit.HOURS.toMillis(1)), - schema, - tuningConfig.getShardSpec(), - DateTimes.of("2014-12-01T12:34:56.789").toString(), - tuningConfig.getAppendableIndexSpec(), - tuningConfig.getMaxRowsInMemory(), - tuningConfig.getMaxBytesInMemoryOrDefault(), - true, - tuningConfig.getDedupColumn() - ); - plumber.getSinks().put(0L, sink); - Assert.assertNull(plumber.startJob()); - - final InputRow row = EasyMock.createNiceMock(InputRow.class); - EasyMock.expect(row.getTimestampFromEpoch()).andReturn(0L); - EasyMock.expect(row.getDimensions()).andReturn(new ArrayList()); - EasyMock.replay(row); - - final CountDownLatch doneSignal = new CountDownLatch(1); - - final Committer committer = new Committer() - { - @Override - public Object getMetadata() - { - return commitMetadata; - } - - @Override - public void run() - { - doneSignal.countDown(); - } - }; - plumber.add(row, Suppliers.ofInstance(committer)); - plumber.persist(committer); - - doneSignal.await(); - - plumber.getSinks().clear(); - plumber.finishJob(); - } - - @Test(timeout = 60_000L) - public void testPersistFails() throws Exception - { - Sink sink = new Sink( - Intervals.utc(0, TimeUnit.HOURS.toMillis(1)), - schema, - tuningConfig.getShardSpec(), - 
DateTimes.of("2014-12-01T12:34:56.789").toString(), - tuningConfig.getAppendableIndexSpec(), - tuningConfig.getMaxRowsInMemory(), - tuningConfig.getMaxBytesInMemoryOrDefault(), - true, - tuningConfig.getDedupColumn() - ); - plumber.getSinks().put(0L, sink); - plumber.startJob(); - final InputRow row = EasyMock.createNiceMock(InputRow.class); - EasyMock.expect(row.getTimestampFromEpoch()).andReturn(0L); - EasyMock.expect(row.getDimensions()).andReturn(new ArrayList()); - EasyMock.replay(row); - plumber.add(row, Suppliers.ofInstance(Committers.nil())); - - final CountDownLatch doneSignal = new CountDownLatch(1); - - plumber.persist( - supplierFromRunnable( - () -> { - doneSignal.countDown(); - throw new RuntimeException(); - } - ).get() - ); - - doneSignal.await(); - - // Exception may need time to propagate - while (metrics.failedPersists() < 1) { - Thread.sleep(100); - } - - Assert.assertEquals(1, metrics.failedPersists()); - } - - @Test(timeout = 60_000L) - public void testPersistHydrantGaps() throws Exception - { - final Object commitMetadata = "dummyCommitMetadata"; - testPersistHydrantGapsHelper(commitMetadata); - } - - private void testPersistHydrantGapsHelper(final Object commitMetadata) throws Exception - { - Interval testInterval = new Interval(DateTimes.of("1970-01-01"), DateTimes.of("1971-01-01")); - - RealtimePlumber plumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics); - Sink sink = new Sink( - testInterval, - schema2, - tuningConfig.getShardSpec(), - DateTimes.of("2014-12-01T12:34:56.789").toString(), - tuningConfig.getAppendableIndexSpec(), - tuningConfig.getMaxRowsInMemory(), - tuningConfig.getMaxBytesInMemoryOrDefault(), - true, - tuningConfig.getDedupColumn() - ); - plumber2.getSinks().put(0L, sink); - Assert.assertNull(plumber2.startJob()); - final CountDownLatch doneSignal = new CountDownLatch(1); - final Committer committer = new Committer() - { - @Override - public Object getMetadata() - { - return commitMetadata; - } - - @Override - public void run() - { - doneSignal.countDown(); - } - }; - plumber2.add(getTestInputRow("1970-01-01"), Suppliers.ofInstance(committer)); - plumber2.add(getTestInputRow("1970-02-01"), Suppliers.ofInstance(committer)); - plumber2.add(getTestInputRow("1970-03-01"), Suppliers.ofInstance(committer)); - plumber2.add(getTestInputRow("1970-04-01"), Suppliers.ofInstance(committer)); - plumber2.add(getTestInputRow("1970-05-01"), Suppliers.ofInstance(committer)); - - plumber2.persist(committer); - - doneSignal.await(); - plumber2.getSinks().clear(); - plumber2.finishJob(); - - File persistDir = plumber2.computePersistDir(schema2, testInterval); - - /* Check that all hydrants were persisted */ - for (int i = 0; i < 5; i++) { - Assert.assertTrue(new File(persistDir, String.valueOf(i)).exists()); - } - - /* Create some gaps in the persisted hydrants and reload */ - FileUtils.deleteDirectory(new File(persistDir, "1")); - FileUtils.deleteDirectory(new File(persistDir, "3")); - RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber( - schema2, - tuningConfig, - metrics - ); - restoredPlumber.bootstrapSinksFromDisk(); - - Map sinks = restoredPlumber.getSinks(); - Assert.assertEquals(1, sinks.size()); - - - List hydrants = Lists.newArrayList(sinks.get(new Long(0))); - DateTime startTime = DateTimes.of("1970-01-01T00:00:00.000Z"); - Interval expectedInterval = new Interval(startTime, DateTimes.of("1971-01-01T00:00:00.000Z")); - Assert.assertEquals(0, hydrants.get(0).getCount()); - 
Assert.assertEquals( - expectedInterval, - hydrants.get(0).getSegmentDataInterval() - ); - Assert.assertEquals(2, hydrants.get(1).getCount()); - Assert.assertEquals( - expectedInterval, - hydrants.get(1).getSegmentDataInterval() - ); - Assert.assertEquals(4, hydrants.get(2).getCount()); - Assert.assertEquals( - expectedInterval, - hydrants.get(2).getSegmentDataInterval() - ); - - /* Delete all the hydrants and reload, no sink should be created */ - FileUtils.deleteDirectory(new File(persistDir, "0")); - FileUtils.deleteDirectory(new File(persistDir, "2")); - FileUtils.deleteDirectory(new File(persistDir, "4")); - RealtimePlumber restoredPlumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber( - schema2, - tuningConfig, - metrics - ); - restoredPlumber2.bootstrapSinksFromDisk(); - - Assert.assertEquals(0, restoredPlumber2.getSinks().size()); - } - - @Test(timeout = 60_000L) - public void testDimOrderInheritance() throws Exception - { - final Object commitMetadata = "dummyCommitMetadata"; - testDimOrderInheritanceHelper(commitMetadata); - } - - private void testDimOrderInheritanceHelper(final Object commitMetadata) throws Exception - { - List> expectedDims = ImmutableList.of( - ImmutableList.of("dimD"), - ImmutableList.of("dimC"), - ImmutableList.of("dimA"), - ImmutableList.of("dimB"), - ImmutableList.of("dimE"), - ImmutableList.of("dimD", "dimC", "dimA", "dimB", "dimE") - ); - - QueryableIndex qindex; - FireHydrant hydrant; - Map sinks; - - RealtimePlumber plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics); - Assert.assertNull(plumber.startJob()); - - final CountDownLatch doneSignal = new CountDownLatch(1); - - final Committer committer = new Committer() - { - @Override - public Object getMetadata() - { - return commitMetadata; - } - - @Override - public void run() - { - doneSignal.countDown(); - } - }; - - plumber.add( - getTestInputRowFull( - "1970-01-01", - ImmutableList.of("dimD"), - ImmutableList.of("1") - ), - Suppliers.ofInstance(committer) - ); - plumber.add( - getTestInputRowFull( - "1970-01-01", - ImmutableList.of("dimC"), - ImmutableList.of("1") - ), - Suppliers.ofInstance(committer) - ); - plumber.add( - getTestInputRowFull( - "1970-01-01", - ImmutableList.of("dimA"), - ImmutableList.of("1") - ), - Suppliers.ofInstance(committer) - ); - plumber.add( - getTestInputRowFull( - "1970-01-01", - ImmutableList.of("dimB"), - ImmutableList.of("1") - ), - Suppliers.ofInstance(committer) - ); - plumber.add( - getTestInputRowFull( - "1970-01-01", - ImmutableList.of("dimE"), - ImmutableList.of("1") - ), - Suppliers.ofInstance(committer) - ); - plumber.add( - getTestInputRowFull( - "1970-01-01", - ImmutableList.of("dimA", "dimB", "dimC", "dimD", "dimE"), - ImmutableList.of("1") - ), - Suppliers.ofInstance(committer) - ); - - plumber.persist(committer); - - doneSignal.await(); - - plumber.getSinks().clear(); - plumber.finishJob(); - - RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber( - schema2, - tuningConfig, - metrics - ); - restoredPlumber.bootstrapSinksFromDisk(); - - sinks = restoredPlumber.getSinks(); - Assert.assertEquals(1, sinks.size()); - List hydrants = Lists.newArrayList(sinks.get(0L)); - - for (int i = 0; i < hydrants.size(); i++) { - hydrant = hydrants.get(i); - ReferenceCountingSegment segment = hydrant.getIncrementedSegment(); - try { - qindex = segment.asQueryableIndex(); - Assert.assertEquals(i, hydrant.getCount()); - Assert.assertEquals(expectedDims.get(i), 
ImmutableList.copyOf(qindex.getAvailableDimensions())); - } - finally { - segment.decrement(); - } - } - } - - private InputRow getTestInputRow(final String timeStr) - { - return new InputRow() - { - @Override - public List getDimensions() - { - return new ArrayList<>(); - } - - @Override - public long getTimestampFromEpoch() - { - return DateTimes.of(timeStr).getMillis(); - } - - @Override - public DateTime getTimestamp() - { - return DateTimes.of(timeStr); - } - - @Override - public List getDimension(String dimension) - { - return new ArrayList<>(); - } - - @Override - public Number getMetric(String metric) - { - return 0; - } - - @Override - public Object getRaw(String dimension) - { - return null; - } - - @Override - public int compareTo(Row o) - { - return 0; - } - }; - } - - private InputRow getTestInputRowFull(final String timeStr, final List dims, final List dimVals) - { - return new InputRow() - { - @Override - public List getDimensions() - { - return dims; - } - - @Override - public long getTimestampFromEpoch() - { - return DateTimes.of(timeStr).getMillis(); - } - - @Override - public DateTime getTimestamp() - { - return DateTimes.of(timeStr); - } - - @Override - public List getDimension(String dimension) - { - return dimVals; - } - - @Override - public Number getMetric(String metric) - { - return 0; - } - - @Override - public Object getRaw(String dimension) - { - return dimVals; - } - - @Override - public int compareTo(Row o) - { - return 0; - } - }; - } - - private static Supplier supplierFromRunnable(final Runnable runnable) - { - final Committer committer = new Committer() - { - @Override - public Object getMetadata() - { - return null; - } - - @Override - public void run() - { - runnable.run(); - } - }; - return Suppliers.ofInstance(committer); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/ServerTimeRejectionPolicyFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/plumber/ServerTimeRejectionPolicyFactoryTest.java deleted file mode 100644 index 496788e030e..00000000000 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/ServerTimeRejectionPolicyFactoryTest.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.realtime.plumber; - -import org.apache.druid.java.util.common.DateTimes; -import org.joda.time.DateTime; -import org.joda.time.Period; -import org.junit.Assert; -import org.junit.Test; - -/** - */ -public class ServerTimeRejectionPolicyFactoryTest -{ - @Test - public void testAccept() - { - Period period = new Period("PT10M"); - - RejectionPolicy rejectionPolicy = new ServerTimeRejectionPolicyFactory().create(period); - - DateTime now = DateTimes.nowUtc(); - DateTime past = now.minus(period).minus(100); - DateTime future = now.plus(period).plus(100); - - Assert.assertTrue(rejectionPolicy.accept(now.getMillis())); - Assert.assertFalse(rejectionPolicy.accept(past.getMillis())); - Assert.assertFalse(rejectionPolicy.accept(future.getMillis())); - } -} diff --git a/server/src/test/java/org/apache/druid/segment/realtime/plumber/SinkTest.java b/server/src/test/java/org/apache/druid/segment/realtime/sink/SinkTest.java similarity index 74% rename from server/src/test/java/org/apache/druid/segment/realtime/plumber/SinkTest.java rename to server/src/test/java/org/apache/druid/segment/realtime/sink/SinkTest.java index 0d3c9c23bff..9d85ec6c8e6 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/plumber/SinkTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/sink/SinkTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.segment.realtime.plumber; +package org.apache.druid.segment.realtime.sink; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -43,16 +43,17 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IndexSizeExceededException; import org.apache.druid.segment.indexing.DataSchema; -import org.apache.druid.segment.indexing.RealtimeTuningConfig; +import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.realtime.FireHydrant; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.SegmentId; +import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.apache.druid.timeline.partition.ShardSpec; import org.apache.druid.utils.CloseableUtils; import org.easymock.EasyMock; import org.joda.time.DateTime; import org.joda.time.Interval; -import org.joda.time.Period; import org.junit.Assert; import org.junit.Test; @@ -69,6 +70,9 @@ import java.util.function.Function; */ public class SinkTest extends InitializedNullHandlingTest { + private static final ShardSpec SHARD_SPEC = new NumberedShardSpec(0, 1); + private static final int MAX_ROWS_IN_MEMORY = 100; + @Test public void testSwap() throws Exception { @@ -83,39 +87,15 @@ public class SinkTest extends InitializedNullHandlingTest final Interval interval = Intervals.of("2013-01-01/2013-01-02"); final String version = DateTimes.nowUtc().toString(); - RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig( - null, - 100, - null, - null, - new Period("P1Y"), - null, - null, - null, - null, - null, - null, - null, - null, - 0, - 0, - null, - null, - null, - null, - null, - null - ); final Sink sink = new Sink( interval, schema, - tuningConfig.getShardSpec(), + SHARD_SPEC, version, - tuningConfig.getAppendableIndexSpec(), - tuningConfig.getMaxRowsInMemory(), - tuningConfig.getMaxBytesInMemoryOrDefault(), - true, - tuningConfig.getDedupColumn() + 
TuningConfig.DEFAULT_APPENDABLE_INDEX, + MAX_ROWS_IN_MEMORY, + TuningConfig.DEFAULT_APPENDABLE_INDEX.getDefaultMaxBytesInMemory(), + true ); sink.add( @@ -227,93 +207,6 @@ public class SinkTest extends InitializedNullHandlingTest Assert.assertEquals(2, Iterators.size(sink.iterator())); } - @Test - public void testDedup() throws Exception - { - final DataSchema schema = new DataSchema( - "test", - new TimestampSpec(null, null, null), - DimensionsSpec.EMPTY, - new AggregatorFactory[]{new CountAggregatorFactory("rows")}, - new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null), - null - ); - - final Interval interval = Intervals.of("2013-01-01/2013-01-02"); - final String version = DateTimes.nowUtc().toString(); - RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig( - null, - 100, - null, - null, - new Period("P1Y"), - null, - null, - null, - null, - null, - null, - null, - null, - 0, - 0, - null, - null, - null, - null, - "dedupColumn", - null - ); - final Sink sink = new Sink( - interval, - schema, - tuningConfig.getShardSpec(), - version, - tuningConfig.getAppendableIndexSpec(), - tuningConfig.getMaxRowsInMemory(), - tuningConfig.getMaxBytesInMemoryOrDefault(), - true, - tuningConfig.getDedupColumn() - ); - - int rows = sink.add(new MapBasedInputRow( - DateTimes.of("2013-01-01"), - ImmutableList.of("field", "dedupColumn"), - ImmutableMap.of("field1", "value1", "dedupColumn", "v1") - ), false).getRowCount(); - Assert.assertTrue(rows > 0); - - // dedupColumn is null - rows = sink.add(new MapBasedInputRow( - DateTimes.of("2013-01-01"), - ImmutableList.of("field", "dedupColumn"), - ImmutableMap.of("field1", "value2") - ), false).getRowCount(); - Assert.assertTrue(rows > 0); - - // dedupColumn is null - rows = sink.add(new MapBasedInputRow( - DateTimes.of("2013-01-01"), - ImmutableList.of("field", "dedupColumn"), - ImmutableMap.of("field1", "value3") - ), false).getRowCount(); - Assert.assertTrue(rows > 0); - - rows = sink.add(new MapBasedInputRow( - DateTimes.of("2013-01-01"), - ImmutableList.of("field", "dedupColumn"), - ImmutableMap.of("field1", "value4", "dedupColumn", "v2") - ), false).getRowCount(); - Assert.assertTrue(rows > 0); - - rows = sink.add(new MapBasedInputRow( - DateTimes.of("2013-01-01"), - ImmutableList.of("field", "dedupColumn"), - ImmutableMap.of("field1", "value5", "dedupColumn", "v1") - ), false).getRowCount(); - Assert.assertTrue(rows == -2); - } - @Test public void testAcquireSegmentReferences_empty() { @@ -378,39 +271,15 @@ public class SinkTest extends InitializedNullHandlingTest final Interval interval = Intervals.of("2013-01-01/2013-01-02"); final String version = DateTimes.nowUtc().toString(); - RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig( - null, - 2, - null, - null, - new Period("P1Y"), - null, - null, - null, - null, - null, - null, - null, - null, - 0, - 0, - null, - null, - null, - null, - "dedupColumn", - null - ); final Sink sink = new Sink( interval, schema, - tuningConfig.getShardSpec(), + SHARD_SPEC, version, - tuningConfig.getAppendableIndexSpec(), - tuningConfig.getMaxRowsInMemory(), - tuningConfig.getMaxBytesInMemoryOrDefault(), - true, - tuningConfig.getDedupColumn() + TuningConfig.DEFAULT_APPENDABLE_INDEX, + MAX_ROWS_IN_MEMORY, + TuningConfig.DEFAULT_APPENDABLE_INDEX.getDefaultMaxBytesInMemory(), + true ); sink.add(new MapBasedInputRow( diff --git a/server/src/test/java/org/apache/druid/server/QueryStackTests.java b/server/src/test/java/org/apache/druid/server/QueryStackTests.java index 
7d3f58ef600..3df1d95b33a 100644 --- a/server/src/test/java/org/apache/druid/server/QueryStackTests.java +++ b/server/src/test/java/org/apache/druid/server/QueryStackTests.java @@ -98,7 +98,6 @@ import org.apache.druid.utils.JvmUtils; import org.junit.Assert; import javax.annotation.Nullable; - import java.util.Collections; import java.util.Map; import java.util.Set; diff --git a/server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerCacheTest.java b/server/src/test/java/org/apache/druid/server/coordination/SegmentBootstrapperCacheTest.java similarity index 79% rename from server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerCacheTest.java rename to server/src/test/java/org/apache/druid/server/coordination/SegmentBootstrapperCacheTest.java index b7ce3b8e058..7629a6b875c 100644 --- a/server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerCacheTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/SegmentBootstrapperCacheTest.java @@ -23,6 +23,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import org.apache.druid.guice.ServerTypeConfig; import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.loading.DataSegmentPusher; @@ -34,7 +36,6 @@ import org.apache.druid.segment.loading.StorageLocation; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.server.SegmentManager; import org.apache.druid.server.TestSegmentUtils; -import org.apache.druid.server.metrics.NoopServiceEmitter; import org.apache.druid.timeline.DataSegment; import org.junit.Assert; import org.junit.Before; @@ -49,10 +50,10 @@ import java.util.Collections; import java.util.List; /** - * Similar to {@link SegmentLoadDropHandlerTest}. This class includes tests that cover the + * Similar to {@link SegmentBootstrapperTest}. This class includes tests that cover the * storage location layer as well. 
*/ -public class SegmentLoadDropHandlerCacheTest +public class SegmentBootstrapperCacheTest { private static final long MAX_SIZE = 1000L; private static final long SEGMENT_SIZE = 100L; @@ -63,6 +64,8 @@ public class SegmentLoadDropHandlerCacheTest private SegmentManager segmentManager; private SegmentLoaderConfig loaderConfig; private SegmentLocalCacheManager cacheManager; + private TestCoordinatorClient coordinatorClient; + private ServiceEmitter emitter; private ObjectMapper objectMapper; @Before @@ -98,19 +101,22 @@ public class SegmentLoadDropHandlerCacheTest objectMapper ); segmentManager = new SegmentManager(cacheManager); - segmentAnnouncer = new TestDataSegmentAnnouncer(); serverAnnouncer = new TestDataServerAnnouncer(); - EmittingLogger.registerEmitter(new NoopServiceEmitter()); + segmentAnnouncer = new TestDataSegmentAnnouncer(); + coordinatorClient = new TestCoordinatorClient(); + emitter = new StubServiceEmitter(); + EmittingLogger.registerEmitter(emitter); } @Test public void testLoadStartStopWithEmptyLocations() throws IOException { final List emptyLocations = ImmutableList.of(); + final SegmentLoaderConfig loaderConfig = new SegmentLoaderConfig(); segmentManager = new SegmentManager( new SegmentLocalCacheManager( emptyLocations, - new SegmentLoaderConfig(), + loaderConfig, new LeastBytesUsedStorageLocationSelectorStrategy(emptyLocations), TestIndex.INDEX_IO, objectMapper @@ -118,17 +124,26 @@ public class SegmentLoadDropHandlerCacheTest ); final SegmentLoadDropHandler loadDropHandler = new SegmentLoadDropHandler( - new SegmentLoaderConfig(), + loaderConfig, + segmentAnnouncer, + segmentManager + ); + + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + loadDropHandler, + loaderConfig, segmentAnnouncer, serverAnnouncer, segmentManager, - new ServerTypeConfig(ServerType.BROKER) + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + emitter ); - loadDropHandler.start(); - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); + bootstrapper.start(); + Assert.assertEquals(1, serverAnnouncer.getObservedCount()); - loadDropHandler.stop(); + bootstrapper.stop(); Assert.assertEquals(0, serverAnnouncer.getObservedCount()); } @@ -138,15 +153,24 @@ public class SegmentLoadDropHandlerCacheTest final SegmentLoadDropHandler loadDropHandler = new SegmentLoadDropHandler( loaderConfig, segmentAnnouncer, - serverAnnouncer, - segmentManager, - new ServerTypeConfig(ServerType.BROKER) + segmentManager ); - loadDropHandler.start(); + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + loadDropHandler, + loaderConfig, + segmentAnnouncer, + serverAnnouncer, + segmentManager, + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + emitter + ); + + bootstrapper.start(); Assert.assertEquals(1, serverAnnouncer.getObservedCount()); - loadDropHandler.stop(); + bootstrapper.stop(); Assert.assertEquals(0, serverAnnouncer.getObservedCount()); } @@ -169,13 +193,21 @@ public class SegmentLoadDropHandlerCacheTest final SegmentLoadDropHandler loadDropHandler = new SegmentLoadDropHandler( loaderConfig, segmentAnnouncer, - serverAnnouncer, - segmentManager, - new ServerTypeConfig(ServerType.HISTORICAL) + segmentManager ); - // Start the load drop handler - loadDropHandler.start(); + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + loadDropHandler, + loaderConfig, + segmentAnnouncer, + serverAnnouncer, + segmentManager, + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + emitter + ); + + bootstrapper.start(); 
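+ // The bootstrapper, rather than the load/drop handler, now owns server announcement and the
+ // bootstrap sequence: the server is announced exactly once on start() and unannounced on stop().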
Assert.assertEquals(1, serverAnnouncer.getObservedCount()); // Verify the expected announcements @@ -191,7 +223,7 @@ public class SegmentLoadDropHandlerCacheTest loadDropHandler.addSegment(newSegment, null); Assert.assertTrue(segmentAnnouncer.getObservedSegments().contains(newSegment)); - loadDropHandler.stop(); + bootstrapper.stop(); Assert.assertEquals(0, serverAnnouncer.getObservedCount()); } } diff --git a/server/src/test/java/org/apache/druid/server/coordination/SegmentBootstrapperTest.java b/server/src/test/java/org/apache/druid/server/coordination/SegmentBootstrapperTest.java new file mode 100644 index 00000000000..c41763f1824 --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordination/SegmentBootstrapperTest.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordination; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.guice.ServerTypeConfig; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.metrics.StubServiceEmitter; +import org.apache.druid.segment.loading.SegmentLoaderConfig; +import org.apache.druid.segment.loading.StorageLocationConfig; +import org.apache.druid.server.SegmentManager; +import org.apache.druid.timeline.DataSegment; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.apache.druid.server.TestSegmentUtils.makeSegment; + +public class SegmentBootstrapperTest +{ + private static final int COUNT = 50; + + private TestDataSegmentAnnouncer segmentAnnouncer; + private TestDataServerAnnouncer serverAnnouncer; + private SegmentLoaderConfig segmentLoaderConfig; + private TestCoordinatorClient coordinatorClient; + private StubServiceEmitter serviceEmitter; + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Before + public void setUp() throws IOException + { + final File segmentCacheDir = temporaryFolder.newFolder(); + + segmentAnnouncer = new TestDataSegmentAnnouncer(); + serverAnnouncer = new TestDataServerAnnouncer(); + segmentLoaderConfig = new SegmentLoaderConfig() + { + @Override + public File getInfoDir() + { + return segmentCacheDir; + } + + @Override + public int getNumLoadingThreads() + { + return 5; + } + + @Override + public int getAnnounceIntervalMillis() + { + return 50; + } + + @Override + public List getLocations() + { + return Collections.singletonList( + new StorageLocationConfig(segmentCacheDir, null, null) + ); 
+ } + }; + + coordinatorClient = new TestCoordinatorClient(); + serviceEmitter = new StubServiceEmitter(); + EmittingLogger.registerEmitter(serviceEmitter); + } + + + @Test + public void testStartStop() throws Exception + { + final Set segments = new HashSet<>(); + for (int i = 0; i < COUNT; ++i) { + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-01"))); + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-02"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("P1d/2011-04-02"))); + segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-01"))); + segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-02"))); + } + + final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(segments); + final SegmentManager segmentManager = new SegmentManager(cacheManager); + final SegmentLoadDropHandler handler = new SegmentLoadDropHandler( + segmentLoaderConfig, + segmentAnnouncer, + segmentManager + ); + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + handler, + segmentLoaderConfig, + segmentAnnouncer, + serverAnnouncer, + segmentManager, + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + serviceEmitter + ); + + Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); + bootstrapper.start(); + + Assert.assertEquals(1, serverAnnouncer.getObservedCount()); + Assert.assertFalse(segmentManager.getDataSourceCounts().isEmpty()); + + for (int i = 0; i < COUNT; ++i) { + Assert.assertEquals(3L, segmentManager.getDataSourceCounts().get("test" + i).longValue()); + Assert.assertEquals(2L, segmentManager.getDataSourceCounts().get("test_two" + i).longValue()); + } + + Assert.assertEquals(ImmutableList.copyOf(segments), segmentAnnouncer.getObservedSegments()); + + final ImmutableList expectedBootstrapSegments = ImmutableList.copyOf(segments); + Assert.assertEquals(expectedBootstrapSegments, cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(expectedBootstrapSegments, cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedSegmentsLoadedIntoPageCache()); + + bootstrapper.stop(); + + Assert.assertEquals(0, serverAnnouncer.getObservedCount()); + Assert.assertEquals(1, cacheManager.getObservedShutdownBootstrapCount().get()); + } + + @Test + public void testLoadCachedSegments() throws Exception + { + final Set segments = new HashSet<>(); + for (int i = 0; i < COUNT; ++i) { + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-01"))); + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-02"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("P1d/2011-04-02"))); + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-03"))); + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-04"))); + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-05"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T01"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T02"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T03"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T05"))); + segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T06"))); + segments.add(makeSegment("test_two" + i, "1", 
Intervals.of("P1d/2011-04-01"))); + segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-02"))); + } + + final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(segments); + final SegmentManager segmentManager = new SegmentManager(cacheManager); + final SegmentLoadDropHandler handler = new SegmentLoadDropHandler(segmentLoaderConfig, segmentAnnouncer, segmentManager); + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + handler, + segmentLoaderConfig, + segmentAnnouncer, + serverAnnouncer, + segmentManager, + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + serviceEmitter + ); + + Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); + + bootstrapper.start(); + + Assert.assertEquals(1, serverAnnouncer.getObservedCount()); + Assert.assertFalse(segmentManager.getDataSourceCounts().isEmpty()); + + for (int i = 0; i < COUNT; ++i) { + Assert.assertEquals(11L, segmentManager.getDataSourceCounts().get("test" + i).longValue()); + Assert.assertEquals(2L, segmentManager.getDataSourceCounts().get("test_two" + i).longValue()); + } + + Assert.assertEquals(ImmutableList.copyOf(segments), segmentAnnouncer.getObservedSegments()); + + final ImmutableList expectedBootstrapSegments = ImmutableList.copyOf(segments); + Assert.assertEquals(expectedBootstrapSegments, cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(expectedBootstrapSegments, cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedSegmentsLoadedIntoPageCache()); + + bootstrapper.stop(); + + Assert.assertEquals(0, serverAnnouncer.getObservedCount()); + Assert.assertEquals(1, cacheManager.getObservedShutdownBootstrapCount().get()); + } + + @Test + public void testLoadBootstrapSegments() throws Exception + { + final Set segments = new HashSet<>(); + for (int i = 0; i < COUNT; ++i) { + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-01"))); + segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-02"))); + segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-01"))); + segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-02"))); + } + + final TestCoordinatorClient coordinatorClient = new TestCoordinatorClient(segments); + final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(); + final SegmentManager segmentManager = new SegmentManager(cacheManager); + final SegmentLoadDropHandler handler = new SegmentLoadDropHandler( + segmentLoaderConfig, + segmentAnnouncer, + segmentManager + ); + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + handler, + segmentLoaderConfig, + segmentAnnouncer, + serverAnnouncer, + segmentManager, + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + serviceEmitter + ); + + Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); + + bootstrapper.start(); + + Assert.assertEquals(1, serverAnnouncer.getObservedCount()); + Assert.assertFalse(segmentManager.getDataSourceCounts().isEmpty()); + + for (int i = 0; i < COUNT; ++i) { + Assert.assertEquals(2L, segmentManager.getDataSourceCounts().get("test" + i).longValue()); + Assert.assertEquals(2L, segmentManager.getDataSourceCounts().get("test_two" + i).longValue()); + } + + final ImmutableList expectedBootstrapSegments = ImmutableList.copyOf(segments); + + 
Assert.assertEquals(expectedBootstrapSegments, segmentAnnouncer.getObservedSegments()); + + Assert.assertEquals(expectedBootstrapSegments, cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(expectedBootstrapSegments, cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); + serviceEmitter.verifyValue("segment/bootstrap/count", expectedBootstrapSegments.size()); + serviceEmitter.verifyEmitted("segment/bootstrap/time", 1); + + bootstrapper.stop(); + } + + @Test + public void testLoadBootstrapSegmentsWhenExceptionThrown() throws Exception + { + final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(); + final SegmentManager segmentManager = new SegmentManager(cacheManager); + final SegmentLoadDropHandler handler = new SegmentLoadDropHandler( + segmentLoaderConfig, + segmentAnnouncer, + segmentManager + ); + final SegmentBootstrapper bootstrapper = new SegmentBootstrapper( + handler, + segmentLoaderConfig, + segmentAnnouncer, + serverAnnouncer, + segmentManager, + new ServerTypeConfig(ServerType.HISTORICAL), + coordinatorClient, + serviceEmitter + ); + + Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); + + bootstrapper.start(); + + Assert.assertEquals(1, serverAnnouncer.getObservedCount()); + Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); + + Assert.assertEquals(ImmutableList.of(), segmentAnnouncer.getObservedSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); + serviceEmitter.verifyValue("segment/bootstrap/count", 0); + serviceEmitter.verifyEmitted("segment/bootstrap/time", 1); + + bootstrapper.stop(); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerTest.java b/server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerTest.java index 1a776c6c34a..cd2fe2dbd63 100644 --- a/server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/SegmentLoadDropHandlerTest.java @@ -20,31 +20,21 @@ package org.apache.druid.server.coordination; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.ListenableFuture; -import org.apache.druid.guice.ServerTypeConfig; import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.MapUtils; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.segment.ReferenceCountingSegment; -import org.apache.druid.segment.SegmentLazyLoadFailCallback; -import org.apache.druid.segment.loading.NoopSegmentCacheManager; +import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.segment.loading.SegmentLoaderConfig; import org.apache.druid.segment.loading.StorageLocationConfig; -import org.apache.druid.segment.loading.TombstoneSegmentizerFactory; import org.apache.druid.server.SegmentManager; -import org.apache.druid.server.TestSegmentUtils; import org.apache.druid.server.coordination.SegmentChangeStatus.State; -import org.apache.druid.server.metrics.NoopServiceEmitter; import org.apache.druid.timeline.DataSegment; -import org.joda.time.Interval; import org.junit.Assert; import 
org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.mockito.ArgumentMatchers; import org.mockito.Mockito; @@ -54,28 +44,21 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; + +import static org.apache.druid.server.TestSegmentUtils.makeSegment; public class SegmentLoadDropHandlerTest { - private static final int COUNT = 50; - private TestDataSegmentAnnouncer segmentAnnouncer; - private TestDataServerAnnouncer serverAnnouncer; private List scheduledRunnable; private SegmentLoaderConfig segmentLoaderConfig; private ScheduledExecutorFactory scheduledExecutorFactory; - @Rule - public ExpectedException expectedException = ExpectedException.none(); - @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); @@ -86,7 +69,6 @@ public class SegmentLoadDropHandlerTest scheduledRunnable = new ArrayList<>(); segmentAnnouncer = new TestDataSegmentAnnouncer(); - serverAnnouncer = new TestDataServerAnnouncer(); segmentLoaderConfig = new SegmentLoaderConfig() { @Override @@ -136,7 +118,7 @@ public class SegmentLoadDropHandlerTest }; }; - EmittingLogger.registerEmitter(new NoopServiceEmitter()); + EmittingLogger.registerEmitter(new StubServiceEmitter()); } /** @@ -148,16 +130,12 @@ public class SegmentLoadDropHandlerTest * */ @Test - public void testSegmentLoading1() throws Exception + public void testSegmentLoading1() { final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(); final SegmentManager segmentManager = new SegmentManager(cacheManager); final SegmentLoadDropHandler handler = initSegmentLoadDropHandler(segmentManager); - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); - final DataSegment segment = makeSegment("test", "1", Intervals.of("P1d/2011-04-01")); handler.removeSegment(segment, DataSegmentChangeCallback.NOOP); @@ -172,19 +150,16 @@ public class SegmentLoadDropHandlerTest for (Runnable runnable : scheduledRunnable) { runnable.run(); } - Assert.assertEquals(ImmutableList.of(segment), cacheManager.observedSegments); - Assert.assertEquals(ImmutableList.of(segment), cacheManager.observedSegmentsLoadedIntoPageCache); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedBootstrapSegments); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedBootstrapSegmentsLoadedIntoPageCache); + Assert.assertEquals(ImmutableList.of(segment), cacheManager.getObservedSegments()); + Assert.assertEquals(ImmutableList.of(segment), cacheManager.getObservedSegmentsLoadedIntoPageCache()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); Assert.assertEquals(ImmutableList.of(segment), segmentAnnouncer.getObservedSegments()); Assert.assertFalse( "segment files shouldn't be deleted", - cacheManager.observedSegmentsRemovedFromCache.contains(segment) + cacheManager.getObservedSegmentsRemovedFromCache().contains(segment) ); - - handler.stop(); - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); } /** @@ -197,15 +172,15 @@ public class 
SegmentLoadDropHandlerTest * */ @Test - public void testSegmentLoading2() throws Exception + public void testSegmentLoading2() { final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(); final SegmentManager segmentManager = new SegmentManager(cacheManager); final SegmentLoadDropHandler handler = initSegmentLoadDropHandler(segmentManager); - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); - final DataSegment segment = makeSegment("test", "1", Intervals.of("P1d/2011-04-01")); @@ -228,111 +203,16 @@ public class SegmentLoadDropHandlerTest // The same segment reference will be fetched more than once in the above sequence, but the segment should // be loaded only once onto the page cache. - Assert.assertEquals(ImmutableList.of(segment, segment), cacheManager.observedSegments); - Assert.assertEquals(ImmutableList.of(segment), cacheManager.observedSegmentsLoadedIntoPageCache); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedBootstrapSegments); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedBootstrapSegmentsLoadedIntoPageCache); + Assert.assertEquals(ImmutableList.of(segment, segment), cacheManager.getObservedSegments()); + Assert.assertEquals(ImmutableList.of(segment), cacheManager.getObservedSegmentsLoadedIntoPageCache()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); Assert.assertTrue(segmentAnnouncer.getObservedSegments().contains(segment)); Assert.assertFalse( "segment files shouldn't be deleted", - cacheManager.observedSegmentsRemovedFromCache.contains(segment) + cacheManager.getObservedSegmentsRemovedFromCache().contains(segment) ); - - handler.stop(); - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); - } - - @Test - public void testLoadCache() throws Exception - { - Set segments = new HashSet<>(); - for (int i = 0; i < COUNT; ++i) { - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-01"))); - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-02"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("P1d/2011-04-02"))); - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-03"))); - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-04"))); - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-05"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T01"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T02"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T03"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T05"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("PT1h/2011-04-04T06"))); - segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-01"))); - segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-02"))); - } - - final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(segments); - final SegmentManager segmentManager = new SegmentManager(cacheManager); - final SegmentLoadDropHandler handler = initSegmentLoadDropHandler(segmentManager); - - Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); - - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); -
Assert.assertFalse(segmentManager.getDataSourceCounts().isEmpty()); - - for (int i = 0; i < COUNT; ++i) { - Assert.assertEquals(11L, segmentManager.getDataSourceCounts().get("test" + i).longValue()); - Assert.assertEquals(2L, segmentManager.getDataSourceCounts().get("test_two" + i).longValue()); - } - - Assert.assertEquals(ImmutableList.copyOf(segments), segmentAnnouncer.getObservedSegments()); - - final ImmutableList expectedBootstrapSegments = ImmutableList.copyOf(segments); - Assert.assertEquals(expectedBootstrapSegments, cacheManager.observedBootstrapSegments); - Assert.assertEquals(expectedBootstrapSegments, cacheManager.observedBootstrapSegmentsLoadedIntoPageCache); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedSegments); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedSegmentsLoadedIntoPageCache); - - handler.stop(); - - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); - Assert.assertEquals(1, cacheManager.observedShutdownBootstrapCount.get()); - } - - @Test - public void testStartStop() throws Exception - { - final Set segments = new HashSet<>(); - for (int i = 0; i < COUNT; ++i) { - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-01"))); - segments.add(makeSegment("test" + i, "1", Intervals.of("P1d/2011-04-02"))); - segments.add(makeSegment("test" + i, "2", Intervals.of("P1d/2011-04-02"))); - segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-01"))); - segments.add(makeSegment("test_two" + i, "1", Intervals.of("P1d/2011-04-02"))); - } - - final TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(segments); - final SegmentManager segmentManager = new SegmentManager(cacheManager); - final SegmentLoadDropHandler handler = initSegmentLoadDropHandler(segmentManager); - - Assert.assertTrue(segmentManager.getDataSourceCounts().isEmpty()); - - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); - Assert.assertFalse(segmentManager.getDataSourceCounts().isEmpty()); - - for (int i = 0; i < COUNT; ++i) { - Assert.assertEquals(3L, segmentManager.getDataSourceCounts().get("test" + i).longValue()); - Assert.assertEquals(2L, segmentManager.getDataSourceCounts().get("test_two" + i).longValue()); - } - - Assert.assertEquals(ImmutableList.copyOf(segments), segmentAnnouncer.getObservedSegments()); - - final ImmutableList expectedBootstrapSegments = ImmutableList.copyOf(segments); - Assert.assertEquals(expectedBootstrapSegments, cacheManager.observedBootstrapSegments); - Assert.assertEquals(expectedBootstrapSegments, cacheManager.observedBootstrapSegmentsLoadedIntoPageCache); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedSegments); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedSegmentsLoadedIntoPageCache); - - handler.stop(); - - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); - Assert.assertEquals(1, cacheManager.observedShutdownBootstrapCount.get()); } @Test(timeout = 60_000L) @@ -342,10 +222,6 @@ public class SegmentLoadDropHandlerTest final SegmentManager segmentManager = new SegmentManager(cacheManager); final SegmentLoadDropHandler handler = initSegmentLoadDropHandler(segmentManager); - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); - DataSegment segment1 = makeSegment("batchtest1", "1", Intervals.of("P1d/2011-04-01")); DataSegment segment2 = makeSegment("batchtest2", "1", Intervals.of("P1d/2011-04-01")); @@ -374,13 +250,10 @@ public class SegmentLoadDropHandlerTest 
Assert.assertEquals(ImmutableList.of(segment1), segmentAnnouncer.getObservedSegments()); final ImmutableList expectedSegments = ImmutableList.of(segment1); - Assert.assertEquals(expectedSegments, cacheManager.observedSegments); - Assert.assertEquals(expectedSegments, cacheManager.observedSegmentsLoadedIntoPageCache); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedBootstrapSegments); - Assert.assertEquals(ImmutableList.of(), cacheManager.observedBootstrapSegmentsLoadedIntoPageCache); - - handler.stop(); - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); + Assert.assertEquals(expectedSegments, cacheManager.getObservedSegments()); + Assert.assertEquals(expectedSegments, cacheManager.getObservedSegmentsLoadedIntoPageCache()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegments()); + Assert.assertEquals(ImmutableList.of(), cacheManager.getObservedBootstrapSegmentsLoadedIntoPageCache()); } @Test(timeout = 60_000L) @@ -394,9 +267,6 @@ public class SegmentLoadDropHandlerTest final SegmentLoadDropHandler handler = initSegmentLoadDropHandler(segmentManager); - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); DataSegment segment1 = makeSegment("batchtest1", "1", Intervals.of("P1d/2011-04-01")); List batch = ImmutableList.of(new SegmentChangeRequestLoad(segment1)); @@ -418,8 +288,6 @@ public class SegmentLoadDropHandlerTest Assert.assertEquals(State.SUCCESS, result.get(0).getStatus().getState()); Assert.assertEquals(ImmutableList.of(segment1, segment1), segmentAnnouncer.getObservedSegments()); - handler.stop(); - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); } @Test(timeout = 60_000L) @@ -470,9 +338,6 @@ public class SegmentLoadDropHandlerTest segmentManager ); - handler.start(); - - Assert.assertEquals(1, serverAnnouncer.getObservedCount()); final DataSegment segment1 = makeSegment("batchtest1", "1", Intervals.of("P1d/2011-04-01")); List batch = ImmutableList.of(new SegmentChangeRequestLoad(segment1)); @@ -539,8 +404,6 @@ public class SegmentLoadDropHandlerTest Mockito.verify(segmentManager, Mockito.times(1)) .dropSegment(ArgumentMatchers.any()); - handler.stop(); - Assert.assertEquals(0, serverAnnouncer.getObservedCount()); } private SegmentLoadDropHandler initSegmentLoadDropHandler(SegmentManager segmentManager) @@ -548,129 +411,16 @@ public class SegmentLoadDropHandlerTest return initSegmentLoadDropHandler(segmentLoaderConfig, segmentManager); } - private SegmentLoadDropHandler initSegmentLoadDropHandler(SegmentLoaderConfig config, SegmentManager segmentManager) + private SegmentLoadDropHandler initSegmentLoadDropHandler( + SegmentLoaderConfig config, + SegmentManager segmentManager + ) { return new SegmentLoadDropHandler( config, segmentAnnouncer, - serverAnnouncer, segmentManager, - scheduledExecutorFactory.create(5, "SegmentLoadDropHandlerTest-[%d]"), - new ServerTypeConfig(ServerType.HISTORICAL) + scheduledExecutorFactory.create(5, "SegmentLoadDropHandlerTest-[%d]") ); } - - private DataSegment makeSegment(String dataSource, String version, Interval interval) - { - return TestSegmentUtils.makeSegment(dataSource, version, interval); - } - - /** - * A local cache manager to test the bootstrapping and segment add/remove operations. It stubs only the necessary - * methods to support these operations; any other method invoked will throw an exception from the base class, - * {@link NoopSegmentCacheManager}. 
- */ - private static class TestSegmentCacheManager extends NoopSegmentCacheManager - { - private final List cachedSegments; - - private final List observedBootstrapSegments; - private final List observedBootstrapSegmentsLoadedIntoPageCache; - private final List observedSegments; - private final List observedSegmentsLoadedIntoPageCache; - private final List observedSegmentsRemovedFromCache; - private final AtomicInteger observedShutdownBootstrapCount; - - TestSegmentCacheManager() - { - this(ImmutableSet.of()); - } - - TestSegmentCacheManager(final Set segmentsToCache) - { - this.cachedSegments = ImmutableList.copyOf(segmentsToCache); - this.observedBootstrapSegments = new ArrayList<>(); - this.observedBootstrapSegmentsLoadedIntoPageCache = new ArrayList<>(); - this.observedSegments = new ArrayList<>(); - this.observedSegmentsLoadedIntoPageCache = new ArrayList<>(); - this.observedSegmentsRemovedFromCache = new ArrayList<>(); - this.observedShutdownBootstrapCount = new AtomicInteger(0); - } - - @Override - public boolean canHandleSegments() - { - return true; - } - - @Override - public List getCachedSegments() - { - return cachedSegments; - } - - @Override - public ReferenceCountingSegment getBootstrapSegment(DataSegment segment, SegmentLazyLoadFailCallback loadFailed) - { - observedBootstrapSegments.add(segment); - return getSegmentInternal(segment); - } - - @Override - public ReferenceCountingSegment getSegment(final DataSegment segment) - { - observedSegments.add(segment); - return getSegmentInternal(segment); - } - - private ReferenceCountingSegment getSegmentInternal(final DataSegment segment) - { - if (segment.isTombstone()) { - return ReferenceCountingSegment - .wrapSegment(TombstoneSegmentizerFactory.segmentForTombstone(segment), segment.getShardSpec()); - } else { - return ReferenceCountingSegment.wrapSegment( - new TestSegmentUtils.SegmentForTesting( - segment.getDataSource(), - (Interval) segment.getLoadSpec().get("interval"), - MapUtils.getString(segment.getLoadSpec(), "version") - ), segment.getShardSpec() - ); - } - } - - @Override - public void loadSegmentIntoPageCache(DataSegment segment) - { - observedSegmentsLoadedIntoPageCache.add(segment); - } - - @Override - public void loadSegmentIntoPageCacheOnBootstrap(DataSegment segment) - { - observedBootstrapSegmentsLoadedIntoPageCache.add(segment); - } - - @Override - public void shutdownBootstrap() - { - observedShutdownBootstrapCount.incrementAndGet(); - } - - @Override - public void storeInfoFile(DataSegment segment) - { - } - - @Override - public void removeInfoFile(DataSegment segment) - { - } - - @Override - public void cleanup(DataSegment segment) - { - observedSegmentsRemovedFromCache.add(segment); - } - } } diff --git a/server/src/test/java/org/apache/druid/server/coordination/TestCoordinatorClient.java b/server/src/test/java/org/apache/druid/server/coordination/TestCoordinatorClient.java new file mode 100644 index 00000000000..9f297ddd39e --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordination/TestCoordinatorClient.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordination; + +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.client.BootstrapSegmentsResponse; +import org.apache.druid.client.coordinator.NoopCoordinatorClient; +import org.apache.druid.java.util.common.CloseableIterators; +import org.apache.druid.timeline.DataSegment; + +import java.util.HashSet; +import java.util.Set; + +class TestCoordinatorClient extends NoopCoordinatorClient +{ + private final Set bootstrapSegments; + + TestCoordinatorClient() + { + this(new HashSet<>()); + } + + TestCoordinatorClient(final Set bootstrapSegments) + { + this.bootstrapSegments = bootstrapSegments; + } + + @Override + public ListenableFuture fetchBootstrapSegments() + { + return Futures.immediateFuture( + new BootstrapSegmentsResponse(CloseableIterators.withEmptyBaggage(bootstrapSegments.iterator())) + ); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordination/TestSegmentCacheManager.java b/server/src/test/java/org/apache/druid/server/coordination/TestSegmentCacheManager.java new file mode 100644 index 00000000000..2cd5e8e61fe --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordination/TestSegmentCacheManager.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordination; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.java.util.common.MapUtils; +import org.apache.druid.segment.ReferenceCountingSegment; +import org.apache.druid.segment.SegmentLazyLoadFailCallback; +import org.apache.druid.segment.loading.NoopSegmentCacheManager; +import org.apache.druid.segment.loading.TombstoneSegmentizerFactory; +import org.apache.druid.server.TestSegmentUtils; +import org.apache.druid.timeline.DataSegment; +import org.joda.time.Interval; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A local cache manager to test the bootstrapping and segment add/remove operations. It stubs only the necessary + * methods to support these operations; any other method invoked will throw an exception from the base class, + * {@link NoopSegmentCacheManager}. 
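+ * <p>Typical usage in these tests (illustrative): construct the manager with the set of segments that
+ * should appear as already cached, wrap it in a {@link org.apache.druid.server.SegmentManager}, exercise
+ * the handler or bootstrapper, and assert against the observed segment lists, e.g.:
+ * <pre>{@code
+ * TestSegmentCacheManager cacheManager = new TestSegmentCacheManager(cachedSegments);
+ * SegmentManager segmentManager = new SegmentManager(cacheManager);
+ * // ... start a SegmentBootstrapper or call SegmentLoadDropHandler#addSegment ...
+ * Assert.assertEquals(expectedSegments, cacheManager.getObservedBootstrapSegments());
+ * }</pre>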
+ */ +class TestSegmentCacheManager extends NoopSegmentCacheManager +{ + private final List cachedSegments; + + private final List observedBootstrapSegments; + private final List observedBootstrapSegmentsLoadedIntoPageCache; + private final List observedSegments; + private final List observedSegmentsLoadedIntoPageCache; + private final List observedSegmentsRemovedFromCache; + private final AtomicInteger observedShutdownBootstrapCount; + + TestSegmentCacheManager() + { + this(ImmutableSet.of()); + } + + TestSegmentCacheManager(final Set segmentsToCache) + { + this.cachedSegments = ImmutableList.copyOf(segmentsToCache); + this.observedBootstrapSegments = new ArrayList<>(); + this.observedBootstrapSegmentsLoadedIntoPageCache = new ArrayList<>(); + this.observedSegments = new ArrayList<>(); + this.observedSegmentsLoadedIntoPageCache = new ArrayList<>(); + this.observedSegmentsRemovedFromCache = new ArrayList<>(); + this.observedShutdownBootstrapCount = new AtomicInteger(0); + } + + @Override + public boolean canHandleSegments() + { + return true; + } + + @Override + public List getCachedSegments() + { + return cachedSegments; + } + + @Override + public ReferenceCountingSegment getBootstrapSegment(DataSegment segment, SegmentLazyLoadFailCallback loadFailed) + { + observedBootstrapSegments.add(segment); + return getSegmentInternal(segment); + } + + @Override + public ReferenceCountingSegment getSegment(final DataSegment segment) + { + observedSegments.add(segment); + return getSegmentInternal(segment); + } + + private ReferenceCountingSegment getSegmentInternal(final DataSegment segment) + { + if (segment.isTombstone()) { + return ReferenceCountingSegment + .wrapSegment(TombstoneSegmentizerFactory.segmentForTombstone(segment), segment.getShardSpec()); + } else { + return ReferenceCountingSegment.wrapSegment( + new TestSegmentUtils.SegmentForTesting( + segment.getDataSource(), + (Interval) segment.getLoadSpec().get("interval"), + MapUtils.getString(segment.getLoadSpec(), "version") + ), segment.getShardSpec() + ); + } + } + + @Override + public void loadSegmentIntoPageCache(DataSegment segment) + { + observedSegmentsLoadedIntoPageCache.add(segment); + } + + @Override + public void loadSegmentIntoPageCacheOnBootstrap(DataSegment segment) + { + observedBootstrapSegmentsLoadedIntoPageCache.add(segment); + } + + @Override + public void shutdownBootstrap() + { + observedShutdownBootstrapCount.incrementAndGet(); + } + + @Override + public void storeInfoFile(DataSegment segment) + { + } + + @Override + public void removeInfoFile(DataSegment segment) + { + } + + @Override + public void cleanup(DataSegment segment) + { + observedSegmentsRemovedFromCache.add(segment); + } + + public List getObservedBootstrapSegments() + { + return observedBootstrapSegments; + } + + public List getObservedBootstrapSegmentsLoadedIntoPageCache() + { + return observedBootstrapSegmentsLoadedIntoPageCache; + } + + public List getObservedSegments() + { + return observedSegments; + } + + public List getObservedSegmentsLoadedIntoPageCache() + { + return observedSegmentsLoadedIntoPageCache; + } + + public List getObservedSegmentsRemovedFromCache() + { + return observedSegmentsRemovedFromCache; + } + + public AtomicInteger getObservedShutdownBootstrapCount() + { + return observedShutdownBootstrapCount; + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordination/ZkCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordination/ZkCoordinatorTest.java index f0f2bd3b4e4..a9f7772e59d 100644 --- 
a/server/src/test/java/org/apache/druid/server/coordination/ZkCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/ZkCoordinatorTest.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; import org.apache.curator.utils.ZKPaths; import org.apache.druid.curator.CuratorTestBase; -import org.apache.druid.guice.ServerTypeConfig; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.segment.IndexIO; @@ -42,7 +41,6 @@ import org.junit.Test; import java.util.Arrays; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ScheduledExecutorService; /** */ @@ -103,10 +101,7 @@ public class ZkCoordinatorTest extends CuratorTestBase SegmentLoadDropHandler segmentLoadDropHandler = new SegmentLoadDropHandler( new SegmentLoaderConfig(), EasyMock.createNiceMock(DataSegmentAnnouncer.class), - EasyMock.createNiceMock(DataSegmentServerAnnouncer.class), - EasyMock.createNiceMock(SegmentManager.class), - EasyMock.createNiceMock(ScheduledExecutorService.class), - new ServerTypeConfig(ServerType.HISTORICAL) + EasyMock.createNiceMock(SegmentManager.class) ) { @Override diff --git a/server/src/test/java/org/apache/druid/server/coordinator/AutoCompactionSnapshotTest.java b/server/src/test/java/org/apache/druid/server/coordinator/AutoCompactionSnapshotTest.java index e034459fc74..5517bf9e6a4 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/AutoCompactionSnapshotTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/AutoCompactionSnapshotTest.java @@ -19,6 +19,7 @@ package org.apache.druid.server.coordinator; +import org.apache.druid.server.coordinator.compact.CompactionStatistics; import org.junit.Assert; import org.junit.Test; @@ -30,17 +31,11 @@ public class AutoCompactionSnapshotTest final String expectedDataSource = "data"; final AutoCompactionSnapshot.Builder builder = AutoCompactionSnapshot.builder(expectedDataSource); - // Increment every stats twice + // Increment every stat twice for (int i = 0; i < 2; i++) { - builder.incrementIntervalCountSkipped(13) - .incrementBytesSkipped(13) - .incrementSegmentCountSkipped(13) - .incrementIntervalCountCompacted(13) - .incrementBytesCompacted(13) - .incrementSegmentCountCompacted(13) - .incrementIntervalCountAwaitingCompaction(13) - .incrementBytesAwaitingCompaction(13) - .incrementSegmentCountAwaitingCompaction(13); + builder.incrementSkippedStats(CompactionStatistics.create(13, 13, 13)); + builder.incrementWaitingStats(CompactionStatistics.create(13, 13, 13)); + builder.incrementCompactedStats(CompactionStatistics.create(13, 13, 13)); } final AutoCompactionSnapshot actual = builder.build(); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java index e3f8c01c3dc..a9334f077a4 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DataSourceCompactionConfigTest.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.SegmentsSplitHintSpec; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.indexer.CompactionEngine; import 
org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.HumanReadableBytes; @@ -69,6 +70,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -82,6 +84,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest Assert.assertEquals(config.getTuningConfig(), fromJson.getTuningConfig()); Assert.assertEquals(config.getTaskContext(), fromJson.getTaskContext()); Assert.assertEquals(config.getGranularitySpec(), fromJson.getGranularitySpec()); + Assert.assertEquals(config.getEngine(), fromJson.getEngine()); } @Test @@ -99,6 +102,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + CompactionEngine.MSQ, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -111,6 +115,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest Assert.assertEquals(config.getSkipOffsetFromLatest(), fromJson.getSkipOffsetFromLatest()); Assert.assertEquals(config.getTuningConfig(), fromJson.getTuningConfig()); Assert.assertEquals(config.getTaskContext(), fromJson.getTaskContext()); + Assert.assertEquals(config.getEngine(), fromJson.getEngine()); } @Test @@ -148,6 +153,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + CompactionEngine.NATIVE, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -160,6 +166,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest Assert.assertEquals(config.getSkipOffsetFromLatest(), fromJson.getSkipOffsetFromLatest()); Assert.assertEquals(config.getTuningConfig(), fromJson.getTuningConfig()); Assert.assertEquals(config.getTaskContext(), fromJson.getTaskContext()); + Assert.assertEquals(config.getEngine(), fromJson.getEngine()); } @Test @@ -197,6 +204,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); @@ -305,6 +313,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -335,6 +344,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -368,6 +378,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -398,6 +409,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -428,6 +440,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -461,6 +474,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, new UserCompactionTaskIOConfig(true), + null, ImmutableMap.of("key", "val") ); final 
String json = OBJECT_MAPPER.writeValueAsString(config); @@ -492,6 +506,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, new UserCompactionTaskIOConfig(null), + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -523,6 +538,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -554,6 +570,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest null, new UserCompactionTaskTransformConfig(new SelectorDimFilter("dim1", "foo", null)), null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); @@ -584,6 +601,7 @@ public class DataSourceCompactionConfigTest extends InitializedNullHandlingTest new AggregatorFactory[] {new CountAggregatorFactory("cnt")}, null, null, + null, ImmutableMap.of("key", "val") ); final String json = OBJECT_MAPPER.writeValueAsString(config); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java index a5ed1616a6d..8c7e0ae14e5 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java @@ -82,6 +82,7 @@ import org.junit.Test; import javax.annotation.Nullable; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; @@ -266,6 +267,7 @@ public class DruidCoordinatorTest extends CuratorTestBase coordinator.start(); Assert.assertNull(coordinator.getReplicationFactor(dataSegment.getId())); + Assert.assertNull(coordinator.getBroadcastSegments()); // Wait for this coordinator to become leader leaderAnnouncerLatch.await(); @@ -293,6 +295,7 @@ public class DruidCoordinatorTest extends CuratorTestBase coordinator.getDatasourceToUnavailableSegmentCount(); Assert.assertEquals(1, numsUnavailableUsedSegmentsPerDataSource.size()); Assert.assertEquals(0, numsUnavailableUsedSegmentsPerDataSource.getInt(dataSource)); + Assert.assertEquals(0, coordinator.getBroadcastSegments().size()); Map> underReplicationCountsPerDataSourcePerTier = coordinator.getTierToDatasourceToUnderReplicatedCount(false); @@ -571,6 +574,7 @@ public class DruidCoordinatorTest extends CuratorTestBase coordinatorRunLatch.await(); Assert.assertEquals(ImmutableMap.of(dataSource, 100.0), coordinator.getDatasourceToLoadStatus()); + Assert.assertEquals(new HashSet<>(dataSegments.values()), coordinator.getBroadcastSegments()); // Under-replicated counts are updated only after the next coordinator run Map> underReplicationCountsPerDataSourcePerTier = diff --git a/server/src/test/java/org/apache/druid/server/coordinator/compact/CompactionStatusTest.java b/server/src/test/java/org/apache/druid/server/coordinator/compact/CompactionStatusTest.java new file mode 100644 index 00000000000..0e13f8cd0e1 --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/compact/CompactionStatusTest.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.compact; + +import org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.server.coordinator.DataSourceCompactionConfig; +import org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Collections; + +public class CompactionStatusTest +{ + private static final String DS_WIKI = "wiki"; + + @Test + public void testFindPartitionsSpecWhenGivenIsNull() + { + final ClientCompactionTaskQueryTuningConfig tuningConfig + = ClientCompactionTaskQueryTuningConfig.from(null); + Assert.assertEquals( + new DynamicPartitionsSpec(null, Long.MAX_VALUE), + CompactionStatus.findPartitionsSpecFromConfig(tuningConfig) + ); + } + + @Test + public void testFindPartitionsSpecWhenGivenIsDynamicWithNullMaxTotalRows() + { + final PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(null, null); + final ClientCompactionTaskQueryTuningConfig tuningConfig + = ClientCompactionTaskQueryTuningConfig.from(createCompactionConfig(partitionsSpec)); + Assert.assertEquals( + new DynamicPartitionsSpec(null, Long.MAX_VALUE), + CompactionStatus.findPartitionsSpecFromConfig(tuningConfig) + ); + } + + @Test + public void testFindPartitionsSpecWhenGivenIsDynamicWithMaxTotalRows() + { + final PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(null, 1000L); + final ClientCompactionTaskQueryTuningConfig tuningConfig + = ClientCompactionTaskQueryTuningConfig.from(createCompactionConfig(partitionsSpec)); + Assert.assertEquals( + partitionsSpec, + CompactionStatus.findPartitionsSpecFromConfig(tuningConfig) + ); + } + + @Test + public void testFindPartitionsSpecWhenGivenIsDynamicWithMaxRowsPerSegment() + { + final PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(100, 1000L); + final ClientCompactionTaskQueryTuningConfig tuningConfig + = ClientCompactionTaskQueryTuningConfig.from(createCompactionConfig(partitionsSpec)); + Assert.assertEquals( + partitionsSpec, + CompactionStatus.findPartitionsSpecFromConfig(tuningConfig) + ); + } + + @Test + public void testFindPartitionsSpecFromConfigWithDeprecatedMaxRowsPerSegmentAndMaxTotalRowsReturnGivenValues() + { + final DataSourceCompactionConfig config = new DataSourceCompactionConfig( + "datasource", + null, + null, + 100, + null, + new UserCompactionTaskQueryTuningConfig( + null, + null, + null, + 1000L, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ), + null, + null, + null, + null, + null, + null, + null + ); + Assert.assertEquals( + new DynamicPartitionsSpec(100, 1000L), + 
CompactionStatus.findPartitionsSpecFromConfig( + ClientCompactionTaskQueryTuningConfig.from(config) + ) + ); + } + + @Test + public void testFindPartitionsSpecWhenGivenIsHashed() + { + final PartitionsSpec partitionsSpec = + new HashedPartitionsSpec(null, 100, Collections.singletonList("dim")); + final ClientCompactionTaskQueryTuningConfig tuningConfig + = ClientCompactionTaskQueryTuningConfig.from(createCompactionConfig(partitionsSpec)); + Assert.assertEquals( + partitionsSpec, + CompactionStatus.findPartitionsSpecFromConfig(tuningConfig) + ); + } + + @Test + public void testFindPartitionsSpecWhenGivenIsRange() + { + final PartitionsSpec partitionsSpec = + new DimensionRangePartitionsSpec(null, 10000, Collections.singletonList("dim"), false); + final ClientCompactionTaskQueryTuningConfig tuningConfig + = ClientCompactionTaskQueryTuningConfig.from(createCompactionConfig(partitionsSpec)); + Assert.assertEquals( + partitionsSpec, + CompactionStatus.findPartitionsSpecFromConfig(tuningConfig) + ); + } + + private static DataSourceCompactionConfig createCompactionConfig( + PartitionsSpec partitionsSpec + ) + { + return new DataSourceCompactionConfig( + DS_WIKI, + null, null, null, null, createTuningConfig(partitionsSpec), + null, null, null, null, null, null, null + ); + } + + private static UserCompactionTaskQueryTuningConfig createTuningConfig( + PartitionsSpec partitionsSpec + ) + { + return new UserCompactionTaskQueryTuningConfig( + null, + null, null, null, null, partitionsSpec, null, null, null, + null, null, null, null, null, null, null, null, null, null + ); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/compact/DataSourceCompactibleSegmentIteratorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/compact/DataSourceCompactibleSegmentIteratorTest.java new file mode 100644 index 00000000000..a2765ccfc09 --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/compact/DataSourceCompactibleSegmentIteratorTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.coordinator.compact; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; +import org.joda.time.Interval; +import org.joda.time.Period; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +public class DataSourceCompactibleSegmentIteratorTest +{ + @Test + public void testFilterSkipIntervals() + { + final Interval totalInterval = Intervals.of("2018-01-01/2019-01-01"); + final List expectedSkipIntervals = ImmutableList.of( + Intervals.of("2018-01-15/2018-03-02"), + Intervals.of("2018-07-23/2018-10-01"), + Intervals.of("2018-10-02/2018-12-25"), + Intervals.of("2018-12-31/2019-01-01") + ); + final List skipIntervals = DataSourceCompactibleSegmentIterator.filterSkipIntervals( + totalInterval, + Lists.newArrayList( + Intervals.of("2017-12-01/2018-01-15"), + Intervals.of("2018-03-02/2018-07-23"), + Intervals.of("2018-10-01/2018-10-02"), + Intervals.of("2018-12-25/2018-12-31") + ) + ); + + Assert.assertEquals(expectedSkipIntervals, skipIntervals); + } + + @Test + public void testAddSkipIntervalFromLatestAndSort() + { + final List expectedIntervals = ImmutableList.of( + Intervals.of("2018-12-24/2018-12-25"), + Intervals.of("2018-12-29/2019-01-01") + ); + final List fullSkipIntervals = DataSourceCompactibleSegmentIterator.sortAndAddSkipIntervalFromLatest( + DateTimes.of("2019-01-01"), + new Period(72, 0, 0, 0), + null, + ImmutableList.of( + Intervals.of("2018-12-30/2018-12-31"), + Intervals.of("2018-12-24/2018-12-25") + ) + ); + + Assert.assertEquals(expectedIntervals, fullSkipIntervals); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIteratorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIteratorTest.java deleted file mode 100644 index b4ea5d69e00..00000000000 --- a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstIteratorTest.java +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.server.coordinator.compact; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig; -import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; -import org.apache.druid.indexer.partitions.HashedPartitionsSpec; -import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.server.coordinator.DataSourceCompactionConfig; -import org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig; -import org.joda.time.Interval; -import org.joda.time.Period; -import org.junit.Assert; -import org.junit.Test; - -import java.util.List; - -public class NewestSegmentFirstIteratorTest -{ - @Test - public void testFilterSkipIntervals() - { - final Interval totalInterval = Intervals.of("2018-01-01/2019-01-01"); - final List expectedSkipIntervals = ImmutableList.of( - Intervals.of("2018-01-15/2018-03-02"), - Intervals.of("2018-07-23/2018-10-01"), - Intervals.of("2018-10-02/2018-12-25"), - Intervals.of("2018-12-31/2019-01-01") - ); - final List skipIntervals = NewestSegmentFirstIterator.filterSkipIntervals( - totalInterval, - Lists.newArrayList( - Intervals.of("2017-12-01/2018-01-15"), - Intervals.of("2018-03-02/2018-07-23"), - Intervals.of("2018-10-01/2018-10-02"), - Intervals.of("2018-12-25/2018-12-31") - ) - ); - - Assert.assertEquals(expectedSkipIntervals, skipIntervals); - } - - @Test - public void testAddSkipIntervalFromLatestAndSort() - { - final List expectedIntervals = ImmutableList.of( - Intervals.of("2018-12-24/2018-12-25"), - Intervals.of("2018-12-29/2019-01-01") - ); - final List fullSkipIntervals = NewestSegmentFirstIterator.sortAndAddSkipIntervalFromLatest( - DateTimes.of("2019-01-01"), - new Period(72, 0, 0, 0), - null, - ImmutableList.of( - Intervals.of("2018-12-30/2018-12-31"), - Intervals.of("2018-12-24/2018-12-25") - ) - ); - - Assert.assertEquals(expectedIntervals, fullSkipIntervals); - } - - @Test - public void testFindPartitionsSpecFromConfigWithNullTuningConfigReturnDynamicPartitinosSpecWithMaxTotalRowsOfLongMax() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(null, Long.MAX_VALUE), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithNullMaxTotalRowsReturnLongMaxValue() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - null, - null, - new DynamicPartitionsSpec(null, null), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(null, Long.MAX_VALUE), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithNonNullMaxTotalRowsReturnGivenValue() - { - 
final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - null, - null, - new DynamicPartitionsSpec(null, 1000L), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(null, 1000L), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithNonNullMaxRowsPerSegmentReturnGivenValue() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - null, - null, - new DynamicPartitionsSpec(100, 1000L), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(100, 1000L), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithDeprecatedMaxRowsPerSegmentAndMaxTotalRowsReturnGivenValues() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - 100, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - 1000L, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(100, 1000L), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithDeprecatedMaxRowsPerSegmentAndPartitionsSpecIgnoreDeprecatedOne() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - 100, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - null, - null, - new DynamicPartitionsSpec(null, null), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(null, Long.MAX_VALUE), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithDeprecatedMaxTotalRowsAndPartitionsSpecIgnoreDeprecatedOne() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - 1000L, - null, - new DynamicPartitionsSpec(null, null), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new DynamicPartitionsSpec(null, Long.MAX_VALUE), - 
CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithHashPartitionsSpec() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - null, - null, - new HashedPartitionsSpec(null, 10, ImmutableList.of("dim")), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new HashedPartitionsSpec(null, 10, ImmutableList.of("dim")), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } - - @Test - public void testFindPartitionsSpecFromConfigWithRangePartitionsSpec() - { - final DataSourceCompactionConfig config = new DataSourceCompactionConfig( - "datasource", - null, - null, - null, - null, - new UserCompactionTaskQueryTuningConfig( - null, - null, - null, - null, - null, - new SingleDimensionPartitionsSpec(10000, null, "dim", false), - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ), - null, - null, - null, - null, - null, - null - ); - Assert.assertEquals( - new SingleDimensionPartitionsSpec(10000, null, "dim", false), - CompactionStatus.findPartitionsSpecFromConfig( - ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment(), null) - ) - ); - } -} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java index dda1cb1af13..8f24a4ebb7e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/compact/NewestSegmentFirstPolicyTest.java @@ -88,7 +88,7 @@ public class NewestSegmentFirstPolicyTest public void testLargeOffsetAndSmallSegmentInterval() { final Period segmentPeriod = new Period("PT1H"); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P2D"), null)), ImmutableMap.of( DATA_SOURCE, @@ -113,7 +113,7 @@ public class NewestSegmentFirstPolicyTest public void testSmallOffsetAndLargeSegmentInterval() { final Period segmentPeriod = new Period("PT1H"); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("PT1M"), null)), ImmutableMap.of( DATA_SOURCE, @@ -146,7 +146,7 @@ public class NewestSegmentFirstPolicyTest public void testLargeGapInData() { final Period segmentPeriod = new Period("PT1H"); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("PT1H1M"), null)), ImmutableMap.of( DATA_SOURCE, @@ -179,7 +179,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testHugeShard() { - final CompactionSegmentIterator iterator = policy.reset( + final 
CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P1D"), null)), ImmutableMap.of( DATA_SOURCE, @@ -229,7 +229,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testManySegmentsPerShard() { - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(800000, new Period("P1D"), null)), ImmutableMap.of( DATA_SOURCE, @@ -287,7 +287,7 @@ public class NewestSegmentFirstPolicyTest { final String unknownDataSource = "unknown"; final Period segmentPeriod = new Period("PT1H"); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of( unknownDataSource, createCompactionConfig(10000, new Period("P2D"), null), @@ -337,7 +337,7 @@ public class NewestSegmentFirstPolicyTest 2 ) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(inputSegmentSizeBytes, new Period("P0D"), null)), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -374,7 +374,7 @@ public class NewestSegmentFirstPolicyTest ) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), null)), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -395,7 +395,7 @@ public class NewestSegmentFirstPolicyTest ) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), null)), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -412,7 +412,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-10-14T00:00:00/2017-10-15T00:00:00"), new Period("P1D")) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), new UserCompactionTaskGranularityConfig(Granularities.DAY, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -445,7 +445,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-10-14T00:00:00/2017-10-15T00:00:00"), new Period("PT5H")) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), new UserCompactionTaskGranularityConfig(Granularities.MONTH, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -471,7 +471,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-10-14T00:00:00/2017-10-15T00:00:00"), new Period("PT5H")) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), new UserCompactionTaskGranularityConfig(Granularities.MINUTE, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -496,7 +496,7 @@ public class NewestSegmentFirstPolicyTest public void 
testWithSkipIntervals() { final Period segmentPeriod = new Period("PT1H"); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P1D"), null)), ImmutableMap.of( DATA_SOURCE, @@ -536,7 +536,7 @@ public class NewestSegmentFirstPolicyTest public void testHoleInSearchInterval() { final Period segmentPeriod = new Period("PT1H"); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("PT1H"), null)), ImmutableMap.of( DATA_SOURCE, @@ -586,7 +586,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-12-31T00:00:00"), new Period("P1D")) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.MONTH, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -635,7 +635,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2020-02-08/2020-02-15"), new Period("P7D")) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.MONTH, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -670,7 +670,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-12-01T00:00:00/2017-12-02T00:00:00"), new Period("P1D")) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.MINUTE, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -696,7 +696,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-10-01T01:00:00/2017-10-01T02:00:00"), new Period("PT1H"), "1994-04-30T00:00:00.000Z", null) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.MONTH, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -721,7 +721,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -740,7 +740,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets segmentGranularity=DAY - 
final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.DAY, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -754,7 +754,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -773,7 +773,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets segmentGranularity=DAY - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.DAY, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -787,7 +787,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -806,7 +806,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets segmentGranularity=YEAR - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -830,7 +830,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -849,7 +849,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets segmentGranularity=YEAR - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, 
createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -873,7 +873,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -887,7 +887,7 @@ public class NewestSegmentFirstPolicyTest // Duration of new segmentGranularity is the same as before (P1D), // but we changed the timezone from UTC to Bangkok in the auto compaction spec - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, @@ -925,7 +925,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -938,7 +938,7 @@ public class NewestSegmentFirstPolicyTest ); // Duration of new segmentGranularity is the same as before (P1D), but we changed the origin in the autocompaction spec - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, @@ -976,7 +976,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have // rollup=false for interval 2017-10-01T00:00:00/2017-10-02T00:00:00, @@ -1004,7 +1004,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets rollup=true - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(null, null, true))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -1036,7 +1036,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = 
IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have // queryGranularity=DAY for interval 2017-10-01T00:00:00/2017-10-02T00:00:00, @@ -1064,7 +1064,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets queryGranularity=MINUTE - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(null, Granularities.MINUTE, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -1096,7 +1096,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have // Dimensions=["foo", "bar"] for interval 2017-10-01T00:00:00/2017-10-02T00:00:00, @@ -1131,7 +1131,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets Dimensions=["foo"] - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1172,7 +1172,7 @@ public class NewestSegmentFirstPolicyTest Assert.assertFalse(iterator.hasNext()); // Auto compaction config sets Dimensions=null - iterator = policy.reset( + iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1195,7 +1195,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have // filter=SelectorDimFilter("dim1", "foo", null) for interval 2017-10-01T00:00:00/2017-10-02T00:00:00, @@ -1251,7 +1251,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets filter=SelectorDimFilter("dim1", "bar", null) - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1292,7 +1292,7 @@ public class NewestSegmentFirstPolicyTest Assert.assertFalse(iterator.hasNext()); // Auto compaction config sets filter=null - iterator = policy.reset( + iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, 
createCompactionConfig( 130000, new Period("P0D"), @@ -1319,7 +1319,7 @@ public class NewestSegmentFirstPolicyTest // Same indexSpec as what is set in the auto compaction config Map indexSpec = IndexSpec.DEFAULT.asMap(mapper); // Same partitionsSpec as what is set in the auto compaction config - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have // metricsSpec={CountAggregatorFactory("cnt")} for interval 2017-10-01T00:00:00/2017-10-02T00:00:00, @@ -1375,7 +1375,7 @@ public class NewestSegmentFirstPolicyTest ); // Auto compaction config sets metricsSpec={CountAggregatorFactory("cnt"), LongSumAggregatorFactory("val", "val")} - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1416,7 +1416,7 @@ public class NewestSegmentFirstPolicyTest Assert.assertFalse(iterator.hasNext()); // Auto compaction config sets metricsSpec=null - iterator = policy.reset( + iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1440,7 +1440,7 @@ public class NewestSegmentFirstPolicyTest new SegmentGenerateSpec(Intervals.of("2017-10-01T01:00:00/2017-10-01T02:00:00"), new Period("PT1H"), "1994-04-30T00:00:00.000Z", null) ); - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(Granularities.HOUR, null, null))), ImmutableMap.of(DATA_SOURCE, timeline), Collections.emptyMap() @@ -1468,7 +1468,7 @@ public class NewestSegmentFirstPolicyTest // Different indexSpec as what is set in the auto compaction config IndexSpec newIndexSpec = IndexSpec.builder().withBitmapSerdeFactory(new ConciseBitmapSerdeFactory()).build(); Map newIndexSpecMap = mapper.convertValue(newIndexSpec, new TypeReference>() {}); - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY final SegmentTimeline timeline = createTimeline( @@ -1481,7 +1481,7 @@ public class NewestSegmentFirstPolicyTest ); // Duration of new segmentGranularity is the same as before (P1D) - final CompactionSegmentIterator iterator = policy.reset( + final CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, @@ -1517,7 +1517,7 @@ public class NewestSegmentFirstPolicyTest public void testIteratorDoesNotReturnSegmentWithChangingAppendableIndexSpec() { NullHandling.initializeForTests(); - PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null, null)); + PartitionsSpec partitionsSpec = CompactionStatus.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null)); final SegmentTimeline timeline = createTimeline( new SegmentGenerateSpec( 
Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), @@ -1534,7 +1534,7 @@ public class NewestSegmentFirstPolicyTest ) ); - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1569,7 +1569,7 @@ public class NewestSegmentFirstPolicyTest ); Assert.assertFalse(iterator.hasNext()); - iterator = policy.reset( + iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig( 130000, new Period("P0D"), @@ -1608,7 +1608,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testSkipAllGranularityToDefault() { - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -1640,7 +1640,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testSkipFirstHalfEternityToDefault() { - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -1672,7 +1672,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testSkipSecondHalfOfEternityToDefault() { - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -1704,7 +1704,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testSkipAllToAllGranularity() { - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -1736,7 +1736,7 @@ public class NewestSegmentFirstPolicyTest @Test public void testSkipAllToFinerGranularity() { - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -1799,7 +1799,7 @@ public class NewestSegmentFirstPolicyTest 0, 1); - CompactionSegmentIterator iterator = policy.reset( + CompactionSegmentIterator iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -1850,7 +1850,7 @@ public class NewestSegmentFirstPolicyTest TombstoneShardSpec.INSTANCE, 0, 1); - iterator = policy.reset( + iterator = policy.createIterator( ImmutableMap.of(DATA_SOURCE, createCompactionConfig(10000, new Period("P0D"), @@ -2023,6 +2023,7 @@ public class NewestSegmentFirstPolicyTest metricsSpec, transformSpec, null, + null, null ); } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java index 173e4537cca..236cfaf7da5 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java @@ -36,12 +36,14 @@ import org.apache.druid.client.indexing.ClientCompactionIntervalSpec; import org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec; import org.apache.druid.client.indexing.ClientCompactionTaskQuery; import org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig; +import org.apache.druid.client.indexing.ClientMSQContext; import 
org.apache.druid.client.indexing.ClientTaskQuery; import org.apache.druid.client.indexing.IndexingTotalWorkerCapacityInfo; import org.apache.druid.client.indexing.NoopOverlordClient; import org.apache.druid.client.indexing.TaskPayloadResponse; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.indexer.CompactionEngine; import org.apache.druid.indexer.RunnerTaskState; import org.apache.druid.indexer.TaskLocation; import org.apache.druid.indexer.TaskState; @@ -130,50 +132,63 @@ public class CompactSegmentsTest private static final int MAXIMUM_CAPACITY_WITH_AUTO_SCALE = 10; private static final NewestSegmentFirstPolicy SEARCH_POLICY = new NewestSegmentFirstPolicy(JSON_MAPPER); - @Parameterized.Parameters(name = "{0}") + @Parameterized.Parameters(name = "scenario: {0}, engine: {2}") public static Collection constructorFeeder() { final MutableInt nextRangePartitionBoundary = new MutableInt(0); + + final DynamicPartitionsSpec dynamicPartitionsSpec = new DynamicPartitionsSpec(300000, Long.MAX_VALUE); + final BiFunction numberedShardSpecCreator = NumberedShardSpec::new; + + final HashedPartitionsSpec hashedPartitionsSpec = new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")); + final BiFunction hashBasedNumberedShardSpecCreator = + (bucketId, numBuckets) -> new HashBasedNumberedShardSpec( + bucketId, + numBuckets, + bucketId, + numBuckets, + ImmutableList.of("dim"), + null, + JSON_MAPPER + ); + + final SingleDimensionPartitionsSpec singleDimensionPartitionsSpec = + new SingleDimensionPartitionsSpec(300000, null, "dim", false); + final BiFunction singleDimensionShardSpecCreator = + (bucketId, numBuckets) -> new SingleDimensionShardSpec( + "dim", + bucketId == 0 ? null : String.valueOf(nextRangePartitionBoundary.getAndIncrement()), + bucketId.equals(numBuckets) ? null : String.valueOf(nextRangePartitionBoundary.getAndIncrement()), + bucketId, + numBuckets + ); + + // Hash partition spec is not supported by MSQ engine. return ImmutableList.of( - new Object[]{ - new DynamicPartitionsSpec(300000, Long.MAX_VALUE), - (BiFunction) NumberedShardSpec::new - }, - new Object[]{ - new HashedPartitionsSpec(null, 2, ImmutableList.of("dim")), - (BiFunction) (bucketId, numBuckets) -> new HashBasedNumberedShardSpec( - bucketId, - numBuckets, - bucketId, - numBuckets, - ImmutableList.of("dim"), - null, - JSON_MAPPER - ) - }, - new Object[]{ - new SingleDimensionPartitionsSpec(300000, null, "dim", false), - (BiFunction) (bucketId, numBuckets) -> new SingleDimensionShardSpec( - "dim", - bucketId == 0 ? null : String.valueOf(nextRangePartitionBoundary.getAndIncrement()), - bucketId.equals(numBuckets) ? 
null : String.valueOf(nextRangePartitionBoundary.getAndIncrement()), - bucketId, - numBuckets - ) - } + new Object[]{dynamicPartitionsSpec, numberedShardSpecCreator, CompactionEngine.NATIVE}, + new Object[]{hashedPartitionsSpec, hashBasedNumberedShardSpecCreator, CompactionEngine.NATIVE}, + new Object[]{singleDimensionPartitionsSpec, singleDimensionShardSpecCreator, CompactionEngine.NATIVE}, + new Object[]{dynamicPartitionsSpec, numberedShardSpecCreator, CompactionEngine.MSQ}, + new Object[]{singleDimensionPartitionsSpec, singleDimensionShardSpecCreator, CompactionEngine.MSQ} ); } private final PartitionsSpec partitionsSpec; private final BiFunction shardSpecFactory; + private final CompactionEngine engine; private DataSourcesSnapshot dataSources; Map> datasourceToSegments = new HashMap<>(); - public CompactSegmentsTest(PartitionsSpec partitionsSpec, BiFunction shardSpecFactory) + public CompactSegmentsTest( + PartitionsSpec partitionsSpec, + BiFunction shardSpecFactory, + CompactionEngine engine + ) { this.partitionsSpec = partitionsSpec; this.shardSpecFactory = shardSpecFactory; + this.engine = engine; } @Before @@ -640,7 +655,13 @@ public class CompactSegmentsTest final CoordinatorRunStats stats = doCompactSegments(compactSegments, 3); Assert.assertEquals(3, stats.get(Stats.Compaction.AVAILABLE_SLOTS)); Assert.assertEquals(3, stats.get(Stats.Compaction.MAX_SLOTS)); - Assert.assertEquals(3, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + // Native takes up 1 task slot by default whereas MSQ takes up all available up to 5. Since there are 3 available + // slots, there are 3 submitted tasks for native whereas 1 for MSQ. + if (engine == CompactionEngine.NATIVE) { + Assert.assertEquals(3, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + } else { + Assert.assertEquals(1, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + } } @Test @@ -654,7 +675,13 @@ public class CompactSegmentsTest doCompactSegments(compactSegments, createCompactionConfigs(), maxCompactionSlot, true); Assert.assertEquals(maxCompactionSlot, stats.get(Stats.Compaction.AVAILABLE_SLOTS)); Assert.assertEquals(maxCompactionSlot, stats.get(Stats.Compaction.MAX_SLOTS)); - Assert.assertEquals(maxCompactionSlot, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + // Native takes up 1 task slot by default whereas MSQ takes up all available up to 5. Since there are 3 available + // slots, there are 3 submitted tasks for native whereas 1 for MSQ. + if (engine == CompactionEngine.NATIVE) { + Assert.assertEquals(maxCompactionSlot, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + } else { + Assert.assertEquals(1, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + } } @Test @@ -668,7 +695,16 @@ public class CompactSegmentsTest doCompactSegments(compactSegments, createCompactionConfigs(), maxCompactionSlot, true); Assert.assertEquals(MAXIMUM_CAPACITY_WITH_AUTO_SCALE, stats.get(Stats.Compaction.AVAILABLE_SLOTS)); Assert.assertEquals(MAXIMUM_CAPACITY_WITH_AUTO_SCALE, stats.get(Stats.Compaction.MAX_SLOTS)); - Assert.assertEquals(MAXIMUM_CAPACITY_WITH_AUTO_SCALE, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + // Native takes up 1 task slot by default whereas MSQ takes up all available up to 5. Since there are 10 available + // slots, there are 10 submitted tasks for native whereas 2 for MSQ.
+ if (engine == CompactionEngine.NATIVE) { + Assert.assertEquals(MAXIMUM_CAPACITY_WITH_AUTO_SCALE, stats.get(Stats.Compaction.SUBMITTED_TASKS)); + } else { + Assert.assertEquals( + MAXIMUM_CAPACITY_WITH_AUTO_SCALE / ClientMSQContext.MAX_TASK_SLOTS_FOR_MSQ_COMPACTION_TASK, + stats.get(Stats.Compaction.SUBMITTED_TASKS) + ); + } } @Test @@ -712,6 +748,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -769,6 +806,7 @@ public class CompactSegmentsTest null, null, new UserCompactionTaskIOConfig(true), + engine, null ) ); @@ -818,6 +856,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -867,6 +906,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -927,6 +967,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -979,6 +1020,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1028,6 +1070,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1081,6 +1124,7 @@ public class CompactSegmentsTest null, null, null, + null, null ) ); @@ -1137,6 +1181,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1165,8 +1210,13 @@ public class CompactSegmentsTest { final TestOverlordClient overlordClient = new TestOverlordClient(JSON_MAPPER); final CompactSegments compactSegments = new CompactSegments(SEARCH_POLICY, overlordClient); - - final CoordinatorRunStats stats = doCompactSegments(compactSegments, createCompactionConfigs(2), 4); + final CoordinatorRunStats stats; + // Native uses maxNumConcurrentSubTasks for task slots whereas MSQ uses maxNumTasks. + if (engine == CompactionEngine.NATIVE) { + stats = doCompactSegments(compactSegments, createcompactionConfigsForNative(2), 4); + } else { + stats = doCompactSegments(compactSegments, createcompactionConfigsForMSQ(2), 4); + } Assert.assertEquals(4, stats.get(Stats.Compaction.AVAILABLE_SLOTS)); Assert.assertEquals(4, stats.get(Stats.Compaction.MAX_SLOTS)); Assert.assertEquals(2, stats.get(Stats.Compaction.SUBMITTED_TASKS)); @@ -1198,7 +1248,7 @@ public class CompactSegmentsTest // is submitted for dataSource_0 CompactSegments compactSegments = new CompactSegments(SEARCH_POLICY, overlordClient); final CoordinatorRunStats stats = - doCompactSegments(compactSegments, createCompactionConfigs(2), 4); + doCompactSegments(compactSegments, createcompactionConfigsForNative(2), 4); Assert.assertEquals(1, stats.get(Stats.Compaction.SUBMITTED_TASKS)); Assert.assertEquals(1, overlordClient.submittedCompactionTasks.size()); @@ -1252,6 +1302,7 @@ public class CompactSegmentsTest null, new UserCompactionTaskTransformConfig(new SelectorDimFilter("dim1", "foo", null)), null, + engine, null ) ); @@ -1302,6 +1353,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1354,6 +1406,7 @@ public class CompactSegmentsTest aggregatorFactories, null, null, + engine, null ) ); @@ -1434,6 +1487,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1520,6 +1574,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1577,6 +1632,7 @@ public class CompactSegmentsTest new AggregatorFactory[] {new CountAggregatorFactory("cnt")}, null, null, + engine, null ) ); @@ -1629,6 +1685,7 @@ public class CompactSegmentsTest null, null, null, + engine, null ) ); @@ -1781,7 +1838,8 @@ public class CompactSegmentsTest compactionConfigs, numCompactionTaskSlots == null ? 
null : 1.0, // 100% when numCompactionTaskSlots is not null numCompactionTaskSlots, - useAutoScaleSlots + useAutoScaleSlots, + null ) ) .build(); @@ -1901,10 +1959,23 @@ public class CompactSegmentsTest private List createCompactionConfigs() { - return createCompactionConfigs(null); + return createCompactionConfigs(null, null); } - private List createCompactionConfigs(@Nullable Integer maxNumConcurrentSubTasks) + private List createcompactionConfigsForNative(@Nullable Integer maxNumConcurrentSubTasks) + { + return createCompactionConfigs(maxNumConcurrentSubTasks, null); + } + + private List createcompactionConfigsForMSQ(Integer maxNumTasks) + { + return createCompactionConfigs(null, maxNumTasks); + } + + private List createCompactionConfigs( + @Nullable Integer maxNumConcurrentSubTasksForNative, + @Nullable Integer maxNumTasksForMSQ + ) { final List compactionConfigs = new ArrayList<>(); for (int i = 0; i < 3; i++) { @@ -1928,7 +1999,7 @@ public class CompactSegmentsTest null, null, null, - maxNumConcurrentSubTasks, + maxNumConcurrentSubTasksForNative, null, null, null, @@ -1942,7 +2013,8 @@ public class CompactSegmentsTest null, null, null, - null + engine, + maxNumTasksForMSQ == null ? null : ImmutableMap.of(ClientMSQContext.CTX_MAX_NUM_TASKS, maxNumTasksForMSQ) ) ); } @@ -2172,7 +2244,7 @@ public class CompactSegmentsTest ClientCompactionTaskQueryTuningConfig tuningConfig = Mockito.mock(ClientCompactionTaskQueryTuningConfig.class); Mockito.when(tuningConfig.getPartitionsSpec()).thenReturn(Mockito.mock(PartitionsSpec.class)); Mockito.when(tuningConfig.getMaxNumConcurrentSubTasks()).thenReturn(2); - Assert.assertEquals(3, CompactSegments.findMaxNumTaskSlotsUsedByOneCompactionTask(tuningConfig)); + Assert.assertEquals(3, CompactSegments.findMaxNumTaskSlotsUsedByOneNativeCompactionTask(tuningConfig)); } @Test @@ -2181,7 +2253,7 @@ public class CompactSegmentsTest ClientCompactionTaskQueryTuningConfig tuningConfig = Mockito.mock(ClientCompactionTaskQueryTuningConfig.class); Mockito.when(tuningConfig.getPartitionsSpec()).thenReturn(Mockito.mock(PartitionsSpec.class)); Mockito.when(tuningConfig.getMaxNumConcurrentSubTasks()).thenReturn(1); - Assert.assertEquals(1, CompactSegments.findMaxNumTaskSlotsUsedByOneCompactionTask(tuningConfig)); + Assert.assertEquals(1, CompactSegments.findMaxNumTaskSlotsUsedByOneNativeCompactionTask(tuningConfig)); } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillCompactionConfigTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillCompactionConfigTest.java index ec4547c940c..3d441d9b06d 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillCompactionConfigTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillCompactionConfigTest.java @@ -158,6 +158,7 @@ public class KillCompactionConfigTest null, null, null, + null, ImmutableMap.of("key", "val") ); @@ -173,6 +174,7 @@ public class KillCompactionConfigTest null, null, null, + null, ImmutableMap.of("key", "val") ); CoordinatorCompactionConfig originalCurrentConfig = CoordinatorCompactionConfig.from(ImmutableList.of(inactiveDatasourceConfig, activeDatasourceConfig)); @@ -255,6 +257,7 @@ public class KillCompactionConfigTest null, null, null, + null, ImmutableMap.of("key", "val") ); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 2455610ceac..9d0f752869e 100644 
--- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -36,9 +36,11 @@ import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.parsers.CloseableIterator; -import org.apache.druid.metadata.SQLMetadataSegmentPublisher; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.SQLMetadataConnector; import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; +import org.apache.druid.metadata.SqlSegmentsMetadataManagerTestBase; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -98,12 +100,13 @@ public class KillUnusedSegmentsTest @Rule public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); private SqlSegmentsMetadataManager sqlSegmentsMetadataManager; - private SQLMetadataSegmentPublisher publisher; + private SQLMetadataConnector connector; + private MetadataStorageTablesConfig config; @Before public void setup() { - final TestDerbyConnector connector = derbyConnectorRule.getConnector(); + connector = derbyConnectorRule.getConnector(); SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); config.setPollDuration(Period.millis(1)); sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( @@ -116,11 +119,7 @@ public class KillUnusedSegmentsTest ); sqlSegmentsMetadataManager.start(); - publisher = new SQLMetadataSegmentPublisher( - TestHelper.makeJsonMapper(), - derbyConnectorRule.metadataTablesConfigSupplier().get(), - connector - ); + this.config = derbyConnectorRule.metadataTablesConfigSupplier().get(); connector.createSegmentTable(); overlordClient = new TestOverlordClient(); @@ -733,7 +732,7 @@ public class KillUnusedSegmentsTest { final DataSegment segment = createSegment(dataSource, interval, version); try { - publisher.publishSegment(segment); + SqlSegmentsMetadataManagerTestBase.publishSegment(connector, config, TestHelper.makeJsonMapper(), segment); } catch (IOException e) { throw new RuntimeException(e); diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResourceTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResourceTest.java index c31364ac9fd..17db2285477 100644 --- a/server/src/test/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorCompactionConfigsResourceTest.java @@ -22,8 +22,12 @@ package org.apache.druid.server.http; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.audit.AuditManager; +import org.apache.druid.client.indexing.ClientMSQContext; import org.apache.druid.common.config.ConfigManager; import org.apache.druid.common.config.JacksonConfigManager; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.ErrorResponse; +import org.apache.druid.indexer.CompactionEngine; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.granularity.Granularities; import 
org.apache.druid.metadata.MetadataStorageConnector; @@ -62,6 +66,7 @@ public class CoordinatorCompactionConfigsResourceTest null, null, null, + null, ImmutableMap.of("key", "val") ); private static final DataSourceCompactionConfig NEW_CONFIG = new DataSourceCompactionConfig( @@ -76,6 +81,7 @@ public class CoordinatorCompactionConfigsResourceTest null, null, null, + null, ImmutableMap.of("key", "val") ); private static final byte[] OLD_CONFIG_IN_BYTES = {1, 2, 3}; @@ -189,6 +195,7 @@ public class CoordinatorCompactionConfigsResourceTest null, null, null, + CompactionEngine.NATIVE, ImmutableMap.of("key", "val") ); Response result = coordinatorCompactionConfigsResource.addOrUpdateCompactionConfig( @@ -202,6 +209,7 @@ public class CoordinatorCompactionConfigsResourceTest Assert.assertEquals(2, newConfigCaptor.getValue().getCompactionConfigs().size()); Assert.assertEquals(OLD_CONFIG, newConfigCaptor.getValue().getCompactionConfigs().get(0)); Assert.assertEquals(newConfig, newConfigCaptor.getValue().getCompactionConfigs().get(1)); + Assert.assertEquals(newConfig.getEngine(), newConfigCaptor.getValue().getEngine()); } @Test @@ -230,6 +238,7 @@ public class CoordinatorCompactionConfigsResourceTest null, null, null, + null, ImmutableMap.of("key", "val") ); final CoordinatorCompactionConfig originalConfig = CoordinatorCompactionConfig.from(ImmutableList.of(toDelete)); @@ -388,6 +397,7 @@ public class CoordinatorCompactionConfigsResourceTest null, null, null, + CompactionEngine.MSQ, ImmutableMap.of("key", "val") ); String author = "maytas"; @@ -401,6 +411,102 @@ public class CoordinatorCompactionConfigsResourceTest Assert.assertNotNull(newConfigCaptor.getValue()); Assert.assertEquals(1, newConfigCaptor.getValue().getCompactionConfigs().size()); Assert.assertEquals(newConfig, newConfigCaptor.getValue().getCompactionConfigs().get(0)); + Assert.assertEquals(newConfig.getEngine(), newConfigCaptor.getValue().getCompactionConfigs().get(0).getEngine()); + } + + @Test + public void testAddOrUpdateCompactionConfigWithoutExistingConfigAndEngineAsNull() + { + Mockito.when(mockConnector.lookup( + ArgumentMatchers.anyString(), + ArgumentMatchers.eq("name"), + ArgumentMatchers.eq("payload"), + ArgumentMatchers.eq(CoordinatorCompactionConfig.CONFIG_KEY) + ) + ).thenReturn(null); + Mockito.when(mockJacksonConfigManager.convertByteToConfig( + ArgumentMatchers.eq(null), + ArgumentMatchers.eq(CoordinatorCompactionConfig.class), + ArgumentMatchers.eq(CoordinatorCompactionConfig.empty()) + ) + ).thenReturn(CoordinatorCompactionConfig.empty()); + final ArgumentCaptor oldConfigCaptor = ArgumentCaptor.forClass(byte[].class); + final ArgumentCaptor newConfigCaptor = ArgumentCaptor.forClass( + CoordinatorCompactionConfig.class); + Mockito.when(mockJacksonConfigManager.set( + ArgumentMatchers.eq(CoordinatorCompactionConfig.CONFIG_KEY), + oldConfigCaptor.capture(), + newConfigCaptor.capture(), + ArgumentMatchers.any() + ) + ).thenReturn(ConfigManager.SetResult.ok()); + + final DataSourceCompactionConfig newConfig = new DataSourceCompactionConfig( + "dataSource", + null, + 500L, + null, + new Period(3600), + null, + new UserCompactionTaskGranularityConfig(Granularities.HOUR, null, null), + null, + null, + null, + null, + null, + ImmutableMap.of("key", "val") + ); + coordinatorCompactionConfigsResource.addOrUpdateCompactionConfig( + newConfig, + mockHttpServletRequest + ); + Assert.assertEquals(null, newConfigCaptor.getValue().getCompactionConfigs().get(0).getEngine()); + } + + @Test + public void 
testAddOrUpdateCompactionConfigWithInvalidMaxNumTasksForMSQEngine() + { + Mockito.when(mockConnector.lookup( + ArgumentMatchers.anyString(), + ArgumentMatchers.eq("name"), + ArgumentMatchers.eq("payload"), + ArgumentMatchers.eq(CoordinatorCompactionConfig.CONFIG_KEY) + ) + ).thenReturn(null); + Mockito.when(mockJacksonConfigManager.convertByteToConfig( + ArgumentMatchers.eq(null), + ArgumentMatchers.eq(CoordinatorCompactionConfig.class), + ArgumentMatchers.eq(CoordinatorCompactionConfig.empty()) + ) + ).thenReturn(CoordinatorCompactionConfig.empty()); + + int maxNumTasks = 1; + + final DataSourceCompactionConfig newConfig = new DataSourceCompactionConfig( + "dataSource", + null, + 500L, + null, + new Period(3600), + null, + new UserCompactionTaskGranularityConfig(Granularities.HOUR, null, null), + null, + null, + null, + null, + CompactionEngine.MSQ, + ImmutableMap.of(ClientMSQContext.CTX_MAX_NUM_TASKS, maxNumTasks) + ); + Response response = coordinatorCompactionConfigsResource.addOrUpdateCompactionConfig( + newConfig, + mockHttpServletRequest + ); + Assert.assertEquals(DruidException.Category.INVALID_INPUT.getExpectedStatus(), response.getStatus()); + Assert.assertEquals( + "Compaction config not supported. Reason[MSQ context maxNumTasks [1] cannot be less than 2, " + + "since at least 1 controller and 1 worker is necessary.].", + ((ErrorResponse) response.getEntity()).getUnderlyingException().getMessage() + ); } @Test diff --git a/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java b/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java index 0f20fc96bdc..b963f433708 100644 --- a/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java +++ b/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java @@ -100,6 +100,7 @@ public class DataSegmentPlusTest usedStatusLastUpdatedDate, null, null, + null, null ); @@ -108,7 +109,7 @@ public class DataSegmentPlusTest JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT ); - Assert.assertEquals(6, objectMap.size()); + Assert.assertEquals(7, objectMap.size()); final Map segmentObjectMap = MAPPER.readValue( MAPPER.writeValueAsString(segmentPlus.getDataSegment()), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT diff --git a/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java b/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java index 1b86bbca453..9c52d639300 100644 --- a/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java @@ -77,7 +77,7 @@ public class MetadataResourceTest .toArray(new DataSegment[0]); private final List segmentsPlus = Arrays.stream(segments) - .map(s -> new DataSegmentPlus(s, DateTimes.nowUtc(), DateTimes.nowUtc(), null, null, null)) + .map(s -> new DataSegmentPlus(s, DateTimes.nowUtc(), DateTimes.nowUtc(), null, null, null, null)) .collect(Collectors.toList()); private HttpServletRequest request; private SegmentsMetadataManager segmentsMetadataManager; @@ -486,6 +486,35 @@ public class MetadataResourceTest ); } + @Test + public void testGetBootstrapSegments() + { + Mockito.doReturn(ImmutableSet.of(segments[0], segments[1])).when(coordinator).getBroadcastSegments(); + + Response response = metadataResource.getBootstrapSegments(); + final List observedSegments = extractResponseList(response); + Assert.assertEquals(2, observedSegments.size()); + } + + @Test + public void testEmptyGetBootstrapSegments() + { + 
Mockito.doReturn(ImmutableSet.of()).when(coordinator).getBroadcastSegments(); + + Response response = metadataResource.getBootstrapSegments(); + final List observedSegments = extractResponseList(response); + Assert.assertEquals(0, observedSegments.size()); + } + + @Test + public void testNullGetBootstrapSegments() + { + Mockito.doReturn(null).when(coordinator).getBroadcastSegments(); + + Response response = metadataResource.getBootstrapSegments(); + Assert.assertEquals(503, response.getStatus()); + } + private List extractResponseList(Response response) { return Lists.newArrayList( diff --git a/services/src/main/java/org/apache/druid/cli/CliBroker.java b/services/src/main/java/org/apache/druid/cli/CliBroker.java index 7fe5ec57631..94160f55779 100644 --- a/services/src/main/java/org/apache/druid/cli/CliBroker.java +++ b/services/src/main/java/org/apache/druid/cli/CliBroker.java @@ -63,6 +63,7 @@ import org.apache.druid.server.ResponseContextConfig; import org.apache.druid.server.SegmentManager; import org.apache.druid.server.SubqueryGuardrailHelper; import org.apache.druid.server.SubqueryGuardrailHelperProvider; +import org.apache.druid.server.coordination.SegmentBootstrapper; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.server.coordination.ZkCoordinator; import org.apache.druid.server.http.BrokerResource; @@ -172,6 +173,7 @@ public class CliBroker extends ServerRunnable if (isZkEnabled) { LifecycleModule.register(binder, ZkCoordinator.class); } + LifecycleModule.register(binder, SegmentBootstrapper.class); bindAnnouncer( binder, diff --git a/services/src/main/java/org/apache/druid/cli/CliHistorical.java b/services/src/main/java/org/apache/druid/cli/CliHistorical.java index dc1acc41f87..2e231bcdcc3 100644 --- a/services/src/main/java/org/apache/druid/cli/CliHistorical.java +++ b/services/src/main/java/org/apache/druid/cli/CliHistorical.java @@ -49,6 +49,7 @@ import org.apache.druid.query.lookup.LookupModule; import org.apache.druid.server.QueryResource; import org.apache.druid.server.ResponseContextConfig; import org.apache.druid.server.SegmentManager; +import org.apache.druid.server.coordination.SegmentBootstrapper; import org.apache.druid.server.coordination.ServerManager; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.server.coordination.ZkCoordinator; @@ -125,6 +126,7 @@ public class CliHistorical extends ServerRunnable if (isZkEnabled) { LifecycleModule.register(binder, ZkCoordinator.class); } + LifecycleModule.register(binder, SegmentBootstrapper.class); JsonConfigProvider.bind(binder, "druid.historical.cache", CacheConfig.class); binder.install(new CacheModule()); diff --git a/services/src/main/java/org/apache/druid/cli/CliIndexer.java b/services/src/main/java/org/apache/druid/cli/CliIndexer.java index c6b817fa4a9..312b6f6b05a 100644 --- a/services/src/main/java/org/apache/druid/cli/CliIndexer.java +++ b/services/src/main/java/org/apache/druid/cli/CliIndexer.java @@ -74,6 +74,7 @@ import org.apache.druid.segment.realtime.appenderator.UnifiedIndexerAppenderator import org.apache.druid.server.DruidNode; import org.apache.druid.server.ResponseContextConfig; import org.apache.druid.server.SegmentManager; +import org.apache.druid.server.coordination.SegmentBootstrapper; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.server.coordination.ZkCoordinator; import org.apache.druid.server.http.HistoricalResource; @@ -187,6 +188,7 @@ public class CliIndexer extends ServerRunnable if (isZkEnabled) { 
LifecycleModule.register(binder, ZkCoordinator.class); } + LifecycleModule.register(binder, SegmentBootstrapper.class); bindAnnouncer( binder, diff --git a/services/src/main/java/org/apache/druid/cli/CliPeon.java b/services/src/main/java/org/apache/druid/cli/CliPeon.java index 1ca8ddf539f..f78a763cec4 100644 --- a/services/src/main/java/org/apache/druid/cli/CliPeon.java +++ b/services/src/main/java/org/apache/druid/cli/CliPeon.java @@ -125,6 +125,7 @@ import org.apache.druid.segment.realtime.firehose.ServiceAnnouncingChatHandlerPr import org.apache.druid.server.DruidNode; import org.apache.druid.server.ResponseContextConfig; import org.apache.druid.server.SegmentManager; +import org.apache.druid.server.coordination.SegmentBootstrapper; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.server.coordination.ZkCoordinator; import org.apache.druid.server.http.HistoricalResource; @@ -553,6 +554,7 @@ public class CliPeon extends GuiceRunnable if (isZkEnabled) { LifecycleModule.register(binder, ZkCoordinator.class); } + LifecycleModule.register(binder, SegmentBootstrapper.class); } @Provides diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java index 3fe049bdcb4..a6006046553 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java @@ -92,8 +92,9 @@ public class NestedDataOperatorConversions public static class JsonPathsOperatorConversion implements SqlOperatorConversion { + private static final String FUNCTION_NAME = "json_paths"; private static final SqlFunction SQL_FUNCTION = OperatorConversions - .operatorBuilder("JSON_PATHS") + .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypeChecker(OperandTypes.ANY) .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION) .returnTypeArrayWithNullableElements(SqlTypeName.VARCHAR) @@ -119,7 +120,7 @@ public class NestedDataOperatorConversions rexNode, druidExpressions -> DruidExpression.ofExpression( null, - DruidExpression.functionCall("json_paths"), + DruidExpression.functionCall(FUNCTION_NAME), druidExpressions ) ); @@ -128,8 +129,9 @@ public class NestedDataOperatorConversions public static class JsonKeysOperatorConversion implements SqlOperatorConversion { + private static final String FUNCTION_NAME = "json_keys"; private static final SqlFunction SQL_FUNCTION = OperatorConversions - .operatorBuilder("JSON_KEYS") + .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandNames("expr", "path") .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.STRING) .literalOperands(1) @@ -158,7 +160,7 @@ public class NestedDataOperatorConversions rexNode, druidExpressions -> DruidExpression.ofExpression( ColumnType.STRING_ARRAY, - DruidExpression.functionCall("json_keys"), + DruidExpression.functionCall(FUNCTION_NAME), druidExpressions ) ); @@ -167,9 +169,9 @@ public class NestedDataOperatorConversions public static class JsonQueryOperatorConversion implements SqlOperatorConversion { - private static final String FUNCTION_NAME = StringUtils.toUpperCase("json_query"); + private static final String FUNCTION_NAME = "json_query"; private static final SqlFunction SQL_FUNCTION = OperatorConversions - .operatorBuilder(FUNCTION_NAME) + .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) 
.operandTypeChecker( OperandTypes.family( SqlTypeFamily.ANY, @@ -212,7 +214,7 @@ public class NestedDataOperatorConversions final Expr pathExpr = plannerContext.parseExpression(druidExpressions.get(1).getExpression()); if (!pathExpr.isLiteral()) { // if path argument is not constant, just use a pure expression - return DruidExpression.ofFunctionCall(ColumnType.NESTED_DATA, "json_query", druidExpressions); + return DruidExpression.ofFunctionCall(ColumnType.NESTED_DATA, FUNCTION_NAME, druidExpressions); } // pre-normalize path so that the same expressions with different json path syntax are collapsed final String path = (String) pathExpr.eval(InputBindings.nilBindings()).value(); @@ -723,7 +725,7 @@ public class NestedDataOperatorConversions { private static final String FUNCTION_NAME = "json_object"; private static final SqlFunction SQL_FUNCTION = OperatorConversions - .operatorBuilder(FUNCTION_NAME) + .operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME)) .operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1))) .operandTypeInference((callBinding, returnType, operandTypes) -> { RelDataTypeFactory typeFactory = callBinding.getTypeFactory(); @@ -756,7 +758,7 @@ public class NestedDataOperatorConversions DruidExpression.ofExpression( ColumnType.NESTED_DATA, null, - DruidExpression.functionCall("json_object"), + DruidExpression.functionCall(FUNCTION_NAME), druidExpressions ); @@ -809,7 +811,7 @@ public class NestedDataOperatorConversions rexNode, druidExpressions -> DruidExpression.ofExpression( ColumnType.NESTED_DATA, - DruidExpression.functionCall("to_json_string"), + DruidExpression.functionCall(FUNCTION_NAME), druidExpressions ) ); @@ -847,7 +849,7 @@ public class NestedDataOperatorConversions rexNode, druidExpressions -> DruidExpression.ofExpression( ColumnType.NESTED_DATA, - DruidExpression.functionCall("parse_json"), + DruidExpression.functionCall(FUNCTION_NAME), druidExpressions ) ); @@ -885,7 +887,7 @@ public class NestedDataOperatorConversions rexNode, druidExpressions -> DruidExpression.ofExpression( ColumnType.NESTED_DATA, - DruidExpression.functionCall("try_parse_json"), + DruidExpression.functionCall(FUNCTION_NAME), druidExpressions ) ); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java index 1dc77885263..1877a212343 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java @@ -39,6 +39,7 @@ import javax.annotation.Nullable; public class DruidSqlInsert extends DruidSqlIngest { public static final String SQL_INSERT_SEGMENT_GRANULARITY = "sqlInsertSegmentGranularity"; + public static final String SQL_INSERT_QUERY_GRANULARITY = "sqlInsertQueryGranularity"; // This allows reusing super.unparse public static final SqlOperator OPERATOR = DruidSqlIngestOperator.INSERT_OPERATOR; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java index 933baaac9ba..8eb9541961c 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java @@ -200,7 +200,7 @@ public class CalcitePlanner implements Planner, ViewExpander state = CalcitePlanner.State.STATE_2_READY; - // If user specify own traitDef, instead of default default trait, + // If user specifies 
own traitDef, instead of default trait, // register the trait def specified in traitDefs. if (this.traitDefs == null) { planner.addRelTraitDef(ConventionTraitDef.INSTANCE); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java index 8b0fc163174..8b39be932e0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java @@ -60,6 +60,7 @@ import org.apache.druid.sql.calcite.rule.ExtensionCalciteRuleProvider; import org.apache.druid.sql.calcite.rule.FilterDecomposeCoalesceRule; import org.apache.druid.sql.calcite.rule.FilterDecomposeConcatRule; import org.apache.druid.sql.calcite.rule.FilterJoinExcludePushToChildRule; +import org.apache.druid.sql.calcite.rule.FixIncorrectInExpansionTypes; import org.apache.druid.sql.calcite.rule.FlattenConcatRule; import org.apache.druid.sql.calcite.rule.ProjectAggregatePruneUnusedCallRule; import org.apache.druid.sql.calcite.rule.ReverseLookupRule; @@ -71,6 +72,7 @@ import org.apache.druid.sql.calcite.run.DruidHook; import org.apache.druid.sql.calcite.run.EngineFeature; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; @@ -291,6 +293,7 @@ public class CalciteRulesManager // Program that pre-processes the tree before letting the full-on VolcanoPlanner loose. final List prePrograms = new ArrayList<>(); prePrograms.add(new LoggingProgram("Start", isDebug)); + prePrograms.add(sqlToRelWorkaroundProgram()); prePrograms.add(Programs.subQuery(DefaultRelMetadataProvider.INSTANCE)); prePrograms.add(new LoggingProgram("Finished subquery program", isDebug)); prePrograms.add(DecorrelateAndTrimFieldsProgram.INSTANCE); @@ -306,6 +309,12 @@ public class CalciteRulesManager return Programs.sequence(prePrograms.toArray(new Program[0])); } + private Program sqlToRelWorkaroundProgram() + { + Set rules = Collections.singleton(new FixIncorrectInExpansionTypes()); + return Programs.hep(rules, true, DefaultRelMetadataProvider.INSTANCE); + } + /** * Program to perform manipulations on the logical tree prior to starting the cost-based planner. 
Mainly this * helps the cost-based planner finish faster, and helps the decoupled planner generate the same plans as the diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java index a6319c40f66..75778daf559 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java @@ -61,7 +61,6 @@ import org.apache.druid.error.InvalidSqlInput; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.query.QueryContext; -import org.apache.druid.query.QueryContexts; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.Types; import org.apache.druid.segment.column.ValueType; @@ -123,7 +122,6 @@ public class DruidSqlValidator extends BaseDruidSqlValidator throw Util.unexpected(windowOrId.getKind()); } - @Nullable SqlNode lowerBound = targetWindow.getLowerBound(); @Nullable @@ -135,14 +133,15 @@ public class DruidSqlValidator extends BaseDruidSqlValidator ); } - if (isPrecedingOrFollowing(lowerBound) && - isPrecedingOrFollowing(upperBound) && - lowerBound.getKind() == upperBound.getKind()) { - // this limitation can be lifted when https://github.com/apache/druid/issues/15739 is addressed - throw buildCalciteContextException( - "Query bounds with both lower and upper bounds as PRECEDING or FOLLOWING is not supported.", - windowOrId - ); + if (lowerBound != null && upperBound == null) { + if (lowerBound.getKind() == SqlKind.FOLLOWING || SqlWindow.isUnboundedFollowing(lowerBound)) { + upperBound = lowerBound; + lowerBound = SqlWindow.createCurrentRow(SqlParserPos.ZERO); + } else { + upperBound = SqlWindow.createCurrentRow(SqlParserPos.ZERO); + } + targetWindow.setLowerBound(lowerBound); + targetWindow.setUpperBound(upperBound); } boolean hasBounds = lowerBound != null || upperBound != null; @@ -162,19 +161,13 @@ public class DruidSqlValidator extends BaseDruidSqlValidator } } - - if (plannerContext.queryContext().isWindowingStrictValidation()) { - if (!targetWindow.isRows() && - (!isUnboundedOrCurrent(lowerBound) || !isUnboundedOrCurrent(upperBound))) { - // this limitation can be lifted when https://github.com/apache/druid/issues/15767 is addressed - throw buildCalciteContextException( - StringUtils.format( - "The query contains a window frame which may return incorrect results. To disregard this warning, set [%s] to false in the query context.", - QueryContexts.WINDOWING_STRICT_VALIDATION - ), - windowOrId - ); - } + if (!targetWindow.isRows() && + (!isUnboundedOrCurrent(lowerBound) || !isUnboundedOrCurrent(upperBound))) { + // this limitation can be lifted when https://github.com/apache/druid/issues/15767 is addressed + throw buildCalciteContextException( + "Order By with RANGE clause currently supports only UNBOUNDED or CURRENT ROW. 
Use ROWS clause instead.", + windowOrId + ); } super.validateWindow(windowOrId, scope, call); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 5bcec39a1e8..a7d534db700 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -106,7 +106,6 @@ import org.joda.time.Interval; import javax.annotation.Nonnull; import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java index 798b0bb407b..8db108fbb94 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java @@ -31,7 +31,6 @@ import org.apache.druid.server.QueryResponse; import org.apache.druid.sql.calcite.planner.PlannerContext; import javax.annotation.Nullable; - import java.util.Set; public abstract class DruidRel> extends AbstractRelNode diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java index 4f0f0eda21b..afd775ef4ee 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java @@ -466,7 +466,9 @@ public class Windowing if (bound.isUnbounded() || bound.isCurrentRow()) { return 0; } - return getConstant(((RexInputRef) bound.getOffset()).getIndex()); + + final int value = getConstant(((RexInputRef) bound.getOffset()).getIndex()); + return bound.isPreceding() ? -value : value; } private int getConstant(int refIndex) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/FixIncorrectInExpansionTypes.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/FixIncorrectInExpansionTypes.java new file mode 100644 index 00000000000..9c049ac89da --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/FixIncorrectInExpansionTypes.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rules.SubstitutionRule; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Rewrites comparisions to avoid bug FIXME. + * + * Rewrites RexCall::VARCHAR = RexLiteral::CHAR to RexCall::VARCHAR = + * RexLiteral::VARCHAR + * + * needed until CALCITE-6435 is fixed & released. + */ +public class FixIncorrectInExpansionTypes extends RelOptRule implements SubstitutionRule +{ + public FixIncorrectInExpansionTypes() + { + super(operand(RelNode.class, any())); + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final RelNode oldNode = call.rel(0); + final RewriteShuttle shuttle = new RewriteShuttle(oldNode.getCluster().getRexBuilder()); + final RelNode newNode = oldNode.accept(shuttle); + + // noinspection ObjectEquality + if (newNode != oldNode) { + call.transformTo(newNode); + call.getPlanner().prune(oldNode); + } + } + + private static class RewriteShuttle extends RexShuttle + { + private final RexBuilder rexBuilder; + + public RewriteShuttle(RexBuilder rexBuilder) + { + this.rexBuilder = rexBuilder; + } + + @Override + public RexNode visitCall(RexCall call) + { + RexNode newNode = super.visitCall(call); + if (newNode.getKind() == SqlKind.EQUALS || newNode.getKind() == SqlKind.NOT_EQUALS) { + RexCall newCall = (RexCall) newNode; + RexNode op0 = newCall.getOperands().get(0); + RexNode op1 = newCall.getOperands().get(1); + if (RexUtil.isLiteral(op1, false)) { + + if (op1.getType().getSqlTypeName() == SqlTypeName.CHAR + && op0.getType().getSqlTypeName() == SqlTypeName.VARCHAR) { + + RexNode newLiteral = rexBuilder.ensureType(op0.getType(), op1, true); + return rexBuilder.makeCall( + newCall.getOperator(), + op0, + newLiteral + ); + } + } + } + return newNode; + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlResults.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlResults.java index 486c23f67c9..38b3ab1fd89 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlResults.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlResults.java @@ -114,21 +114,21 @@ public class SqlResults } } else if (sqlTypeName == SqlTypeName.BIGINT) { try { - coercedValue = DimensionHandlerUtils.convertObjectToLong(value); + coercedValue = DimensionHandlerUtils.convertObjectToLong(value, fieldName); } catch (Exception e) { throw cannotCoerce(value, sqlTypeName, fieldName); } } else if (sqlTypeName == SqlTypeName.FLOAT) { try { - coercedValue = DimensionHandlerUtils.convertObjectToFloat(value); + coercedValue = DimensionHandlerUtils.convertObjectToFloat(value, fieldName); } catch (Exception e) { throw cannotCoerce(value, sqlTypeName, fieldName); } } else if (SqlTypeName.FRACTIONAL_TYPES.contains(sqlTypeName)) { try { - coercedValue = DimensionHandlerUtils.convertObjectToDouble(value); + coercedValue = DimensionHandlerUtils.convertObjectToDouble(value, fieldName); } catch (Exception e) { throw cannotCoerce(value, sqlTypeName, fieldName); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCache.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCache.java index 0573d8d49ee..628b6ea3978 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCache.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCache.java @@ -173,6 +173,16 @@ public class BrokerSegmentMetadataCache extends AbstractSegmentMetadataCache @@ -196,6 +206,11 @@ public class BrokerSegmentMetadataCache extends AbstractSegmentMetadataCache dataSourcesToQuery = new HashSet<>(segmentMetadataInfo.keySet()); + // this is the complete set of datasources polled from the Coordinator + final Set polledDatasources = queryDataSources(); + + dataSourcesToQuery.addAll(polledDatasources); + log.debug("Querying schema for [%s] datasources from Coordinator.", dataSourcesToQuery); // Fetch datasource information from the Coordinator @@ -227,14 +242,7 @@ public class BrokerSegmentMetadataCache extends AbstractSegmentMetadataCache queryDataSources() + { + Set dataSources = new HashSet<>(); + + try { + Set polled = FutureUtils.getUnchecked(coordinatorClient.fetchDataSourcesWithUsedSegments(), true); + if (polled != null) { + dataSources.addAll(polled); + } + } + catch (Exception e) { + log.debug(e, "Failed to query datasources from the Coordinator."); + } + + return dataSources; + } + private Map queryDataSourceInformation(Set dataSourcesToQuery) { Stopwatch stopwatch = Stopwatch.createStarted(); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index dfee7d0e3a2..a2e45c4af8c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -110,7 +110,6 @@ import org.junit.jupiter.api.Named; import org.junit.jupiter.api.extension.RegisterExtension; import javax.annotation.Nullable; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -256,6 +255,8 @@ public class BaseCalciteQueryTest extends CalciteTestBase ImmutableMap.builder() .putAll(QUERY_CONTEXT_DEFAULT) .put(QueryContexts.MAX_SUBQUERY_BYTES_KEY, "100000") + // Disallows the fallback to row based limiting + .put(QueryContexts.MAX_SUBQUERY_ROWS_KEY, "1") .build(); // Add additional context to the given context map for when the diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java index 621c919b0ed..514695c4afe 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteCatalogIngestionDmlTest.java @@ -56,7 +56,6 @@ import org.apache.druid.sql.calcite.util.CalciteTests; import org.junit.jupiter.api.Test; import javax.annotation.Nullable; - import java.util.HashMap; import java.util.Map; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java index 74cd5a07b26..e26803fa498 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteIngestionDmlTest.java @@ -76,7 +76,6 @@ import org.junit.jupiter.api.AfterEach; import javax.annotation.Nonnull; import javax.annotation.Nullable; - import java.io.File; import java.util.ArrayList; import 
java.util.Arrays; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 95a4d0c1c32..e2bc9d45eb2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -6870,6 +6870,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest @Test public void testJsonQueryArrays() { + msqIncompatible(); testBuilder() .sql("SELECT JSON_QUERY_ARRAY(arrayObject, '$') FROM druid.arrays") .queryContext(QUERY_CONTEXT_DEFAULT) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 1975f5589e6..2ae095d41c7 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -13823,10 +13823,8 @@ public class CalciteQueryTest extends BaseCalciteQueryTest .build() ), ImmutableList.builder().add( - new Object[]{"", null, 2L}, - new Object[]{"a", null, 1L}, - new Object[]{"", null, 1L}, - new Object[]{"a", null, 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, new Object[]{"abc", null, 1L}, new Object[]{NULL_STRING, null, 6L}, new Object[]{"", timestamp("2000-01-01"), 2L}, @@ -15548,7 +15546,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest .queryContext(ImmutableMap.of(PlannerContext.CTX_ENABLE_WINDOW_FNS, true)) .sql("SELECT dim1,ROW_NUMBER() OVER (ORDER BY dim1 RANGE BETWEEN 3 PRECEDING AND 2 FOLLOWING) from druid.foo") .run()); - assertThat(e, invalidSqlIs("The query contains a window frame which may return incorrect results. To disregard this warning, set [windowingStrictValidation] to false in the query context. (line [1], column [31])")); + assertThat(e, invalidSqlIs("Order By with RANGE clause currently supports only UNBOUNDED or CURRENT ROW. Use ROWS clause instead. (line [1], column [31])")); } @Test @@ -15563,26 +15561,6 @@ public class CalciteQueryTest extends BaseCalciteQueryTest assertThat(e, invalidSqlIs("Window frames with expression based lower/upper bounds are not supported. (line [1], column [31])")); } - - @Test - public void testUnSupportedWindowBoundTypes() - { - assumeFeatureAvailable(EngineFeature.WINDOW_FUNCTIONS); - - DruidException e; - e = assertThrows(DruidException.class, () -> testBuilder() - .queryContext(ImmutableMap.of(PlannerContext.CTX_ENABLE_WINDOW_FNS, true)) - .sql("SELECT dim1,ROW_NUMBER() OVER (ORDER BY dim1 ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) from druid.foo") - .run()); - assertThat(e, invalidSqlIs("Query bounds with both lower and upper bounds as PRECEDING or FOLLOWING is not supported. (line [1], column [31])")); - - e = assertThrows(DruidException.class, () -> testBuilder() - .queryContext(ImmutableMap.of(PlannerContext.CTX_ENABLE_WINDOW_FNS, true)) - .sql("SELECT dim1,ROW_NUMBER() OVER (ORDER BY dim1 ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) from druid.foo") - .run()); - assertThat(e, invalidSqlIs("Query bounds with both lower and upper bounds as PRECEDING or FOLLOWING is not supported. 
(line [1], column [31])")); - } - @Test public void testNtileNotSupportedWithFrame() { @@ -16290,4 +16268,29 @@ public class CalciteQueryTest extends BaseCalciteQueryTest ) ).run(); } + + @SqlTestFrameworkConfig.NumMergeBuffers(3) + @Test + public void testGroupingSetsWithDifferentOrderLimitSpec() + { + msqIncompatible(); + testBuilder() + .sql( + "SELECT\n" + + " isNew, isRobot, COUNT(*) AS \"Cnt\"\n" + + "FROM \"wikipedia\"\n" + + "GROUP BY GROUPING SETS ((isRobot), (isNew))\n" + + "ORDER BY 2, 1\n" + + "limit 100" + ) + .expectedResults( + ResultMatchMode.RELAX_NULLS, + ImmutableList.of( + new Object[]{"false", null, 36966L}, + new Object[]{"true", null, 2278L}, + new Object[]{null, "false", 23824L}, + new Object[]{null, "true", 15420L} + ) + ).run(); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java index 3203ae9915e..d2699e77f79 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java @@ -21,7 +21,9 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.calcite.rel.RelNode; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; @@ -60,6 +62,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class CalciteSelectQueryTest extends BaseCalciteQueryTest { @Test @@ -130,6 +134,28 @@ public class CalciteSelectQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testTimeCeilExpressionContainingInvalidPeriod() + { + testQueryThrows( + "SELECT TIME_CEIL(__time, 'PT1Y') FROM foo", + DruidExceptionMatcher.invalidInput().expectMessageContains( + "Invalid period['PT1Y'] specified for expression[timestamp_ceil(\"__time\", 'PT1Y', null, 'UTC')]" + ) + ); + } + + @Test + public void testTimeFloorExpressionContainingInvalidPeriod() + { + testQueryThrows( + "SELECT TIME_FLOOR(TIMESTAMPADD(DAY, -1, __time), 'PT1D') FROM foo", + DruidExceptionMatcher.invalidInput().expectMessageContains( + "Invalid period['PT1D'] specified for expression[timestamp_floor((\"__time\" + -86400000), 'PT1D', null, 'UTC')]" + ) + ); + } + @Test public void testValuesContainingNull() { @@ -2152,4 +2178,28 @@ public class CalciteSelectQueryTest extends BaseCalciteQueryTest ) .run(); } + + @Test + public void testSqlToRelInConversion() + { + assertEquals( + "1.37.0", + RelNode.class.getPackage().getImplementationVersion(), + "Calcite version changed; check if CALCITE-6435 is fixed and remove:\n * method CalciteRulesManager#sqlToRelWorkaroundProgram\n * FixIncorrectInExpansionTypes class\n* this assertion" + ); + + testBuilder() + .sql( + "SELECT channel FROM wikipedia\n" + + "WHERE channel in ('#en.wikipedia') and channel = '#en.wikipedia' and\n" + + "isRobot = 'false'\n" + + "LIMIT 1" + ) + .expectedResults( + ImmutableList.of( + new Object[] {"#en.wikipedia"} + ) + ) + .run(); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java index 808ab5c3663..6269e2a5c8c 100644 
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteSubqueryTest.java @@ -20,7 +20,15 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.MapInputRowParser; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; @@ -31,6 +39,7 @@ import org.apache.druid.query.Druids; import org.apache.druid.query.JoinDataSource; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryDataSource; +import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.ResourceLimitExceededException; import org.apache.druid.query.TableDataSource; import org.apache.druid.query.aggregation.CountAggregatorFactory; @@ -54,12 +63,21 @@ import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.query.topn.DimensionTopNMetricSpec; import org.apache.druid.query.topn.TopNQueryBuilder; +import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.join.JoinType; +import org.apache.druid.segment.join.JoinableFactoryWrapper; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.SqlTestFramework; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; import org.hamcrest.CoreMatchers; import org.joda.time.DateTimeZone; import org.joda.time.Period; @@ -67,12 +85,14 @@ import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static org.junit.Assert.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -84,6 +104,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; * 1. Where the memory limit is not set. The intermediate results are materialized as inline rows * 2. Where the memory limit is set. 
The intermediate results are materialized as frames */ +@SqlTestFrameworkConfig.ComponentSupplier(CalciteSubqueryTest.SubqueryComponentSupplier.class) public class CalciteSubqueryTest extends BaseCalciteQueryTest { public static Iterable constructorFeeder() @@ -147,6 +168,57 @@ public class CalciteSubqueryTest extends BaseCalciteQueryTest ); } + @MethodSource("constructorFeeder") + @ParameterizedTest(name = "{0}") + public void testSubqueryOnDataSourceWithMissingColumnsInSegments(String testName, Map queryContext) + { + if (!queryContext.containsKey(QueryContexts.MAX_SUBQUERY_BYTES_KEY)) { + cannotVectorize(); + } + testQuery( + "SELECT\n" + + " __time,\n" + + " col1,\n" + + " col2,\n" + + " col3,\n" + + " COUNT(*)\n" + + "FROM (SELECT * FROM dsMissingCol LIMIT 10)\n" + + "GROUP BY 1, 2, 3, 4", + queryContext, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource( + new QueryDataSource( + newScanQueryBuilder() + .dataSource("dsMissingCol") + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("__time", "col1", "col2", "col3") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(10) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + new DefaultDimensionSpec("__time", "d0", ColumnType.LONG), + new DefaultDimensionSpec("col1", "d1", ColumnType.STRING), + new DefaultDimensionSpec("col2", "d2", ColumnType.STRING), + new DefaultDimensionSpec("col3", "d3", ColumnType.STRING) + ) + .setAggregatorSpecs(aggregators( + new CountAggregatorFactory("a0") + )) + .setContext(queryContext) + .build() + ), + ImmutableList.of( + new Object[]{946684800000L, "abc", NullHandling.defaultStringValue(), "def", 1L}, + new Object[]{946684800000L, "foo", "bar", NullHandling.defaultStringValue(), 1L} + ) + ); + } + @MethodSource("constructorFeeder") @ParameterizedTest(name = "{0}") public void testExactCountDistinctOfSemiJoinResult(String testName, Map queryContext) @@ -1315,4 +1387,134 @@ public class CalciteSubqueryTest extends BaseCalciteQueryTest ImmutableList.of() ); } + + public static class SubqueryComponentSupplier extends SqlTestFramework.StandardComponentSupplier + { + + private final TempDirProducer tmpDirProducer; + + public SubqueryComponentSupplier(TempDirProducer tempDirProducer) + { + super(tempDirProducer); + this.tmpDirProducer = tempDirProducer; + } + + @Override + public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( + QueryRunnerFactoryConglomerate conglomerate, + JoinableFactoryWrapper joinableFactory, + Injector injector + ) + { + SpecificSegmentsQuerySegmentWalker walker = + super.createQuerySegmentWalker(conglomerate, joinableFactory, injector); + + final String datasource1 = "dsMissingCol"; + final File tmpFolder = tempDirProducer.newTempFolder(); + + final List> rawRows1 = ImmutableList.of( + ImmutableMap.builder() + .put("t", "2000-01-01") + .put("col1", "foo") + .put("col2", "bar") + .build() + ); + final List rows1 = + rawRows1 + .stream() + .map(mapInputRow -> MapInputRowParser.parse( + new InputRowSchema( + new TimestampSpec("t", "iso", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("col1", "col2")) + ), + null + ), + mapInputRow + )) + .collect(Collectors.toList()); + final QueryableIndex queryableIndex1 = IndexBuilder + .create() + .tmpDir(new File(tmpFolder, datasource1)) + .segmentWriteOutMediumFactory(OnHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema(new IncrementalIndexSchema.Builder() + 
.withRollup(false) + .withDimensionsSpec( + new DimensionsSpec( + ImmutableList.of( + new StringDimensionSchema("col1"), + new StringDimensionSchema("col2") + ) + ) + ) + .build() + ) + .rows(rows1) + .buildMMappedIndex(); + + final List> rawRows2 = ImmutableList.of( + ImmutableMap.builder() + .put("t", "2000-01-01") + .put("col1", "abc") + .put("col3", "def") + .build() + ); + final List rows2 = + rawRows2 + .stream() + .map(mapInputRow -> MapInputRowParser.parse( + new InputRowSchema( + new TimestampSpec("t", "iso", null), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("col1", "col3")) + ), + null + ), + mapInputRow + )) + .collect(Collectors.toList()); + final QueryableIndex queryableIndex2 = IndexBuilder + .create() + .tmpDir(new File(tmpFolder, datasource1)) + .segmentWriteOutMediumFactory(OnHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema(new IncrementalIndexSchema.Builder() + .withRollup(false) + .withDimensionsSpec( + new DimensionsSpec( + ImmutableList.of( + new StringDimensionSchema("col1"), + new StringDimensionSchema("col3") + ) + ) + ) + .build() + ) + .rows(rows2) + .buildMMappedIndex(); + + walker.add( + DataSegment.builder() + .dataSource(datasource1) + .interval(Intervals.ETERNITY) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(), + queryableIndex1 + ); + + walker.add( + DataSegment.builder() + .dataSource(datasource1) + .interval(Intervals.ETERNITY) + .version("1") + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + queryableIndex2 + ); + + return walker; + } + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java index f5e161b3a6a..b3d657b148f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java @@ -201,8 +201,7 @@ public class CalciteWindowQueryTest extends BaseCalciteQueryTest .sql(testCase.getSql()) .queryContext(ImmutableMap.of( PlannerContext.CTX_ENABLE_WINDOW_FNS, true, - QueryContexts.ENABLE_DEBUG, true, - QueryContexts.WINDOWING_STRICT_VALIDATION, false + QueryContexts.ENABLE_DEBUG, true )) .addCustomVerification(QueryVerification.ofResults(testCase)) .run(); @@ -224,8 +223,7 @@ public class CalciteWindowQueryTest extends BaseCalciteQueryTest .sql(testCase.getSql()) .queryContext(ImmutableMap.of(QueryContexts.ENABLE_DEBUG, true, PlannerContext.CTX_ENABLE_WINDOW_FNS, true, - QueryContexts.MAX_SUBQUERY_BYTES_KEY, "100000", - QueryContexts.WINDOWING_STRICT_VALIDATION, false + QueryContexts.MAX_SUBQUERY_BYTES_KEY, "100000" ) ) .addCustomVerification(QueryVerification.ofResults(testCase)) @@ -246,8 +244,7 @@ public class CalciteWindowQueryTest extends BaseCalciteQueryTest ) .queryContext(ImmutableMap.of( PlannerContext.CTX_ENABLE_WINDOW_FNS, true, - QueryContexts.ENABLE_DEBUG, true, - QueryContexts.WINDOWING_STRICT_VALIDATION, false + QueryContexts.ENABLE_DEBUG, true )) .expectedResults(ImmutableList.of( new Object[]{1L}, @@ -269,8 +266,7 @@ public class CalciteWindowQueryTest extends BaseCalciteQueryTest "FROM \"wikipedia\"") .queryContext(ImmutableMap.of( PlannerContext.CTX_ENABLE_WINDOW_FNS, true, - QueryContexts.ENABLE_DEBUG, true, - QueryContexts.WINDOWING_STRICT_VALIDATION, false + QueryContexts.ENABLE_DEBUG, true )) .expectedResults(ImmutableList.of( new Object[]{1L, 1L} @@ -278,6 +274,52 @@ public class CalciteWindowQueryTest extends BaseCalciteQueryTest 
.run(); } + @Test + public void testWindowAllBoundsCombination() + { + testBuilder() + .sql("select\n" + + "cityName,\n" + + "count(*) over (partition by cityName order by countryName rows between unbounded preceding and 1 preceding) c1,\n" + + "count(*) over (partition by cityName order by countryName rows between unbounded preceding and current row) c2,\n" + + "count(*) over (partition by cityName order by countryName rows between unbounded preceding and 1 following) c3,\n" + + "count(*) over (partition by cityName order by countryName rows between unbounded preceding and unbounded following) c4,\n" + + "count(*) over (partition by cityName order by countryName rows between 3 preceding and 1 preceding) c5,\n" + + "count(*) over (partition by cityName order by countryName rows between 1 preceding and current row) c6,\n" + + "count(*) over (partition by cityName order by countryName rows between 1 preceding and 1 FOLLOWING) c7,\n" + + "count(*) over (partition by cityName order by countryName rows between 1 preceding and unbounded FOLLOWING) c8,\n" + + "count(*) over (partition by cityName order by countryName rows between 1 FOLLOWING and unbounded FOLLOWING) c9,\n" + + "count(*) over (partition by cityName order by countryName rows between 1 FOLLOWING and 3 FOLLOWING) c10,\n" + + "count(*) over (partition by cityName order by countryName rows between current row and 1 following) c11,\n" + + "count(*) over (partition by cityName order by countryName rows between current row and unbounded following) c12\n" + + "from wikipedia\n" + + "where cityName in ('Vienna', 'Seoul')\n" + + "group by countryName, cityName, added") + .queryContext(ImmutableMap.of( + PlannerContext.CTX_ENABLE_WINDOW_FNS, true, + QueryContexts.ENABLE_DEBUG, true + )) + .expectedResults(ImmutableList.of( + new Object[]{"Seoul", 0L, 1L, 2L, 13L, 0L, 1L, 2L, 13L, 12L, 3L, 2L, 13L}, + new Object[]{"Seoul", 1L, 2L, 3L, 13L, 1L, 2L, 3L, 13L, 11L, 3L, 2L, 12L}, + new Object[]{"Seoul", 2L, 3L, 4L, 13L, 2L, 2L, 3L, 12L, 10L, 3L, 2L, 11L}, + new Object[]{"Seoul", 3L, 4L, 5L, 13L, 3L, 2L, 3L, 11L, 9L, 3L, 2L, 10L}, + new Object[]{"Seoul", 4L, 5L, 6L, 13L, 3L, 2L, 3L, 10L, 8L, 3L, 2L, 9L}, + new Object[]{"Seoul", 5L, 6L, 7L, 13L, 3L, 2L, 3L, 9L, 7L, 3L, 2L, 8L}, + new Object[]{"Seoul", 6L, 7L, 8L, 13L, 3L, 2L, 3L, 8L, 6L, 3L, 2L, 7L}, + new Object[]{"Seoul", 7L, 8L, 9L, 13L, 3L, 2L, 3L, 7L, 5L, 3L, 2L, 6L}, + new Object[]{"Seoul", 8L, 9L, 10L, 13L, 3L, 2L, 3L, 6L, 4L, 3L, 2L, 5L}, + new Object[]{"Seoul", 9L, 10L, 11L, 13L, 3L, 2L, 3L, 5L, 3L, 3L, 2L, 4L}, + new Object[]{"Seoul", 10L, 11L, 12L, 13L, 3L, 2L, 3L, 4L, 2L, 2L, 2L, 3L}, + new Object[]{"Seoul", 11L, 12L, 13L, 13L, 3L, 2L, 3L, 3L, 1L, 1L, 2L, 2L}, + new Object[]{"Seoul", 12L, 13L, 13L, 13L, 3L, 2L, 2L, 2L, 0L, 0L, 1L, 1L}, + new Object[]{"Vienna", 0L, 1L, 2L, 3L, 0L, 1L, 2L, 3L, 2L, 2L, 2L, 3L}, + new Object[]{"Vienna", 1L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 1L, 1L, 2L, 2L}, + new Object[]{"Vienna", 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 0L, 0L, 1L, 1L} + )) + .run(); + } + private WindowOperatorQuery getWindowOperatorQuery(List> queries) { assertEquals(1, queries.size()); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledPlanningCalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledPlanningCalciteJoinQueryTest.java index d8abfbb2075..19553104a83 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledPlanningCalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledPlanningCalciteJoinQueryTest.java 
@@ -24,6 +24,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.RegisterExtension; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; + import java.util.Map; import static org.junit.Assert.assertNotNull; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledTestConfig.java b/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledTestConfig.java index 9bd57e10afb..f23197d437a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledTestConfig.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DecoupledTestConfig.java @@ -22,6 +22,7 @@ package org.apache.druid.sql.calcite; import org.apache.calcite.rel.rules.CoreRules; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; + import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java index 5ef280e24b8..cb7bed7e041 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java @@ -19,38 +19,22 @@ package org.apache.druid.sql.calcite; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterators; import com.google.common.io.ByteStreams; import com.google.inject.Injector; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.commons.io.FileUtils; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.data.input.InputRow; -import org.apache.druid.data.input.MapBasedInputRow; -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.DoubleDimensionSchema; -import org.apache.druid.data.input.impl.LongDimensionSchema; -import org.apache.druid.data.input.impl.StringDimensionSchema; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Numbers; -import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.TimestampParser; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunnerFactoryConglomerate; -import org.apache.druid.segment.IndexBuilder; -import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.join.JoinableFactoryWrapper; -import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; import org.apache.druid.sql.calcite.DisableUnless.DisableUnlessRule; import org.apache.druid.sql.calcite.DrillWindowQueryTest.DrillComponentSupplier; @@ -60,8 +44,7 @@ import org.apache.druid.sql.calcite.QueryTestRunner.QueryResults; import org.apache.druid.sql.calcite.planner.PlannerCaptureHook; import org.apache.druid.sql.calcite.planner.PlannerContext; 
import org.apache.druid.sql.calcite.util.SqlTestFramework.StandardComponentSupplier; -import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.apache.druid.sql.calcite.util.TestDataBuilder; import org.joda.time.DateTime; import org.joda.time.LocalTime; import org.junit.Assert; @@ -72,7 +55,6 @@ import org.junit.jupiter.api.extension.RegisterExtension; import javax.annotation.Nonnull; import javax.annotation.Nullable; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -89,7 +71,6 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -114,8 +95,6 @@ import static org.junit.Assert.fail; @SqlTestFrameworkConfig.ComponentSupplier(DrillComponentSupplier.class) public class DrillWindowQueryTest extends BaseCalciteQueryTest { - private static final ObjectMapper MAPPER = new DefaultObjectMapper(); - static { NullHandling.initializeForTests(); } @@ -260,135 +239,9 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest joinableFactory, injector); - attachIndex( - retVal, - "tblWnulls.parquet", - new LongDimensionSchema("c1"), - new StringDimensionSchema("c2")); - - // {"col0":1,"col1":65534,"col2":256.0,"col3":1234.9,"col4":73578580,"col5":1393720082338,"col6":421185052800000,"col7":false,"col8":"CA","col9":"AXXXXXXXXXXXXXXXXXXXXXXXXXCXXXXXXXXXXXXXXXXXXXXXXXXZ"} - attachIndex( - retVal, - "allTypsUniq.parquet", - new LongDimensionSchema("col0"), - new LongDimensionSchema("col1"), - new DoubleDimensionSchema("col2"), - new DoubleDimensionSchema("col3"), - new LongDimensionSchema("col4"), - new LongDimensionSchema("col5"), - new LongDimensionSchema("col6"), - new StringDimensionSchema("col7"), - new StringDimensionSchema("col8"), - new StringDimensionSchema("col9")); - attachIndex( - retVal, - "smlTbl.parquet", - // "col_int": 8122, - new LongDimensionSchema("col_int"), - // "col_bgint": 817200, - new LongDimensionSchema("col_bgint"), - // "col_char_2": "IN", - new StringDimensionSchema("col_char_2"), - // "col_vchar_52": - // "AXXXXXXXXXXXXXXXXXXXXXXXXXCXXXXXXXXXXXXXXXXXXXXXXXXB", - new StringDimensionSchema("col_vchar_52"), - // "col_tmstmp": 1409617682418, - new LongDimensionSchema("col_tmstmp"), - // "col_dt": 422717616000000, - new LongDimensionSchema("col_dt"), - // "col_booln": false, - new StringDimensionSchema("col_booln"), - // "col_dbl": 12900.48, - new DoubleDimensionSchema("col_dbl"), - // "col_tm": 33109170 - new LongDimensionSchema("col_tm")); - attachIndex( - retVal, - "fewRowsAllData.parquet", - // "col0":12024, - new LongDimensionSchema("col0"), - // "col1":307168, - new LongDimensionSchema("col1"), - // "col2":"VT", - new StringDimensionSchema("col2"), - // "col3":"DXXXXXXXXXXXXXXXXXXXXXXXXXEXXXXXXXXXXXXXXXXXXXXXXXXF", - new StringDimensionSchema("col3"), - // "col4":1338596882419, - new LongDimensionSchema("col4"), - // "col5":422705433600000, - new LongDimensionSchema("col5"), - // "col6":true, - new StringDimensionSchema("col6"), - // "col7":3.95110006277E8, - new DoubleDimensionSchema("col7"), - // "col8":67465430 - new LongDimensionSchema("col8")); - attachIndex( - retVal, - "t_alltype.parquet", - // "c1":1, - new LongDimensionSchema("c1"), - // "c2":592475043, - new LongDimensionSchema("c2"), - // "c3":616080519999272, - new LongDimensionSchema("c3"), - // "c4":"ObHeWTDEcbGzssDwPwurfs", - new StringDimensionSchema("c4"), - // "c5":"0sZxIfZ 
CGwTOaLWZ6nWkUNx", - new StringDimensionSchema("c5"), - // "c6":1456290852307, - new LongDimensionSchema("c6"), - // "c7":421426627200000, - new LongDimensionSchema("c7"), - // "c8":true, - new StringDimensionSchema("c8"), - // "c9":0.626179100469 - new DoubleDimensionSchema("c9")); - - return retVal; - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - private void attachIndex(SpecificSegmentsQuerySegmentWalker texasRanger, String dataSource, DimensionSchema... dims) - { - ArrayList dimensionNames = new ArrayList<>(dims.length); - for (DimensionSchema dimension : dims) { - dimensionNames.add(dimension.getName()); - } - final File tmpFolder = tempDirProducer.newTempFolder(); - final QueryableIndex queryableIndex = IndexBuilder - .create() - .tmpDir(new File(tmpFolder, dataSource)) - .segmentWriteOutMediumFactory(OnHeapMemorySegmentWriteOutMediumFactory.instance()) - .schema(new IncrementalIndexSchema.Builder() - .withRollup(false) - .withDimensionsSpec(new DimensionsSpec(Arrays.asList(dims))) - .build()) - .rows( - () -> { - try { - return Iterators.transform( - MAPPER.readerFor(Map.class) - .readValues( - ClassLoader.getSystemResource("drill/window/datasources/" + dataSource + ".json")), - (Function) input -> new MapBasedInputRow(0, dimensionNames, input)); - } - catch (IOException e) { - throw new RE(e, "problem reading file"); - } - }) - .buildMMappedIndex(); - - texasRanger.add( - DataSegment.builder() - .dataSource(dataSource) - .interval(Intervals.ETERNITY) - .version("1") - .shardSpec(new NumberedShardSpec(0, 0)) - .size(0) - .build(), - queryableIndex); + TestDataBuilder.attachIndexesForDrillTestDatasources(retVal, tmpFolder); + return retVal; } } @@ -422,7 +275,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest try { Assert.assertEquals(StringUtils.format("result count: %s", sql), expectedResultsText.size(), results.size()); if (!isOrdered(queryResults)) { - // in case the resultset is not ordered; order via the same comparator before comparision + // in case the resultset is not ordered; order via the same comparator before comparison results.sort(new ArrayRowCmp()); expectedResults.sort(new ArrayRowCmp()); } @@ -4383,7 +4236,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest windowQueryTest(); } - @NotYetSupported(Modes.COLUMN_NOT_FOUND) + @NotYetSupported(Modes.AGGREGATION_NOT_SUPPORT_TYPE) @DrillTest("frameclause/subQueries/frmInSubQry_25") @Test public void test_frameclause_subQueries_frmInSubQry_25() @@ -4391,7 +4244,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest windowQueryTest(); } - @NotYetSupported(Modes.INCORRECT_SYNTAX) + @NotYetSupported(Modes.VIEWS_NOT_SUPPORTED) @DrillTest("nestedAggs/nstdWinView01") @Test public void test_nestedAggs_nstdWinView01() @@ -4411,6 +4264,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_aggregates_winFnQry_83() { + msqIncompatible(); windowQueryTest(); } @@ -5229,7 +5083,6 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest windowQueryTest(); } - @NotYetSupported(Modes.COLUMN_NOT_FOUND) @DrillTest("aggregates/testW_Nulls_5") @Test public void test_aggregates_testW_Nulls_5() @@ -5237,7 +5090,6 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest windowQueryTest(); } - @NotYetSupported(Modes.COLUMN_NOT_FOUND) @DrillTest("aggregates/testW_Nulls_6") @Test public void test_aggregates_testW_Nulls_6() @@ -6303,6 +6155,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void 
test_aggregates_winFnQry_84() { + msqIncompatible(); windowQueryTest(); } @@ -6310,6 +6163,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_aggregates_winFnQry_85() { + msqIncompatible(); windowQueryTest(); } @@ -6695,6 +6549,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_avg_mulwds() { + msqIncompatible(); windowQueryTest(); } @@ -6702,6 +6557,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_count_mulwds() { + msqIncompatible(); windowQueryTest(); } @@ -6709,6 +6565,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_fval_mulwds() { + msqIncompatible(); windowQueryTest(); } @@ -6716,6 +6573,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_lval_mulwds() { + msqIncompatible(); windowQueryTest(); } @@ -6723,6 +6581,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_mulwind_08() { + msqIncompatible(); windowQueryTest(); } @@ -6730,6 +6589,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_mulwind_09() { + msqIncompatible(); windowQueryTest(); } @@ -6737,6 +6597,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_sum_mulwds() { + msqIncompatible(); windowQueryTest(); } @@ -7509,6 +7370,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_rnkNoFrm01() { + msqIncompatible(); windowQueryTest(); } @@ -7516,6 +7378,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_rnkNoFrm02() { + msqIncompatible(); windowQueryTest(); } @@ -7523,6 +7386,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_rnkNoFrm03() { + msqIncompatible(); windowQueryTest(); } @@ -7530,6 +7394,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_rnkNoFrm04() { + msqIncompatible(); windowQueryTest(); } @@ -7537,6 +7402,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_rnkNoFrm05() { + msqIncompatible(); windowQueryTest(); } @@ -7544,6 +7410,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_frameclause_multipl_wnwds_rnkNoFrm06() { + msqIncompatible(); windowQueryTest(); } @@ -7656,6 +7523,7 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest @Test public void test_nestedAggs_multiWin_6() { + msqIncompatible(); windowQueryTest(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java b/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java index d1a0c4ec883..5d53593b7ce 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java @@ -80,13 +80,12 @@ public @interface NotYetSupported NOT_ENOUGH_RULES(DruidException.class, "not enough rules"), ERROR_HANDLING(AssertionError.class, "targetPersona: is <[A-Z]+> and category: is <[A-Z_]+> and errorCode: is"), EXPRESSION_NOT_GROUPED(DruidException.class, "Expression '[a-z]+' is not 
being grouped"), - COLUMN_NOT_FOUND(DruidException.class, "CalciteContextException.*Column.*not found in any table"), NULLS_FIRST_LAST(DruidException.class, "NULLS (FIRST|LAST)"), BIGINT_TO_DATE(DruidException.class, "BIGINT to type (DATE|TIME)"), AGGREGATION_NOT_SUPPORT_TYPE(DruidException.class, "Aggregation \\[(MIN|MAX)\\] does not support type \\[STRING\\]"), ALLDATA_CSV(DruidException.class, "allData.csv"), BIGINT_TIME_COMPARE(DruidException.class, "Cannot apply '.' to arguments of type"), - INCORRECT_SYNTAX(DruidException.class, "Incorrect syntax near the keyword"), + VIEWS_NOT_SUPPORTED(DruidException.class, "Incorrect syntax near the keyword 'CREATE'"), RESULT_MISMATCH(AssertionError.class, "(assertResulEquals|AssertionError: column content mismatch)"), LONG_CASTING(AssertionError.class, "expected: java.lang.Long"), UNSUPPORTED_NULL_ORDERING(DruidException.class, "(A|DE)SCENDING ordering with NULLS (LAST|FIRST)"), @@ -171,7 +170,7 @@ public @interface NotYetSupported Matcher m = annotation.value().getPattern().matcher(trace); if (!m.find()) { - throw new AssertionError("Exception stactrace doesn't match regex: " + annotation.value().regex, e); + throw new AssertionError("Exception stacktrace doesn't match regex: " + annotation.value().regex, e); } throw new AssumptionViolatedException("Test is not-yet supported; ignored with:" + annotation); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestBuilder.java b/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestBuilder.java index 9484fd3d892..0c3ee685732 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestBuilder.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestBuilder.java @@ -38,6 +38,7 @@ import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.calcite.util.SqlTestFramework; import org.apache.druid.sql.calcite.util.SqlTestFramework.PlannerFixture; import org.apache.druid.sql.http.SqlParameter; + import java.util.ArrayList; import java.util.Collections; import java.util.List; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestRunner.java b/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestRunner.java index 3430e10edfc..61678497fc8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestRunner.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/QueryTestRunner.java @@ -356,15 +356,12 @@ public class QueryTestRunner public static class VerifyResults implements QueryVerifyStep { protected final BaseExecuteQuery execStep; - protected final boolean verifyRowSignature; public VerifyResults( - BaseExecuteQuery execStep, - boolean verifyRowSignature + BaseExecuteQuery execStep ) { this.execStep = execStep; - this.verifyRowSignature = verifyRowSignature; } @Override @@ -386,9 +383,7 @@ public class QueryTestRunner } QueryTestBuilder builder = execStep.builder(); - if (verifyRowSignature) { - builder.expectedResultsVerifier.verifyRowSignature(queryResults.signature); - } + builder.expectedResultsVerifier.verifyRowSignature(queryResults.signature); builder.expectedResultsVerifier.verify(builder.sql, queryResults); } } @@ -747,7 +742,7 @@ public class QueryTestRunner if (builder.expectedResultsVerifier != null) { // Don't verify the row signature when MSQ is running, since the broker receives the task id, and the signature // would be {TASK:STRING} instead of the expected results signature - verifySteps.add(new VerifyResults(finalExecStep, !config.isRunningMSQ())); + verifySteps.add(new VerifyResults(finalExecStep)); } 
if (!builder.customVerifications.isEmpty()) { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/SqlTestFrameworkConfig.java b/sql/src/test/java/org/apache/druid/sql/calcite/SqlTestFrameworkConfig.java index 7880cea33e2..1fc39d52ec5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/SqlTestFrameworkConfig.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/SqlTestFrameworkConfig.java @@ -49,7 +49,6 @@ import org.reflections.util.ConfigurationBuilder; import org.reflections.util.FilterBuilder; import javax.annotation.Nonnull; - import java.io.Closeable; import java.lang.annotation.Annotation; import java.lang.annotation.ElementType; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/GreatestExpressionTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/GreatestExpressionTest.java index f1a5a869a19..893b5449934 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/GreatestExpressionTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/GreatestExpressionTest.java @@ -214,7 +214,7 @@ public class GreatestExpressionTest extends CalciteTestBase } @Test - public void testDecimalWithNullShouldReturnString() + public void testDecimalWithNullShouldNotReturnString() { testExpression( Arrays.asList( @@ -227,7 +227,7 @@ public class GreatestExpressionTest extends CalciteTestBase null, 3.4 ), - "3.4" + 3.4 ); } @Test diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/expression/LeastExpressionTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/expression/LeastExpressionTest.java index eaec03c00c8..e2d1bbf306e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/expression/LeastExpressionTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/expression/LeastExpressionTest.java @@ -214,7 +214,7 @@ public class LeastExpressionTest extends CalciteTestBase } @Test - public void testDecimalWithNullShouldReturnString() + public void testDecimalWithNullShouldNotReturnString() { testExpression( Arrays.asList( @@ -227,7 +227,7 @@ public class LeastExpressionTest extends CalciteTestBase 3.4, null ), - "1.2" + 1.2 ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java index 8ef3ad3106f..43d47733277 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java @@ -60,7 +60,6 @@ import org.junit.jupiter.api.extension.ExtendWith; import javax.validation.Validation; import javax.validation.Validator; - import java.util.Collections; import java.util.Optional; import java.util.Set; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCacheTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCacheTest.java index f8660b63494..65610ce99f2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCacheTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/BrokerSegmentMetadataCacheTest.java @@ -74,11 +74,13 @@ import org.apache.druid.server.security.NoopEscalator; import org.apache.druid.sql.calcite.table.DatasourceTable; import org.apache.druid.sql.calcite.table.DruidTable; import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.calcite.util.TestTimelineServerView; import 
org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.LinearShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.easymock.EasyMock; +import org.joda.time.Period; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -332,6 +334,9 @@ public class BrokerSegmentMetadataCacheTest extends BrokerSegmentMetadataCacheTe ArgumentCaptor> argumentCaptor = ArgumentCaptor.forClass(Set.class); CoordinatorClient coordinatorClient = Mockito.mock(CoordinatorClient.class); Mockito.when(coordinatorClient.fetchDataSourceInformation(argumentCaptor.capture())).thenReturn(Futures.immediateFuture(null)); + + Set datsources = Sets.newHashSet(DATASOURCE1, DATASOURCE2, DATASOURCE3, SOME_DATASOURCE, "xyz", "coldDS"); + Mockito.when(coordinatorClient.fetchDataSourcesWithUsedSegments()).thenReturn(Futures.immediateFuture(datsources)); BrokerSegmentMetadataCache schema = new BrokerSegmentMetadataCache( CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), serverView, @@ -347,7 +352,7 @@ public class BrokerSegmentMetadataCacheTest extends BrokerSegmentMetadataCacheTe schema.start(); schema.awaitInitialization(); - Assert.assertEquals(Sets.newHashSet(DATASOURCE1, DATASOURCE2, DATASOURCE3, SOME_DATASOURCE), argumentCaptor.getValue()); + Assert.assertEquals(datsources, argumentCaptor.getValue()); refreshLatch = new CountDownLatch(1); serverView.addSegment(newSegment("xyz", 0), ServerType.HISTORICAL); @@ -355,7 +360,87 @@ public class BrokerSegmentMetadataCacheTest extends BrokerSegmentMetadataCacheTe refreshLatch.await(WAIT_TIMEOUT_SECS, TimeUnit.SECONDS); // verify that previously refreshed are included in the last coordinator poll - Assert.assertEquals(Sets.newHashSet(DATASOURCE1, DATASOURCE2, DATASOURCE3, SOME_DATASOURCE, "xyz"), argumentCaptor.getValue()); + Assert.assertEquals(datsources, argumentCaptor.getValue()); + } + + @Test + public void testRefreshOnEachCycleCentralizedDatasourceSchemaEnabled() throws InterruptedException + { + CentralizedDatasourceSchemaConfig config = CentralizedDatasourceSchemaConfig.create(); + config.setEnabled(true); + + serverView = new TestTimelineServerView(walker.getSegments(), Collections.emptyList()); + druidServers = serverView.getDruidServers(); + + BrokerSegmentMetadataCacheConfig metadataCacheConfig = BrokerSegmentMetadataCacheConfig.create("PT1S"); + metadataCacheConfig.setMetadataRefreshPeriod(Period.parse("PT0.001S")); + BrokerSegmentMetadataCache schema = new BrokerSegmentMetadataCache( + CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), + serverView, + SEGMENT_CACHE_CONFIG_DEFAULT, + new NoopEscalator(), + new InternalQueryConfig(), + new NoopServiceEmitter(), + new PhysicalDatasourceMetadataFactory(globalTableJoinable, segmentManager), + new NoopCoordinatorClient(), + config + ) { + @Override + public void refresh(Set segmentsToRefresh, Set dataSourcesToRebuild) + throws IOException + { + super.refresh(segmentsToRefresh, dataSourcesToRebuild); + refreshLatch.countDown(); + } + }; + + // refresh should be executed more than once, with the feature disabled refresh should be executed only once + refreshLatch = new CountDownLatch(3); + schema.start(); + schema.awaitInitialization(); + + refreshLatch.await(WAIT_TIMEOUT_SECS, TimeUnit.SECONDS); + + Assert.assertEquals(0, refreshLatch.getCount()); + } + + @Test + public void testRefreshOnEachCycleCentralizedDatasourceSchemaDisabled() throws InterruptedException + { + 
BrokerSegmentMetadataCacheConfig metadataCacheConfig = BrokerSegmentMetadataCacheConfig.create("PT1S"); + metadataCacheConfig.setMetadataRefreshPeriod(Period.parse("PT0.001S")); + + serverView = new TestTimelineServerView(walker.getSegments(), Collections.emptyList()); + druidServers = serverView.getDruidServers(); + + BrokerSegmentMetadataCache schema = new BrokerSegmentMetadataCache( + CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), + serverView, + SEGMENT_CACHE_CONFIG_DEFAULT, + new NoopEscalator(), + new InternalQueryConfig(), + new NoopServiceEmitter(), + new PhysicalDatasourceMetadataFactory(globalTableJoinable, segmentManager), + new NoopCoordinatorClient(), + CentralizedDatasourceSchemaConfig.create() + ) { + @Override + public void refresh(Set segmentsToRefresh, Set dataSourcesToRebuild) + throws IOException + { + super.refresh(segmentsToRefresh, dataSourcesToRebuild); + refreshLatch.countDown(); + } + }; + + // refresh should be executed only once + refreshLatch = new CountDownLatch(3); + schema.start(); + schema.awaitInitialization(); + + refreshLatch.await(WAIT_TIMEOUT_SECS, TimeUnit.SECONDS); + + Assert.assertEquals(2, refreshLatch.getCount()); } @Test @@ -1027,4 +1112,28 @@ public class BrokerSegmentMetadataCacheTest extends BrokerSegmentMetadataCacheTe buildSchemaMarkAndTableLatch(); serverView.invokeSegmentSchemasAnnouncedDummy(); } + + @Test + public void testNoDatasourceSchemaWhenNoSegmentMetadata() throws InterruptedException, IOException + { + BrokerSegmentMetadataCacheConfig config = new BrokerSegmentMetadataCacheConfig(); + config.setDisableSegmentMetadataQueries(true); + + BrokerSegmentMetadataCache schema = buildSchemaMarkAndTableLatch( + config, + new NoopCoordinatorClient() + ); + + schema.start(); + schema.awaitInitialization(); + + List segments = schema.getSegmentMetadataSnapshot().values() + .stream() + .map(AvailableSegmentMetadata::getSegment) + .collect(Collectors.toList()); + + schema.refresh(segments.stream().map(DataSegment::getId).collect(Collectors.toSet()), Collections.singleton("foo")); + + Assert.assertNull(schema.getDatasource("foo")); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java index c2321d52250..992d91dabe8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java @@ -117,6 +117,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; + import java.io.File; import java.io.IOException; import java.net.URI; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java index ddd853d97be..3d0d0352e60 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java @@ -125,6 +125,11 @@ public class CalciteTests public static final String DRUID_SCHEMA_NAME = "druid"; public static final String WIKIPEDIA = "wikipedia"; public static final String WIKIPEDIA_FIRST_LAST = "wikipedia_first_last"; + public static final String TBL_WITH_NULLS_PARQUET = "tblWnulls.parquet"; + public static final String SML_TBL_PARQUET = "smlTbl.parquet"; + public static final String ALL_TYPES_UNIQ_PARQUET = "allTypsUniq.parquet"; + 
public static final String FEW_ROWS_ALL_DATA_PARQUET = "fewRowsAllData.parquet"; + public static final String T_ALL_TYPE_PARQUET = "t_alltype.parquet"; public static final String TEST_SUPERUSER_NAME = "testSuperuser"; public static final AuthorizerMapper TEST_AUTHORIZER_MAPPER = new AuthorizerMapper(null) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java index f732771991c..1f6b17e2ca2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestDataBuilder.java @@ -19,9 +19,12 @@ package org.apache.druid.sql.calcite.util; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterators; import com.google.inject.Injector; import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRowSchema; @@ -36,8 +39,10 @@ import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.MapInputRowParser; import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.JSONPathSpec; import org.apache.druid.query.DataSource; @@ -102,6 +107,8 @@ import java.util.stream.Collectors; */ public class TestDataBuilder { + private static final ObjectMapper MAPPER = new DefaultObjectMapper(); + public static final String TIMESTAMP_COLUMN = "t"; public static final GlobalTableDataSource CUSTOM_TABLE = new GlobalTableDataSource(CalciteTests.BROADCAST_DATASOURCE); @@ -962,6 +969,175 @@ public class TestDataBuilder ); } + public static void attachIndexesForDrillTestDatasources(SpecificSegmentsQuerySegmentWalker segmentWalker, File tmpDir) + { + attachIndexForDrillTestDatasource(segmentWalker, CalciteTests.TBL_WITH_NULLS_PARQUET, tmpDir); + attachIndexForDrillTestDatasource(segmentWalker, CalciteTests.SML_TBL_PARQUET, tmpDir); + attachIndexForDrillTestDatasource(segmentWalker, CalciteTests.ALL_TYPES_UNIQ_PARQUET, tmpDir); + attachIndexForDrillTestDatasource(segmentWalker, CalciteTests.FEW_ROWS_ALL_DATA_PARQUET, tmpDir); + attachIndexForDrillTestDatasource(segmentWalker, CalciteTests.T_ALL_TYPE_PARQUET, tmpDir); + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + private static void attachIndexForDrillTestDatasource( + SpecificSegmentsQuerySegmentWalker segmentWalker, + String dataSource, + File tmpDir + ) + { + final QueryableIndex queryableIndex = getQueryableIndexForDrillDatasource(dataSource, tmpDir); + + segmentWalker.add( + DataSegment.builder() + .dataSource(dataSource) + .interval(Intervals.ETERNITY) + .version("1") + .shardSpec(new NumberedShardSpec(0, 0)) + .size(0) + .build(), + queryableIndex); + } + + public static QueryableIndex getQueryableIndexForDrillDatasource(String datasource, File parentTempDir) + { + final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder() + .withDimensionsSpec(getDimensionSpecForDrillDatasource(datasource)) + .withRollup(false) + 
.build(); + Iterable inputRowsForDrillDatasource = getInputRowsForDrillDatasource(datasource); + return IndexBuilder + .create() + .tmpDir(new File(parentTempDir, datasource)) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema(indexSchema) + .rows(inputRowsForDrillDatasource) + .buildMMappedIndex(); + } + + private static DimensionsSpec getDimensionSpecForDrillDatasource(String datasource) + { + switch (datasource) { + case CalciteTests.TBL_WITH_NULLS_PARQUET: { + return new DimensionsSpec( + ImmutableList.of( + new LongDimensionSchema("c1"), + new StringDimensionSchema("c2") + ) + ); + } + case CalciteTests.SML_TBL_PARQUET: { + return new DimensionsSpec( + ImmutableList.of( + // "col_int": 8122, + new LongDimensionSchema("col_int"), + // "col_bgint": 817200, + new LongDimensionSchema("col_bgint"), + // "col_char_2": "IN", + new StringDimensionSchema("col_char_2"), + // "col_vchar_52": + // "AXXXXXXXXXXXXXXXXXXXXXXXXXCXXXXXXXXXXXXXXXXXXXXXXXXB", + new StringDimensionSchema("col_vchar_52"), + // "col_tmstmp": 1409617682418, + new LongDimensionSchema("col_tmstmp"), + // "col_dt": 422717616000000, + new LongDimensionSchema("col_dt"), + // "col_booln": false, + new StringDimensionSchema("col_booln"), + // "col_dbl": 12900.48, + new DoubleDimensionSchema("col_dbl"), + // "col_tm": 33109170 + new LongDimensionSchema("col_tm") + ) + ); + } + case CalciteTests.ALL_TYPES_UNIQ_PARQUET: { + // {"col0":1,"col1":65534,"col2":256.0,"col3":1234.9,"col4":73578580,"col5":1393720082338,"col6":421185052800000,"col7":false,"col8":"CA","col9":"AXXXXXXXXXXXXXXXXXXXXXXXXXCXXXXXXXXXXXXXXXXXXXXXXXXZ"} + return new DimensionsSpec( + ImmutableList.of( + new LongDimensionSchema("col0"), + new LongDimensionSchema("col1"), + new DoubleDimensionSchema("col2"), + new DoubleDimensionSchema("col3"), + new LongDimensionSchema("col4"), + new LongDimensionSchema("col5"), + new LongDimensionSchema("col6"), + new StringDimensionSchema("col7"), + new StringDimensionSchema("col8"), + new StringDimensionSchema("col9") + ) + ); + } + case CalciteTests.FEW_ROWS_ALL_DATA_PARQUET: { + return new DimensionsSpec( + ImmutableList.of( + // "col0":12024, + new LongDimensionSchema("col0"), + // "col1":307168, + new LongDimensionSchema("col1"), + // "col2":"VT", + new StringDimensionSchema("col2"), + // "col3":"DXXXXXXXXXXXXXXXXXXXXXXXXXEXXXXXXXXXXXXXXXXXXXXXXXXF", + new StringDimensionSchema("col3"), + // "col4":1338596882419, + new LongDimensionSchema("col4"), + // "col5":422705433600000, + new LongDimensionSchema("col5"), + // "col6":true, + new StringDimensionSchema("col6"), + // "col7":3.95110006277E8, + new DoubleDimensionSchema("col7"), + // "col8":67465430 + new LongDimensionSchema("col8") + ) + ); + } + case CalciteTests.T_ALL_TYPE_PARQUET: { + return new DimensionsSpec( + ImmutableList.of( + // "c1":1, + new LongDimensionSchema("c1"), + // "c2":592475043, + new LongDimensionSchema("c2"), + // "c3":616080519999272, + new LongDimensionSchema("c3"), + // "c4":"ObHeWTDEcbGzssDwPwurfs", + new StringDimensionSchema("c4"), + // "c5":"0sZxIfZ CGwTOaLWZ6nWkUNx", + new StringDimensionSchema("c5"), + // "c6":1456290852307, + new LongDimensionSchema("c6"), + // "c7":421426627200000, + new LongDimensionSchema("c7"), + // "c8":true, + new StringDimensionSchema("c8"), + // "c9":0.626179100469 + new DoubleDimensionSchema("c9") + ) + ); + } + default: + throw new RuntimeException("Invalid datasource supplied for drill tests"); + } + } + + private static Iterable getInputRowsForDrillDatasource(String 
datasource) + { + DimensionsSpec dimensionSpecForDrillDatasource = getDimensionSpecForDrillDatasource(datasource); + return () -> { + try { + return Iterators.transform( + MAPPER.readerFor(Map.class) + .readValues( + ClassLoader.getSystemResource("drill/window/datasources/" + datasource + ".json")), + (Function) input -> new MapBasedInputRow(0, dimensionSpecForDrillDatasource.getDimensionNames(), input) + ); + } + catch (IOException e) { + throw new RE(e, "problem reading file"); + } + }; + } + private static MapBasedInputRow toRow(String time, List dimensions, Map event) { return new MapBasedInputRow(DateTimes.ISO_DATE_OPTIONAL_TIME.parse(time), dimensions, event); diff --git a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/decoupled.iq b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/decoupled.iq index be52c7c4c65..530bbe172bb 100644 --- a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/decoupled.iq +++ b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/decoupled.iq @@ -26,13 +26,13 @@ LogicalSort(sort0=[$0], dir0=[ASC]) LogicalSort(sort0=[$0], dir0=[ASC]) LogicalAggregate(group=[{0}], cnt=[COUNT($1) FILTER $2], aall=[COUNT()]) LogicalProject(cityName=[$2], channel=[$1], $f3=[IS TRUE(>($17, 0))]) - LogicalFilter(condition=[SEARCH($2, Sarg['Aarhus':VARCHAR(8), 'New York':VARCHAR(8)]:VARCHAR(8))]) + LogicalFilter(condition=[SEARCH($2, Sarg['Aarhus':VARCHAR, 'New York':VARCHAR]:VARCHAR)]) LogicalTableScan(table=[[druid, wikipedia]]) !logicalPlan DruidAggregate(group=[{0}], cnt=[COUNT($1) FILTER $2], aall=[COUNT()], druid=[logical]) DruidProject(cityName=[$2], channel=[$1], $f3=[IS TRUE(>($17, 0))], druid=[logical]) - DruidFilter(condition=[SEARCH($2, Sarg['Aarhus':VARCHAR(8), 'New York':VARCHAR(8)]:VARCHAR(8))]) + DruidFilter(condition=[SEARCH($2, Sarg['Aarhus':VARCHAR, 'New York':VARCHAR]:VARCHAR)]) DruidTableScan(table=[[druid, wikipedia]], druid=[logical]) !druidPlan diff --git a/sql/src/test/resources/calcite/tests/window/defaultBoundCurrentRow.sqlTest b/sql/src/test/resources/calcite/tests/window/defaultBoundCurrentRow.sqlTest new file mode 100644 index 00000000000..d5a324c9e2d --- /dev/null +++ b/sql/src/test/resources/calcite/tests/window/defaultBoundCurrentRow.sqlTest @@ -0,0 +1,82 @@ +type: "operatorValidation" + +sql: | + SELECT + dim2, + count(*) OVER (partition by dim2 ORDER BY dim1 ROWS UNBOUNDED PRECEDING), + count(*) OVER (partition by dim2 ORDER BY dim1 ROWS 1 PRECEDING), + count(*) OVER (partition by dim2 ORDER BY dim1 ROWS CURRENT ROW), + count(*) OVER (partition by dim2 ORDER BY dim1 ROWS 1 FOLLOWING), + count(*) OVER (partition by dim2 ORDER BY dim1 ROWS UNBOUNDED FOLLOWING) + FROM numfoo + WHERE dim2 IN ('a', 'abc') + GROUP BY dim2, dim1 + +expectedOperators: + - {"type":"naiveSort","columns":[{"column":"_d1","direction":"ASC"},{"column":"_d0","direction":"ASC"}]} + - { type: "naivePartition", partitionColumns: [ "_d1" ] } + - type: "window" + processor: + type: "framedAgg" + frame: + peerType: "ROWS" + lowUnbounded: true + lowOffset: 0 + uppUnbounded: false + uppOffset: 0 + orderBy: null + aggregations: + - { type: "count", name: "w0" } + - type: "window" + processor: + type: "framedAgg" + frame: + peerType: "ROWS" + lowUnbounded: false + lowOffset: -1 + uppUnbounded: false + uppOffset: 0 + orderBy: null + aggregations: + - { type: "count", name: "w1" } + - type: "window" + processor: + type: "framedAgg" + frame: + peerType: "ROWS" + lowUnbounded: false + lowOffset: 0 + uppUnbounded: false + uppOffset: 0 + orderBy: null + 
aggregations: + - { type: "count", name: "w2" } + - type: "window" + processor: + type: "framedAgg" + frame: + peerType: "ROWS" + lowUnbounded: false + lowOffset: 0 + uppUnbounded: false + uppOffset: 1 + orderBy: null + aggregations: + - { type: "count", name: "w3" } + - type: "window" + processor: + type: "framedAgg" + frame: + peerType: "ROWS" + lowUnbounded: false + lowOffset: 0 + uppUnbounded: true + uppOffset: 0 + orderBy: null + aggregations: + - { type: "count", name: "w4" } + +expectedResults: + - ["a",1,1,1,2,2] + - ["a",2,2,1,1,1] + - ["abc",1,1,1,1,1] diff --git a/sql/src/test/resources/calcite/tests/window/range_handling.sqlTest b/sql/src/test/resources/calcite/tests/window/range_handling.sqlTest index 717778d1fe2..405886af230 100644 --- a/sql/src/test/resources/calcite/tests/window/range_handling.sqlTest +++ b/sql/src/test/resources/calcite/tests/window/range_handling.sqlTest @@ -9,16 +9,13 @@ sql: | COUNT(1) OVER (ORDER BY FLOOR(m1/3) RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), 'postfix', COUNT(1) OVER (ORDER BY FLOOR(m1/3) ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING), - COUNT(1) OVER (ORDER BY FLOOR(m1/3) RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING), - 'k(1)', - COUNT(1) OVER (ORDER BY FLOOR(m1/3) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), - COUNT(1) OVER (ORDER BY FLOOR(m1/3) RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING) + COUNT(1) OVER (ORDER BY FLOOR(m1/3) RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) FROM foo expectedResults: - - [0.0,1,"prefix",1,2,"postfix",6,6,"k(1)",2,5] - - [0.0,1,"prefix",2,2,"postfix",5,6,"k(1)",3,5] - - [1.0,2,"prefix",3,5,"postfix",4,4,"k(1)",3,6] - - [1.0,2,"prefix",4,5,"postfix",3,4,"k(1)",3,6] - - [1.0,2,"prefix",5,5,"postfix",2,4,"k(1)",3,6] - - [2.0,3,"prefix",6,6,"postfix",1,1,"k(1)",2,4] + - [0.0,1,"prefix",1,2,"postfix",6,6] + - [0.0,1,"prefix",2,2,"postfix",5,6] + - [1.0,2,"prefix",3,5,"postfix",4,4] + - [1.0,2,"prefix",4,5,"postfix",3,4] + - [1.0,2,"prefix",5,5,"postfix",2,4] + - [2.0,3,"prefix",6,6,"postfix",1,1] diff --git a/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrdering.sqlTest b/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrdering.sqlTest index 221f7a52bcd..cc59868482a 100644 --- a/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrdering.sqlTest +++ b/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrdering.sqlTest @@ -5,7 +5,7 @@ sql: | countryIsoCode, CAST (FLOOR(__time TO HOUR) AS BIGINT) t, SUM(delta) delta, - SUM(SUM(delta)) OVER (PARTITION BY countryIsoCode ORDER BY CAST (FLOOR(__time TO HOUR) AS BIGINT) RANGE BETWEEN 3 PRECEDING AND 2 FOLLOWING) windowedDelta, + SUM(SUM(delta)) OVER (PARTITION BY countryIsoCode ORDER BY CAST (FLOOR(__time TO HOUR) AS BIGINT) ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING) windowedDelta, ROW_NUMBER() OVER (PARTITION BY CAST (FLOOR(__time TO HOUR) AS BIGINT) ORDER BY SUM(delta)) AS hourlyRank FROM wikipedia GROUP BY 1, 2 @@ -16,14 +16,12 @@ expectedOperators: processor: type: "framedAgg" frame: - peerType: "RANGE" + peerType: "ROWS" lowUnbounded: false - lowOffset: 3 + lowOffset: -3 uppUnbounded: false uppOffset: 2 - orderBy: - - column: d1 - direction: ASC + orderBy: null aggregations: - { type: "longSum", name: "w0", fieldName: "a0" } - { type: "naiveSort", columns: [ { column: "d1", direction: "ASC" }, { column: "a0", direction: "ASC"} ]} diff --git a/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrderingDesc.sqlTest 
b/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrderingDesc.sqlTest index 0180f615313..b16aa21d303 100644 --- a/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrderingDesc.sqlTest +++ b/sql/src/test/resources/calcite/tests/window/wikipediaAggregationsMultipleOrderingDesc.sqlTest @@ -5,7 +5,7 @@ sql: | countryIsoCode, CAST (FLOOR(__time TO HOUR) AS BIGINT) t, SUM(delta) delta, - SUM(SUM(delta)) OVER (PARTITION BY countryIsoCode ORDER BY CAST (FLOOR(__time TO HOUR) AS BIGINT) DESC RANGE BETWEEN 3 PRECEDING AND 2 FOLLOWING) windowedDelta, + SUM(SUM(delta)) OVER (PARTITION BY countryIsoCode ORDER BY CAST (FLOOR(__time TO HOUR) AS BIGINT) DESC ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING) windowedDelta, ROW_NUMBER() OVER (PARTITION BY CAST (FLOOR(__time TO HOUR) AS BIGINT) ORDER BY SUM(delta) DESC) AS hourlyRank FROM wikipedia GROUP BY 1, 2 diff --git a/sql/src/test/resources/calcite/tests/window/wikipediaFramedAggregations.sqlTest b/sql/src/test/resources/calcite/tests/window/wikipediaFramedAggregations.sqlTest index 12e5736951b..c25f1ff0352 100644 --- a/sql/src/test/resources/calcite/tests/window/wikipediaFramedAggregations.sqlTest +++ b/sql/src/test/resources/calcite/tests/window/wikipediaFramedAggregations.sqlTest @@ -5,7 +5,7 @@ sql: | countryIsoCode, CAST (FLOOR(__time TO HOUR) AS BIGINT) t, SUM(delta) delta, - SUM(SUM(delta)) OVER (PARTITION BY countryIsoCode ORDER BY CAST (FLOOR(__time TO HOUR) AS BIGINT) RANGE BETWEEN 3 PRECEDING AND 2 FOLLOWING) windowedDelta + SUM(SUM(delta)) OVER (PARTITION BY countryIsoCode ORDER BY CAST (FLOOR(__time TO HOUR) AS BIGINT) ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING) windowedDelta FROM wikipedia GROUP BY 1, 2 @@ -15,12 +15,12 @@ expectedOperators: processor: type: "framedAgg" frame: - peerType: "RANGE" + peerType: "ROWS" lowUnbounded: false - lowOffset: 3 + lowOffset: -3 uppUnbounded: false uppOffset: 2 - orderBy: [ {column: "d1", direction: ASC} ] + orderBy: null aggregations: - { type: "longSum", name: "w0", fieldName: "a0" } diff --git a/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_5.q b/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_5.q index 30ecd90da00..8f4d929313f 100644 --- a/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_5.q +++ b/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_5.q @@ -1 +1 @@ -SELECT c1, c2, w_avg FROM ( SELECT c1, c2, avg ( c1 ) OVER ( PARTITION BY c2 ORDER BY c1 ASC nulls first ) w_AVG FROM "tblWnulls.parquet" ) sub_query WHERE w_avg IS null \ No newline at end of file +SELECT c1, c2, w_avg FROM ( SELECT c1, c2, avg ( c1 ) OVER ( PARTITION BY c2 ORDER BY c1 ASC nulls first ) w_avg FROM "tblWnulls.parquet" ) sub_query WHERE w_avg IS null \ No newline at end of file diff --git a/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_6.q b/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_6.q index f41206b8860..accb07b4824 100644 --- a/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_6.q +++ b/sql/src/test/resources/drill/window/queries/aggregates/testW_Nulls_6.q @@ -1 +1 @@ -SELECT c1, c2, w_avg FROM ( SELECT c1, c2, avg ( c1 ) OVER ( PARTITION BY c2 ORDER BY c1 ASC nulls first ) w_AVG FROM "tblWnulls.parquet" ) sub_query \ No newline at end of file +SELECT c1, c2, w_avg FROM ( SELECT c1, c2, avg ( c1 ) OVER ( PARTITION BY c2 ORDER BY c1 ASC nulls first ) w_avg FROM "tblWnulls.parquet" ) sub_query \ No newline at end of file diff --git 
a/sql/src/test/resources/drill/window/queries/frameclause/subQueries/frmInSubQry_25.q b/sql/src/test/resources/drill/window/queries/frameclause/subQueries/frmInSubQry_25.q index 5eddd9c7bef..552e2f7ee24 100644 --- a/sql/src/test/resources/drill/window/queries/frameclause/subQueries/frmInSubQry_25.q +++ b/sql/src/test/resources/drill/window/queries/frameclause/subQueries/frmInSubQry_25.q @@ -2,7 +2,7 @@ SELECT * FROM ( SELECT MIN(c5) OVER W as w_min, MIN(c5) OVER W2 as w2_min, - MIN(C5) OVER W3 as w3_min + MIN(c5) OVER W3 as w3_min FROM "t_alltype.parquet" WINDOW W AS ( PARTITION BY c8 ORDER BY c1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ), W2 AS ( PARTITION BY c8 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ), diff --git a/web-console/assets/azureStorage.png b/web-console/assets/azure-storage.png similarity index 100% rename from web-console/assets/azureStorage.png rename to web-console/assets/azure-storage.png diff --git a/web-console/lib/keywords.js b/web-console/lib/keywords.js index bf7b9a03910..dbf10e12bac 100644 --- a/web-console/lib/keywords.js +++ b/web-console/lib/keywords.js @@ -47,6 +47,7 @@ exports.SQL_KEYWORDS = [ 'FULL', 'CROSS', 'USING', + 'NATURAL', 'FETCH', 'FIRST', 'NEXT', @@ -67,6 +68,8 @@ exports.SQL_KEYWORDS = [ 'RANGE', 'PRECEDING', 'FOLLOWING', + 'CURRENT', + 'UNBOUNDED', 'EXTEND', 'PIVOT', 'UNPIVOT', diff --git a/web-console/package-lock.json b/web-console/package-lock.json index 26999644f1e..2945955d9d6 100644 --- a/web-console/package-lock.json +++ b/web-console/package-lock.json @@ -15,7 +15,7 @@ "@blueprintjs/icons": "^4.16.0", "@blueprintjs/popover2": "^1.14.9", "@blueprintjs/select": "^4.9.24", - "@druid-toolkit/query": "^0.22.15", + "@druid-toolkit/query": "^0.22.20", "@druid-toolkit/visuals-core": "^0.3.3", "@druid-toolkit/visuals-react": "^0.3.3", "ace-builds": "~1.4.14", @@ -1005,9 +1005,9 @@ } }, "node_modules/@druid-toolkit/query": { - "version": "0.22.15", - "resolved": "https://registry.npmjs.org/@druid-toolkit/query/-/query-0.22.15.tgz", - "integrity": "sha512-LyQVIVkVNhduscf2wnBO/oGBvj353tS5ElIws20xQzApvEIwNNxmlkA+8npqwy77BkJj3nRQvlenbSEDHQdqow==", + "version": "0.22.20", + "resolved": "https://registry.npmjs.org/@druid-toolkit/query/-/query-0.22.20.tgz", + "integrity": "sha512-GmmSd27y7zLVTjgTBQy+XoGeSSGhSDNmwyiwWtSua7I5LX8XqHV7Chi8HIH25YQoVgTK1pLK4RS8eRXxthRAzg==", "dependencies": { "tslib": "^2.5.2" } @@ -19147,9 +19147,9 @@ "dev": true }, "@druid-toolkit/query": { - "version": "0.22.15", - "resolved": "https://registry.npmjs.org/@druid-toolkit/query/-/query-0.22.15.tgz", - "integrity": "sha512-LyQVIVkVNhduscf2wnBO/oGBvj353tS5ElIws20xQzApvEIwNNxmlkA+8npqwy77BkJj3nRQvlenbSEDHQdqow==", + "version": "0.22.20", + "resolved": "https://registry.npmjs.org/@druid-toolkit/query/-/query-0.22.20.tgz", + "integrity": "sha512-GmmSd27y7zLVTjgTBQy+XoGeSSGhSDNmwyiwWtSua7I5LX8XqHV7Chi8HIH25YQoVgTK1pLK4RS8eRXxthRAzg==", "requires": { "tslib": "^2.5.2" } diff --git a/web-console/package.json b/web-console/package.json index 44e6986c2bd..6e38ba24684 100644 --- a/web-console/package.json +++ b/web-console/package.json @@ -69,7 +69,7 @@ "@blueprintjs/icons": "^4.16.0", "@blueprintjs/popover2": "^1.14.9", "@blueprintjs/select": "^4.9.24", - "@druid-toolkit/query": "^0.22.15", + "@druid-toolkit/query": "^0.22.20", "@druid-toolkit/visuals-core": "^0.3.3", "@druid-toolkit/visuals-react": "^0.3.3", "ace-builds": "~1.4.14", diff --git a/web-console/src/components/header-bar/__snapshots__/header-bar.spec.tsx.snap 
b/web-console/src/components/header-bar/__snapshots__/header-bar.spec.tsx.snap index 6c7cf34cb70..a189bf5f954 100644 --- a/web-console/src/components/header-bar/__snapshots__/header-bar.spec.tsx.snap +++ b/web-console/src/components/header-bar/__snapshots__/header-bar.spec.tsx.snap @@ -205,8 +205,8 @@ exports[`HeaderBar matches snapshot 1`] = ` web console documentation @@ -88,7 +88,7 @@ exports[`RestrictedMode matches snapshot when in manual capability detection mod For more info refer to the web console documentation diff --git a/web-console/src/components/header-bar/restricted-mode/restricted-mode.tsx b/web-console/src/components/header-bar/restricted-mode/restricted-mode.tsx index 072d5b10e58..d9b7e18a5ec 100644 --- a/web-console/src/components/header-bar/restricted-mode/restricted-mode.tsx +++ b/web-console/src/components/header-bar/restricted-mode/restricted-mode.tsx @@ -128,7 +128,7 @@ export const RestrictedMode = React.memo(function RestrictedMode(props: Restrict {message}

    For more info refer to the{' '} - + web console documentation . diff --git a/web-console/src/components/record-table-pane/record-table-pane.tsx b/web-console/src/components/record-table-pane/record-table-pane.tsx index 29433b25ddc..6007559fb0d 100644 --- a/web-console/src/components/record-table-pane/record-table-pane.tsx +++ b/web-console/src/components/record-table-pane/record-table-pane.tsx @@ -174,7 +174,9 @@ export const RecordTablePane = React.memo(function RecordTablePane(props: Record })} /> )} - {showValue && setShowValue(undefined)} str={showValue} />} + {showValue && ( + setShowValue(undefined)} str={showValue} size="large" /> + )} ); }); diff --git a/web-console/src/components/supervisor-history-panel/supervisor-history-panel.tsx b/web-console/src/components/supervisor-history-panel/supervisor-history-panel.tsx index e3fd38b9161..0e1043a5681 100644 --- a/web-console/src/components/supervisor-history-panel/supervisor-history-panel.tsx +++ b/web-console/src/components/supervisor-history-panel/supervisor-history-panel.tsx @@ -52,9 +52,7 @@ export const SupervisorHistoryPanel = React.memo(function SupervisorHistoryPanel const resp = await Api.instance.get( `/druid/indexer/v1/supervisor/${Api.encodePath(supervisorId)}/history`, ); - return resp.data.map((vs: SupervisorHistoryEntry) => - deepSet(vs, 'spec', cleanSpec(vs.spec, true)), - ); + return resp.data.map((vs: SupervisorHistoryEntry) => deepSet(vs, 'spec', cleanSpec(vs.spec))); }, }); @@ -87,8 +85,8 @@ export const SupervisorHistoryPanel = React.memo(function SupervisorHistoryPanel ({ label: s.version, value: s.spec }))} - initLeftIndex={diffIndex + 1} - initRightIndex={diffIndex} + initOldIndex={diffIndex + 1} + initNewIndex={diffIndex} onClose={() => setDiffIndex(-1)} /> )} diff --git a/web-console/src/console-application.tsx b/web-console/src/console-application.tsx index e500af23e5d..6acefa40e7f 100644 --- a/web-console/src/console-application.tsx +++ b/web-console/src/console-application.tsx @@ -29,7 +29,7 @@ import type { Filter } from 'react-table'; import type { HeaderActiveTab } from './components'; import { HeaderBar, Loader } from './components'; import type { DruidEngine, QueryWithContext } from './druid-models'; -import { Capabilities } from './helpers'; +import { Capabilities, maybeGetClusterCapacity } from './helpers'; import { stringToTableFilters, tableFiltersToString } from './react-table'; import { AppToaster } from './singletons'; import { compact, localStorageGetJson, LocalStorageKeys, QueryManager } from './utils'; @@ -318,6 +318,7 @@ export class ConsoleApplication extends React.PureComponent< queryEngines={queryEngines} allowExplain goToTask={this.goToTasksWithTaskId} + getClusterCapacity={maybeGetClusterCapacity} />, 'thin', ); @@ -331,6 +332,8 @@ export class ConsoleApplication extends React.PureComponent< capabilities={capabilities} goToQuery={this.goToQuery} goToTask={this.goToTasksWithTaskId} + goToTaskGroup={this.goToTasksWithTaskGroupId} + getClusterCapacity={maybeGetClusterCapacity} />, ); }; diff --git a/web-console/src/dialogs/about-dialog/__snapshots__/about-dialog.spec.tsx.snap b/web-console/src/dialogs/about-dialog/__snapshots__/about-dialog.spec.tsx.snap index 9509dd1ecf9..7ea843d01df 100644 --- a/web-console/src/dialogs/about-dialog/__snapshots__/about-dialog.spec.tsx.snap +++ b/web-console/src/dialogs/about-dialog/__snapshots__/about-dialog.spec.tsx.snap @@ -109,7 +109,7 @@ exports[`AboutDialog matches snapshot 1`] = ` Druid is made with ❤️ by a community of passionate developers. 
To contribute, join in the discussion on the diff --git a/web-console/src/dialogs/compaction-config-dialog/__snapshots__/compaction-config-dialog.spec.tsx.snap b/web-console/src/dialogs/compaction-config-dialog/__snapshots__/compaction-config-dialog.spec.tsx.snap index 6df207060ff..30df7eeca2f 100644 --- a/web-console/src/dialogs/compaction-config-dialog/__snapshots__/compaction-config-dialog.spec.tsx.snap +++ b/web-console/src/dialogs/compaction-config-dialog/__snapshots__/compaction-config-dialog.spec.tsx.snap @@ -360,7 +360,7 @@ exports[`CompactionConfigDialog matches snapshot with compactionConfig (dynamic For more information refer to the documentation @@ -770,7 +770,7 @@ exports[`CompactionConfigDialog matches snapshot with compactionConfig (hashed p For more information refer to the documentation @@ -1180,7 +1180,7 @@ exports[`CompactionConfigDialog matches snapshot with compactionConfig (range pa For more information refer to the documentation @@ -1590,7 +1590,7 @@ exports[`CompactionConfigDialog matches snapshot without compactionConfig 1`] = For more information refer to the documentation diff --git a/web-console/src/dialogs/compaction-config-dialog/compaction-config-dialog.tsx b/web-console/src/dialogs/compaction-config-dialog/compaction-config-dialog.tsx index f07d0ce55ec..985430755c7 100644 --- a/web-console/src/dialogs/compaction-config-dialog/compaction-config-dialog.tsx +++ b/web-console/src/dialogs/compaction-config-dialog/compaction-config-dialog.tsx @@ -122,9 +122,7 @@ export const CompactionConfigDialog = React.memo(function CompactionConfigDialog

    For more information refer to the{' '} - + documentation . diff --git a/web-console/src/dialogs/compaction-dynamic-config-dialog/compaction-dynamic-config-dialog.tsx b/web-console/src/dialogs/compaction-dynamic-config-dialog/compaction-dynamic-config-dialog.tsx index 59f73c27a11..be5234b5bdb 100644 --- a/web-console/src/dialogs/compaction-dynamic-config-dialog/compaction-dynamic-config-dialog.tsx +++ b/web-console/src/dialogs/compaction-dynamic-config-dialog/compaction-dynamic-config-dialog.tsx @@ -86,7 +86,7 @@ export const CompactionDynamicConfigDialog = React.memo(function CompactionDynam async function saveConfig() { if (!dynamicConfig) return; try { - // This API is terrible. https://druid.apache.org/docs/latest/operations/api-reference.html#automatic-compaction-configuration + // This API is terrible. https://druid.apache.org/docs/latest/operations/api-reference#automatic-compaction-configuration await Api.instance.post( `/druid/coordinator/v1/config/compaction/taskslots?ratio=${ dynamicConfig.compactionTaskSlotRatio ?? DEFAULT_RATIO @@ -124,7 +124,7 @@ export const CompactionDynamicConfigDialog = React.memo(function CompactionDynam documentation diff --git a/web-console/src/dialogs/compaction-history-dialog/compaction-history-dialog.tsx b/web-console/src/dialogs/compaction-history-dialog/compaction-history-dialog.tsx index cb886d0483d..45234c68ad2 100644 --- a/web-console/src/dialogs/compaction-history-dialog/compaction-history-dialog.tsx +++ b/web-console/src/dialogs/compaction-history-dialog/compaction-history-dialog.tsx @@ -135,8 +135,8 @@ export const CompactionHistoryDialog = React.memo(function CompactionHistoryDial ({ label: s.auditTime, value: s.compactionConfig }))} - initLeftIndex={diffIndex + 1} - initRightIndex={diffIndex} + initOldIndex={diffIndex + 1} + initNewIndex={diffIndex} onClose={() => setDiffIndex(-1)} /> )} diff --git a/web-console/src/dialogs/coordinator-dynamic-config-dialog/coordinator-dynamic-config-dialog.tsx b/web-console/src/dialogs/coordinator-dynamic-config-dialog/coordinator-dynamic-config-dialog.tsx index b31f774d88b..3caa2c71b74 100644 --- a/web-console/src/dialogs/coordinator-dynamic-config-dialog/coordinator-dynamic-config-dialog.tsx +++ b/web-console/src/dialogs/coordinator-dynamic-config-dialog/coordinator-dynamic-config-dialog.tsx @@ -107,9 +107,7 @@ export const CoordinatorDynamicConfigDialog = React.memo(function CoordinatorDyn

    Edit the coordinator dynamic configuration on the fly. For more information please refer to the{' '} - + documentation . diff --git a/web-console/src/dialogs/diff-dialog/diff-dialog.tsx b/web-console/src/dialogs/diff-dialog/diff-dialog.tsx index bb42f1db847..011bbfa5d38 100644 --- a/web-console/src/dialogs/diff-dialog/diff-dialog.tsx +++ b/web-console/src/dialogs/diff-dialog/diff-dialog.tsx @@ -44,43 +44,47 @@ export interface DiffVersion { export interface DiffDialogProps { title?: string; - left?: unknown; - right?: unknown; + onClose(): void; + + // Single value + oldValue?: unknown; + newValue?: unknown; + + // Versions versions?: DiffVersion[]; - initLeftIndex?: number; - initRightIndex?: number; - onClose: () => void; + initOldIndex?: number; + initNewIndex?: number; } export const DiffDialog = React.memo(function DiffDialog(props: DiffDialogProps) { - const { title, left, right, versions, initLeftIndex, initRightIndex, onClose } = props; + const { title, oldValue, newValue, versions, initOldIndex, initNewIndex, onClose } = props; - const [leftIndex, setLeftIndex] = useState(initLeftIndex || 0); - const [rightIndex, setRightIndex] = useState(initRightIndex || 0); + const [leftIndex, setLeftIndex] = useState(initOldIndex || 0); + const [rightIndex, setRightIndex] = useState(initNewIndex || 0); - let leftValue: string; - let rightValue: string; + let oldValueString: string; + let newValueString: string; if (Array.isArray(versions)) { if (versions.length) { - const leftVersion = versions[leftIndex].value; - const rightVersion = versions[rightIndex].value; - if (typeof leftVersion === 'string' && typeof rightVersion === 'string') { - leftValue = leftVersion; - rightValue = rightVersion; + const oldVersionValue = versions[leftIndex].value; + const newVersionValue = versions[rightIndex].value; + if (typeof oldVersionValue === 'string' && typeof newVersionValue === 'string') { + oldValueString = oldVersionValue; + newValueString = newVersionValue; } else { - leftValue = JSONBig.stringify(leftVersion, undefined, 2); - rightValue = JSONBig.stringify(rightVersion, undefined, 2); + oldValueString = JSONBig.stringify(oldVersionValue, undefined, 2); + newValueString = JSONBig.stringify(newVersionValue, undefined, 2); } } else { - leftValue = rightValue = 'Nothing to diff'; + oldValueString = newValueString = 'Nothing to diff'; } } else { - if (typeof left === 'string' && typeof right === 'string') { - leftValue = left; - rightValue = right; + if (typeof oldValue === 'string' && typeof newValue === 'string') { + oldValueString = oldValue; + newValueString = newValue; } else { - leftValue = JSONBig.stringify(left, undefined, 2); - rightValue = JSONBig.stringify(right, undefined, 2); + oldValueString = JSONBig.stringify(oldValue, undefined, 2); + newValueString = JSONBig.stringify(newValue, undefined, 2); } } @@ -121,8 +125,8 @@ export const DiffDialog = React.memo(function DiffDialog(props: DiffDialogProps) )}

    - diff --git a/web-console/src/dialogs/show-value-dialog/show-value-dialog.scss b/web-console/src/dialogs/show-value-dialog/show-value-dialog.scss index a1da01e5931..f561f1bf99f 100644 --- a/web-console/src/dialogs/show-value-dialog/show-value-dialog.scss +++ b/web-console/src/dialogs/show-value-dialog/show-value-dialog.scss @@ -19,10 +19,6 @@ @import '../../variables'; .show-value-dialog { - &.#{$bp-ns}-dialog { - padding-bottom: 10px; - } - &.normal.#{$bp-ns}-dialog { height: 600px; } @@ -32,12 +28,21 @@ height: 90vh; } - .#{$bp-ns}-input { - margin: 10px; - flex: 1; + .#{$bp-ns}-dialog-body { + display: flex; + flex-direction: column; + + .ace-editor { + flex: 1; + } + + .#{$bp-ns}-input { + flex: 1; + resize: none; + } } - .#{$bp-ns}-dialog-footer-actions { - padding-right: 10px; + .#{$bp-ns}-dialog-footer { + margin-top: 0; } } diff --git a/web-console/src/dialogs/show-value-dialog/show-value-dialog.tsx b/web-console/src/dialogs/show-value-dialog/show-value-dialog.tsx index 8e1b0290865..4369a43bb76 100644 --- a/web-console/src/dialogs/show-value-dialog/show-value-dialog.tsx +++ b/web-console/src/dialogs/show-value-dialog/show-value-dialog.tsx @@ -16,11 +16,21 @@ * limitations under the License. */ -import { Button, Classes, Dialog, Intent, TextArea } from '@blueprintjs/core'; +import { + Button, + ButtonGroup, + Classes, + Dialog, + FormGroup, + Intent, + TextArea, +} from '@blueprintjs/core'; import { IconNames } from '@blueprintjs/icons'; import classNames from 'classnames'; import copy from 'copy-to-clipboard'; -import React from 'react'; +import * as JSONBig from 'json-bigint-native'; +import React, { useMemo, useState } from 'react'; +import AceEditor from 'react-ace'; import { AppToaster } from '../../singletons'; @@ -35,6 +45,15 @@ export interface ShowValueDialogProps { export const ShowValueDialog = React.memo(function ShowValueDialog(props: ShowValueDialogProps) { const { title, onClose, str, size } = props; + const [tab, setTab] = useState<'formatted' | 'raw'>('formatted'); + + const parsed = useMemo(() => { + try { + return JSONBig.parse(str); + } catch {} + }, [str]); + + const hasParsed = typeof parsed !== 'undefined'; function handleCopy() { copy(str, { format: 'text/plain' }); @@ -51,10 +70,41 @@ export const ShowValueDialog = React.memo(function ShowValueDialog(props: ShowVa onClose={onClose} title={title || 'Full value'} > -