druid/.github/workflows/standard-its.yml
Rishabh Singh e30790e013
Introduce Segment Schema Publishing and Polling for Efficient Datasource Schema Building (#15817)
Issue: #14989

The initial step in optimizing segment metadata was to centralize the construction of datasource schema in the Coordinator (#14985). Thereafter, we addressed the problem of publishing schema for realtime segments (#15475). Subsequently, our goal is to eliminate the requirement for regularly executing queries to obtain segment schema information.

This is the final change which involves publishing segment schema for finalized segments from task and periodically polling them in the Coordinator.
2024-04-24 22:22:53 +05:30

202 lines
8.8 KiB
YAML

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Github workflow that runs standard/old ITs
name: "Standard ITs workflow"
on:
workflow_call:
workflow_dispatch:
jobs:
changes:
runs-on: ubuntu-latest
# Required permissions
permissions:
pull-requests: read
# Set job outputs to values from filter step
outputs:
# run everything if not a PR
core: ${{ steps.filter.outputs.core || github.event_name != 'pull_request'}}
common-extensions: ${{ steps.filter.outputs.common-extensions }}
steps:
- uses: dorny/paths-filter@v3.0.0
if: github.event_name == 'pull_request'
id: filter
with:
filters: |
common-extensions:
- 'extension-core/(mysql-metadata-storage|druid-basic-security|simple-client-sslcontext|druid-testing-tools|druid-lookups-cached-global|druid-histogram|druid-datasketches|druid-parquet-extensions|druid-avro-extensions|druid-protobuf-extensions|druid-orc-extensions|druid-kafka-indexing-service|druid-s3-extensions)/**'
core:
- '!extension*/**'
integration-index-tests-middleManager:
needs: changes
strategy:
fail-fast: false
matrix:
testing_group: [batch-index, input-format, input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-index-slow, kafka-transactional-index, kafka-transactional-index-slow, kafka-data-format, ldap-security, realtime-index, append-ingestion, compaction, cds-task-schema-publish-disabled, cds-coordinator-smq-disabled]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
group: ${{ matrix.testing_group }}
integration-index-tests-indexer:
needs: changes
strategy:
fail-fast: false
matrix:
testing_group: [input-source, perfect-rollup-parallel-batch-index, kafka-index, append-ingestion, compaction]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: indexer
group: ${{ matrix.testing_group }}
integration-query-tests-middleManager:
needs: changes
strategy:
fail-fast: false
matrix:
testing_group: [query, query-retry, query-error, security, high-availability, centralized-datasource-schema]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -Dgroups=${{ matrix.testing_group }}
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/prepopulated-data
group: ${{ matrix.testing_group }}
integration-query-tests-middleManager-mariaDB:
needs: changes
strategy:
fail-fast: false
matrix:
jdk: [8, 17, 21]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: ${{ matrix.jdk }}
runtime_jdk: ${{ matrix.jdk }}
testing_groups: -Dgroups=query
use_indexer: middleManager
mysql_driver: org.mariadb.jdbc.Driver
override_config_path: ./environment-configs/test-groups/prepopulated-data
group: query
integration-shuffle-deep-store-tests:
needs: changes
strategy:
fail-fast: false
matrix:
indexer: [indexer, middleManager]
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -Dgroups=shuffle-deep-store
use_indexer: ${{ matrix.indexer }}
override_config_path: ./environment-configs/test-groups/shuffle-deep-store
group: shuffle deep store
integration-custom-coordinator-duties-tests:
needs: changes
uses: ./.github/workflows/reusable-standard-its.yml
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -Dgroups=custom-coordinator-duties
use_indexer: middleManager
override_config_path: ./environment-configs/test-groups/custom-coordinator-duties
group: custom coordinator duties
integration-k8s-leadership-tests:
needs: changes
name: (Compile=openjdk8, Run=openjdk8, Cluster Build On K8s) ITNestedQueryPushDownTest integration test
if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }}
runs-on: ubuntu-22.04
env:
MVN: mvn --no-snapshot-updates
MAVEN_SKIP: -P skip-static-checks -Dweb.console.skip=true -Dmaven.javadoc.skip=true
CONFIG_FILE: k8s_run_config_file.json
IT_TEST: -Dit.test=ITNestedQueryPushDownTest
POD_NAME: int-test
POD_NAMESPACE: default
BUILD_DRUID_CLUSTER: true
steps:
- name: Checkout branch
uses: actions/checkout@v4
- name: setup java
uses: actions/setup-java@v4
with:
java-version: '8'
distribution: 'zulu'
# the build step produces SNAPSHOT artifacts into the local maven repository,
# we include github.sha in the cache key to make it specific to that build/jdk
- name: Restore Maven repository
id: maven-restore
uses: actions/cache/restore@v4
with:
path: ~/.m2/repository
key: maven-${{ runner.os }}-8-${{ github.sha }}
restore-keys: setup-java-Linux-maven-${{ hashFiles('**/pom.xml') }}
- name: Maven build
if: steps.maven-restore.outputs.cache-hit != 'true'
run: |
./it.sh ci
- name: Run IT
id: test
run: |
# Debug echo
echo "MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER}"
MAVEN_OPTS='-Xmx2048m' ${MVN} verify -pl integration-tests -P int-tests-config-file ${IT_TEST} ${MAVEN_SKIP} -Dpod.name=${POD_NAME} -Dpod.namespace=${POD_NAMESPACE} -Dbuild.druid.cluster=${BUILD_DRUID_CLUSTER}
- name: Debug on failure
if: ${{ failure() && steps.test.conclusion == 'failure' }}
run: |
for v in broker middlemanager router coordinator historical ; do
echo "------------------------druid-tiny-cluster-"$v"s-0-------------------------";
/usr/local/bin/kubectl logs --tail 1000 druid-tiny-cluster-"$v"s-0 ||:;
/usr/local/bin/kubectl get events | grep druid-tiny-cluster-"$v"s-0 ||:;
done
integration-other-tests:
strategy:
fail-fast: false
matrix:
indexer: [middleManager, indexer]
uses: ./.github/workflows/reusable-standard-its.yml
with:
build_jdk: 8
runtime_jdk: 8
testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-datasource-schema,cds-task-schema-publish-disabled,cds-coordinator-smq-disabled
use_indexer: ${{ matrix.indexer }}
group: other