druid/integration-tests/docker/Dockerfile

121 lines
5.4 KiB
Docker
Raw Normal View History

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG JDK_VERSION=8-slim
FROM openjdk:$JDK_VERSION as druidbase
# Bundle everything into one script so cleanup can reduce image size.
# Otherwise docker's layered images mean that things are not actually deleted.
COPY base-setup.sh /root/base-setup.sh
ARG KAFKA_VERSION
# ZooKeeper version to install in the base image
# This is passed in by maven at build time to align with the client version we depend on in the pom file
ARG ZK_VERSION
ARG APACHE_ARCHIVE_MIRROR_HOST=https://archive.apache.org
RUN APACHE_ARCHIVE_MIRROR_HOST=${APACHE_ARCHIVE_MIRROR_HOST} /root/base-setup.sh && rm -f /root/base-setup.sh
FROM druidbase
ARG MYSQL_VERSION
ARG CONFLUENT_VERSION
# Verify Java version
RUN java -version
2014-11-06 13:23:24 -05:00
RUN echo "[mysqld]\ncharacter-set-server=utf8\ncollation-server=utf8_bin\n" >> /etc/mysql/my.cnf
2014-11-06 13:23:24 -05:00
# Setup metadata store
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
RUN find /var/lib/mysql -type f -exec touch {} \; && /etc/init.d/mysql start \
&& echo "CREATE USER 'druid'@'%' IDENTIFIED BY 'diurd'; GRANT ALL ON druid.* TO 'druid'@'%'; CREATE database druid DEFAULT CHARACTER SET utf8mb4;" | mysql -u root \
2015-11-12 20:28:56 -05:00
&& /etc/init.d/mysql stop
# Add Druid jars
ADD lib/* /usr/local/druid/lib/
2014-11-06 13:23:24 -05:00
# Download the MySQL Java connector
# target path must match the exact path referenced in environment-configs/common
RUN wget -q "https://repo1.maven.org/maven2/mysql/mysql-connector-java/$MYSQL_VERSION/mysql-connector-java-$MYSQL_VERSION.jar" \
-O /usr/local/druid/lib/mysql-connector-java.jar
RUN wget -q "https://packages.confluent.io/maven/io/confluent/kafka-protobuf-provider/$CONFLUENT_VERSION/kafka-protobuf-provider-$CONFLUENT_VERSION.jar" \
-O /usr/local/druid/lib/kafka-protobuf-provider.jar
2014-11-06 13:23:24 -05:00
# Add sample data
# touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72.
RUN find /var/lib/mysql -type f -exec touch {} \; && service mysql start \
&& java -cp "/usr/local/druid/lib/*" -Ddruid.metadata.storage.type=mysql org.apache.druid.cli.Main tools metadata-init --connectURI="jdbc:mysql://localhost:3306/druid" --user=druid --password=diurd \
2015-11-12 20:28:56 -05:00
&& /etc/init.d/mysql stop
ADD test-data /test-data
2014-11-06 13:23:24 -05:00
# Setup supervisord
ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Add druid configuration setup script
ADD druid.sh /druid.sh
# mysql
ADD run-mysql.sh /run-mysql.sh
# internal docker_ip:9092 endpoint is used to access Kafka from other Docker containers
# external docker ip:9093 endpoint is used to access Kafka from test code
2015-11-12 20:28:56 -05:00
# run this last to avoid rebuilding the image every time the ip changes
ADD docker_ip docker_ip
RUN perl -pi -e "s/#listeners=.*/listeners=INTERNAL:\/\/172.172.172.2:9092,EXTERNAL:\/\/172.172.172.2:9093/" /usr/local/kafka/config/server.properties
RUN perl -pi -e "s/#advertised.listeners=.*/advertised.listeners=INTERNAL:\/\/172.172.172.2:9092,EXTERNAL:\/\/$(cat docker_ip):9093/" /usr/local/kafka/config/server.properties
RUN perl -pi -e "s/#listener.security.protocol.map=.*/listener.security.protocol.map=INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT\ninter.broker.listener.name=INTERNAL/" /usr/local/kafka/config/server.properties
RUN perl
2014-11-06 13:23:24 -05:00
# Add directory with TLS support files
ADD tls tls
ADD client_tls client_tls
2014-11-06 13:23:24 -05:00
# Expose ports:
# - 8081, 8281: HTTP, HTTPS (coordinator)
# - 8082, 8282: HTTP, HTTPS (broker)
# - 8083, 8283: HTTP, HTTPS (historical)
# - 8090, 8290: HTTP, HTTPS (overlord)
# - 8091, 8291: HTTP, HTTPS (middlemanager)
# - 8888-8891, 9088-9091: HTTP, HTTPS (routers)
2014-11-06 13:23:24 -05:00
# - 3306: MySQL
# - 2181 2888 3888: ZooKeeper
# - 8100 8101 8102 8103 8104 8105 : peon ports
# - 8300 8301 8302 8303 8304 8305 : peon HTTPS ports
EXPOSE 8081 8281
EXPOSE 8082 8282
EXPOSE 8083 8283
EXPOSE 8090 8290
EXPOSE 8091 8291
2014-11-06 13:23:24 -05:00
EXPOSE 3306
EXPOSE 2181 2888 3888
EXPOSE 8100 8101 8102 8103 8104 8105
EXPOSE 8300 8301 8302 8303 8304 8305
EXPOSE 9092 9093
2015-11-12 20:28:56 -05:00
2014-11-06 13:23:24 -05:00
WORKDIR /var/lib/druid
Fixes and tests related to the Indexer process. (#10631) * Fixes and tests related to the Indexer process. Three bugs fixed: 1) Indexers would not announce themselves as segment servers if they did not have storage locations defined. This used to work, but was broken in #9971. Fixed this by adding an "isSegmentServer" method to ServerType and updating SegmentLoadDropHandler to always announce if this method returns true. 2) Certain batch task types were written in a way that assumed "isReady" would be called before "run", which is not guaranteed. In particular, they relied on it in order to initialize "taskLockHelper". Fixed this by updating AbstractBatchIndexTask to ensure "isReady" is called before "run" for these tasks. 3) UnifiedIndexerAppenderatorsManager did not properly handle complex datasources. Introduced DataSourceAnalysis in order to fix this. Test changes: 1) Add a new "docker-compose.cli-indexer.yml" config that spins up an Indexer instead of a MiddleManager. 2) Introduce a "USE_INDEXER" environment variable that determines if docker-compose will start up an Indexer or a MiddleManager. 3) Duplicate all the jdk8 tests and run them in both MiddleManager and Indexer mode. 4) Various adjustments to encourage fail-fast errors in the Docker build scripts. 5) Various adjustments to speed up integration tests and reduce memory usage. 6) Add another Mac-specific approach to determining a machine's own IP. This was useful on my development machine. 7) Update segment-count check in ITCompactionTaskTest to eliminate a race condition (it was looking for 6 segments, which only exist together briefly, until the older 4 are marked unused). Javadoc updates: 1) AbstractBatchIndexTask: Added javadocs to determineLockGranularityXXX that make it clear when taskLockHelper will be initialized as a side effect. (Related to the second bug above.) 2) Task: Clarified that "isReady" is not guaranteed to be called before "run". It was already implied, but now it's explicit. 3) ZkCoordinator: Clarified deprecation message. 4) DataSegmentServerAnnouncer: Clarified deprecation message. * Fix stop_cluster script. * Fix sanity check in script. * Fix hashbang lines. * Test and doc adjustments. * Additional tests, and adjustments for tests. * Split ITs back out. * Revert change to druid_coordinator_period_indexingPeriod. * Set Indexer capacity to match MM. * Bump up Historical memory. * Bump down coordinator, overlord memory. * Bump up Broker memory.
2020-12-08 19:02:26 -05:00
ENTRYPOINT /tls/generate-server-certs-and-keystores.sh \
&& . /druid.sh \
# Create druid service config files with all the config variables
Fixes and tests related to the Indexer process. (#10631) * Fixes and tests related to the Indexer process. Three bugs fixed: 1) Indexers would not announce themselves as segment servers if they did not have storage locations defined. This used to work, but was broken in #9971. Fixed this by adding an "isSegmentServer" method to ServerType and updating SegmentLoadDropHandler to always announce if this method returns true. 2) Certain batch task types were written in a way that assumed "isReady" would be called before "run", which is not guaranteed. In particular, they relied on it in order to initialize "taskLockHelper". Fixed this by updating AbstractBatchIndexTask to ensure "isReady" is called before "run" for these tasks. 3) UnifiedIndexerAppenderatorsManager did not properly handle complex datasources. Introduced DataSourceAnalysis in order to fix this. Test changes: 1) Add a new "docker-compose.cli-indexer.yml" config that spins up an Indexer instead of a MiddleManager. 2) Introduce a "USE_INDEXER" environment variable that determines if docker-compose will start up an Indexer or a MiddleManager. 3) Duplicate all the jdk8 tests and run them in both MiddleManager and Indexer mode. 4) Various adjustments to encourage fail-fast errors in the Docker build scripts. 5) Various adjustments to speed up integration tests and reduce memory usage. 6) Add another Mac-specific approach to determining a machine's own IP. This was useful on my development machine. 7) Update segment-count check in ITCompactionTaskTest to eliminate a race condition (it was looking for 6 segments, which only exist together briefly, until the older 4 are marked unused). Javadoc updates: 1) AbstractBatchIndexTask: Added javadocs to determineLockGranularityXXX that make it clear when taskLockHelper will be initialized as a side effect. (Related to the second bug above.) 2) Task: Clarified that "isReady" is not guaranteed to be called before "run". It was already implied, but now it's explicit. 3) ZkCoordinator: Clarified deprecation message. 4) DataSegmentServerAnnouncer: Clarified deprecation message. * Fix stop_cluster script. * Fix sanity check in script. * Fix hashbang lines. * Test and doc adjustments. * Additional tests, and adjustments for tests. * Split ITs back out. * Revert change to druid_coordinator_period_indexingPeriod. * Set Indexer capacity to match MM. * Bump up Historical memory. * Bump down coordinator, overlord memory. * Bump up Broker memory.
2020-12-08 19:02:26 -05:00
&& setupConfig \
# Some test groups require pre-existing data to be setup
Fixes and tests related to the Indexer process. (#10631) * Fixes and tests related to the Indexer process. Three bugs fixed: 1) Indexers would not announce themselves as segment servers if they did not have storage locations defined. This used to work, but was broken in #9971. Fixed this by adding an "isSegmentServer" method to ServerType and updating SegmentLoadDropHandler to always announce if this method returns true. 2) Certain batch task types were written in a way that assumed "isReady" would be called before "run", which is not guaranteed. In particular, they relied on it in order to initialize "taskLockHelper". Fixed this by updating AbstractBatchIndexTask to ensure "isReady" is called before "run" for these tasks. 3) UnifiedIndexerAppenderatorsManager did not properly handle complex datasources. Introduced DataSourceAnalysis in order to fix this. Test changes: 1) Add a new "docker-compose.cli-indexer.yml" config that spins up an Indexer instead of a MiddleManager. 2) Introduce a "USE_INDEXER" environment variable that determines if docker-compose will start up an Indexer or a MiddleManager. 3) Duplicate all the jdk8 tests and run them in both MiddleManager and Indexer mode. 4) Various adjustments to encourage fail-fast errors in the Docker build scripts. 5) Various adjustments to speed up integration tests and reduce memory usage. 6) Add another Mac-specific approach to determining a machine's own IP. This was useful on my development machine. 7) Update segment-count check in ITCompactionTaskTest to eliminate a race condition (it was looking for 6 segments, which only exist together briefly, until the older 4 are marked unused). Javadoc updates: 1) AbstractBatchIndexTask: Added javadocs to determineLockGranularityXXX that make it clear when taskLockHelper will be initialized as a side effect. (Related to the second bug above.) 2) Task: Clarified that "isReady" is not guaranteed to be called before "run". It was already implied, but now it's explicit. 3) ZkCoordinator: Clarified deprecation message. 4) DataSegmentServerAnnouncer: Clarified deprecation message. * Fix stop_cluster script. * Fix sanity check in script. * Fix hashbang lines. * Test and doc adjustments. * Additional tests, and adjustments for tests. * Split ITs back out. * Revert change to druid_coordinator_period_indexingPeriod. * Set Indexer capacity to match MM. * Bump up Historical memory. * Bump down coordinator, overlord memory. * Bump up Broker memory.
2020-12-08 19:02:26 -05:00
&& setupData \
# Export the service config file path to use in supervisord conf file
&& export DRUID_COMMON_CONF_DIR="$(. /druid.sh; getConfPath ${DRUID_SERVICE})" \
# Export the common config file path to use in supervisord conf file
&& export DRUID_SERVICE_CONF_DIR="$(. /druid.sh; getConfPath _common)" \
# Run Druid service using supervisord
&& exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf