mirror of https://github.com/apache/druid.git
Add integration tests for HDFS (#9542)
* HDFS IT * HDFS IT * HDFS IT * fix checkstyle
This commit is contained in:
parent
4870ad7b56
commit
5f127a1829
|
@ -344,7 +344,7 @@ jobs:
|
|||
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
|
||||
jdk: openjdk8
|
||||
services: *integration_test_services
|
||||
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
|
||||
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
|
||||
script: *run_integration_test
|
||||
after_failure: *integration_test_diags
|
||||
# END - Integration tests for Compile with Java 8 and Run with Java 8
|
||||
|
@ -383,7 +383,7 @@ jobs:
|
|||
- <<: *integration_tests
|
||||
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
|
||||
jdk: openjdk8
|
||||
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
|
||||
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
|
||||
# END - Integration tests for Compile with Java 8 and Run with Java 11
|
||||
|
||||
- name: "security vulnerabilities"
|
||||
|
|
|
@ -140,15 +140,22 @@ Running a Test That Uses Hadoop
|
|||
The integration test that indexes from hadoop is not run as part
|
||||
of the integration test run discussed above. This is because druid
|
||||
test clusters might not, in general, have access to hadoop.
|
||||
That's the case (for now, at least) when using the docker cluster set
|
||||
up by the integration-tests profile, so the hadoop test
|
||||
has to be run using a cluster specified in a configuration file.
|
||||
This also applies to integration tests that use Hadoop HDFS as an inputSource or as deep storage.
|
||||
To run an integration test that uses Hadoop, you will have to run a Hadoop cluster. This can be done in two ways:
|
||||
1) Run your own Druid + Hadoop cluster and specify the Hadoop configs in the configuration file (CONFIG_FILE).
|
||||
2) Run Druid Docker test clusters with a Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command.
|
||||
|
||||
The data file is
|
||||
integration-tests/src/test/resources/hadoop/batch_hadoop.data.
|
||||
Currently, hdfs-deep-storage and other <cloud>-deep-storage integration test groups can only be run with
|
||||
Druid Docker test clusters by passing -Dstart.hadoop.docker=true to start the Hadoop container.
|
||||
You will also have to provide -Doverride.config.path=<PATH_TO_FILE> with your Druid's Hadoop configs set.
|
||||
See the integration-tests/docker/environment-configs/override-examples/hdfs directory for an example.
|
||||
Note that if the integration test you are running also uses another cloud extension (S3, Azure, GCS), additional
|
||||
credentials/configs may need to be set in the same file as your Druid's Hadoop configs set.
|
||||
|
||||
Currently, ITHadoopIndexTest can only be run with your own Druid + Hadoop cluster by following the steps below:
|
||||
Create a directory called batchHadoop1 in the hadoop file system
|
||||
(anywhere you want) and put batch_hadoop.data into that directory
|
||||
(as its only file).
|
||||
(anywhere you want) and put batch_hadoop.data (integration-tests/src/test/resources/hadoop/batch_hadoop.data)
|
||||
into that directory (as its only file).
|
||||
|
||||
Add this keyword to the configuration file (see above):
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ LC_ALL=C.UTF-8
|
|||
|
||||
# JAVA OPTS
|
||||
COMMON_DRUID_JAVA_OPTS=-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Dlog4j.configurationFile=/shared/docker/lib/log4j2.xml
|
||||
DRUID_DEP_LIB_DIR=/shared/hadoop_xml/*:/shared/docker/lib/*:/usr/local/druid/lib/mysql-connector-java.jar
|
||||
DRUID_DEP_LIB_DIR=/shared/hadoop_xml:/shared/docker/lib/*:/usr/local/druid/lib/mysql-connector-java.jar
|
||||
|
||||
# Druid configs
|
||||
druid_extensions_loadList=[]
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
druid_storage_type=hdfs
|
||||
druid_storage_storageDirectory=/druid/segments
|
||||
# Depending on the test, additional extension(s) may be required.
|
||||
# Please refer to the other integration-tests/docker/environment-configs/override-examples/ files and Druid docs for
|
||||
# additional env vars to provide for each extension.
|
||||
druid_extensions_loadList=["druid-hdfs-storage"]
|
|
@ -55,6 +55,12 @@
|
|||
<version>${project.parent.version}</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.druid.extensions</groupId>
|
||||
<artifactId>druid-hdfs-storage</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.druid.extensions</groupId>
|
||||
<artifactId>druid-datasketches</artifactId>
|
||||
|
|
|
@ -68,6 +68,9 @@
|
|||
# For druid-google-extensions
|
||||
mkdir -p $SHARED_DIR/docker/extensions/druid-google-extensions
|
||||
mv $SHARED_DIR/docker/lib/druid-google-extensions-* $SHARED_DIR/docker/extensions/druid-google-extensions
|
||||
# For druid-hdfs-storage
|
||||
mkdir -p $SHARED_DIR/docker/extensions/druid-hdfs-storage
|
||||
mv $SHARED_DIR/docker/lib/druid-hdfs-storage-* $SHARED_DIR/docker/extensions/druid-hdfs-storage
|
||||
|
||||
# Pull Hadoop dependency if needed
|
||||
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
|
||||
|
@ -157,6 +160,42 @@ fi
|
|||
|
||||
# Start docker containers for all Druid processes and dependencies
|
||||
{
|
||||
# Start Hadoop docker if needed
|
||||
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
|
||||
then
|
||||
# Start Hadoop docker container
|
||||
docker run -d --privileged --net druid-it-net --ip 172.172.172.13 -h druid-it-hadoop --name druid-it-hadoop -p 2049:2049 -p 2122:2122 -p 8020:8020 -p 8021:8021 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v $RESOURCEDIR:/resources -v $SHARED_DIR:/shared druid-it/hadoop:2.8.5 sh -c "/etc/bootstrap.sh && tail -f /dev/null"
|
||||
|
||||
# wait for hadoop namenode to be up
|
||||
echo "Waiting for hadoop namenode to be up"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid"
|
||||
while [ $? -ne 0 ]
|
||||
do
|
||||
sleep 2
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid"
|
||||
done
|
||||
echo "Finished waiting for Hadoop namenode"
|
||||
|
||||
# Setup hadoop druid dirs
|
||||
echo "Setting up druid hadoop dirs"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid/segments"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /quickstart"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod 777 /druid"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod 777 /druid/segments"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod 777 /quickstart"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod -R 777 /tmp"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod -R 777 /user"
|
||||
# Copy data files to Hadoop container
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -put /shared/wikiticker-it/wikiticker-2015-09-12-sampled.json.gz /quickstart/wikiticker-2015-09-12-sampled.json.gz"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -put /resources/data/batch_index /batch_index"
|
||||
echo "Finished setting up druid hadoop dirs"
|
||||
|
||||
echo "Copying Hadoop XML files to shared"
|
||||
docker exec -t druid-it-hadoop sh -c "cp /usr/local/hadoop/etc/hadoop/*.xml /shared/hadoop_xml"
|
||||
echo "Copied Hadoop XML files to shared"
|
||||
fi
|
||||
|
||||
# Start zookeeper and kafka
|
||||
docker run -d --privileged --net druid-it-net --ip 172.172.172.2 ${COMMON_ENV} --name druid-zookeeper-kafka -p 2181:2181 -p 9092:9092 -p 9093:9093 -v $SHARED_DIR:/shared -v $SERVICE_SUPERVISORDS_DIR/zookeeper.conf:$SUPERVISORDIR/zookeeper.conf -v $SERVICE_SUPERVISORDS_DIR/kafka.conf:$SUPERVISORDIR/kafka.conf druid/cluster
|
||||
|
||||
|
@ -189,39 +228,4 @@ fi
|
|||
|
||||
# Start Router with custom TLS cert checkers
|
||||
docker run -d --privileged --net druid-it-net --ip 172.172.172.12 ${COMMON_ENV} ${ROUTER_CUSTOM_CHECK_TLS_ENV} ${OVERRIDE_ENV} --hostname druid-router-custom-check-tls --name druid-router-custom-check-tls -p 8891:8891 -p 9091:9091 -v $SHARED_DIR:/shared -v $SERVICE_SUPERVISORDS_DIR/druid.conf:$SUPERVISORDIR/druid.conf --link druid-zookeeper-kafka:druid-zookeeper-kafka --link druid-coordinator:druid-coordinator --link druid-broker:druid-broker druid/cluster
|
||||
|
||||
# Start Hadoop docker if needed
|
||||
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
|
||||
then
|
||||
# Start Hadoop docker container
|
||||
docker run -d --privileged --net druid-it-net --ip 172.172.172.13 -h druid-it-hadoop --name druid-it-hadoop -p 2049:2049 -p 2122:2122 -p 8020:8020 -p 8021:8021 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v $SHARED_DIR:/shared druid-it/hadoop:2.8.5 sh -c "/etc/bootstrap.sh && tail -f /dev/null"
|
||||
|
||||
# wait for hadoop namenode to be up
|
||||
echo "Waiting for hadoop namenode to be up"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid"
|
||||
while [ $? -ne 0 ]
|
||||
do
|
||||
sleep 2
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid"
|
||||
done
|
||||
echo "Finished waiting for Hadoop namenode"
|
||||
|
||||
# Setup hadoop druid dirs
|
||||
echo "Setting up druid hadoop dirs"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /druid/segments"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -mkdir -p /quickstart"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod 777 /druid"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod 777 /druid/segments"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod 777 /quickstart"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod -R 777 /tmp"
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -chmod -R 777 /user"
|
||||
# Copy data files to Hadoop container
|
||||
docker exec -t druid-it-hadoop sh -c "./usr/local/hadoop/bin/hdfs dfs -put /shared/wikiticker-it/wikiticker-2015-09-12-sampled.json.gz /quickstart/wikiticker-2015-09-12-sampled.json.gz"
|
||||
echo "Finished setting up druid hadoop dirs"
|
||||
|
||||
echo "Copying Hadoop XML files to shared"
|
||||
docker exec -t druid-it-hadoop sh -c "cp /usr/local/hadoop/etc/hadoop/*.xml /shared/hadoop_xml"
|
||||
echo "Copied Hadoop XML files to shared"
|
||||
fi
|
||||
}
|
|
@ -36,17 +36,23 @@ public class TestNGGroup
|
|||
// This group can only be run individually using -Dgroups=security since it requires specific test data setup.
|
||||
public static final String SECURITY = "security";
|
||||
// This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file.
|
||||
// The path of the file must then we pass to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
|
||||
public static final String S3_DEEP_STORAGE = "s3-deep-storage";
|
||||
// This group is not part of CI. To run this group, gcs configs/credentials for your gcs must be provided in a file.
|
||||
// The path of the file must then we pass to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
|
||||
// The path to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS file must also be passed
|
||||
// to mvn with -Dresource.file.dir.path=<PATH_TO_FOLDER>
|
||||
public static final String GCS_DEEP_STORAGE = "gcs-deep-storage";
|
||||
// This group is not part of CI. To run this group, azure configs/credentials for your azure must be provided in a file.
|
||||
// The path of the file must then we pass to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
|
||||
public static final String AZURE_DEEP_STORAGE = "azure-deep-storage";
|
||||
// This group is not part of CI. To run this group, hadoop configs must be provided in a file. The path of the file
|
||||
// must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||
// See integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||
// Additionally, hadoop docker must be started by passing -Dstart.hadoop.docker=true to mvn.
|
||||
public static final String HDFS_DEEP_STORAGE = "hdfs-deep-storage";
|
||||
|
||||
}
|
||||
|
|
|
@ -24,30 +24,14 @@ import com.google.common.collect.ImmutableMap;
|
|||
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITAzureParallelIndexTest extends AbstractITBatchIndexTest
|
||||
public abstract class AbstractAzureInputSourceSimpleIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
|
@ -85,8 +69,7 @@ public class ITAzureParallelIndexTest extends AbstractITBatchIndexTest
|
|||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "resources")
|
||||
public void testAzureIndexData(Pair<String, List> azureInputSource) throws Exception
|
||||
void doTest(Pair<String, List> azureInputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
|
@ -24,31 +24,14 @@ import com.google.common.collect.ImmutableMap;
|
|||
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
|
||||
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
|
||||
*/
|
||||
@Test(groups = TestNGGroup.GCS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITGcsParallelIndexTest extends AbstractITBatchIndexTest
|
||||
public abstract class AbstractGcsInputSourceSimpleIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
|
@ -86,8 +69,7 @@ public class ITGcsParallelIndexTest extends AbstractITBatchIndexTest
|
|||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "resources")
|
||||
public void testGcsIndexData(Pair<String, List> gcsInputSource) throws Exception
|
||||
void doTest(Pair<String, List> gcsInputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
public abstract class AbstractHdfsInputSourceSimpleIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_simple_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
|
||||
private static final String INPUT_SOURCE_PATHS_KEY = "paths";
|
||||
|
||||
@DataProvider
|
||||
public static Object[][] resources()
|
||||
{
|
||||
return new Object[][]{
|
||||
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
|
||||
"hdfs://druid-it-hadoop:9000/batch_index"
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
|
||||
ImmutableList.of(
|
||||
"hdfs://druid-it-hadoop:9000/batch_index"
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
|
||||
ImmutableList.of(
|
||||
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data1.json",
|
||||
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data2.json",
|
||||
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data3.json"
|
||||
)
|
||||
)}
|
||||
};
|
||||
}
|
||||
|
||||
void doTest(Pair<String, List> hdfsInputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
||||
) {
|
||||
final Function<String, String> hdfsPropsTransform = spec -> {
|
||||
try {
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_TYPE%%",
|
||||
"hdfs"
|
||||
);
|
||||
spec = StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_KEY%%",
|
||||
hdfsInputSource.lhs
|
||||
);
|
||||
return StringUtils.replace(
|
||||
spec,
|
||||
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
|
||||
jsonMapper.writeValueAsString(hdfsInputSource.rhs)
|
||||
);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
};
|
||||
|
||||
doIndexTest(
|
||||
INDEX_DATASOURCE,
|
||||
INDEX_TASK,
|
||||
hdfsPropsTransform,
|
||||
INDEX_QUERIES_RESOURCE,
|
||||
false,
|
||||
true,
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -24,30 +24,14 @@ import com.google.common.collect.ImmutableMap;
|
|||
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.S3_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITS3ParallelIndexTest extends AbstractITBatchIndexTest
|
||||
public abstract class AbstractS3InputSourceSimpleIndexTest extends AbstractITBatchIndexTest
|
||||
{
|
||||
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
|
||||
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||
|
@ -68,7 +52,7 @@ public class ITS3ParallelIndexTest extends AbstractITBatchIndexTest
|
|||
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
|
||||
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
|
||||
"s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
|
||||
)
|
||||
)
|
||||
)},
|
||||
{new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
|
||||
ImmutableList.of(
|
||||
|
@ -85,8 +69,7 @@ public class ITS3ParallelIndexTest extends AbstractITBatchIndexTest
|
|||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "resources")
|
||||
public void testS3IndexData(Pair<String, List> s3InputSource) throws Exception
|
||||
void doTest(Pair<String, List> s3InputSource) throws Exception
|
||||
{
|
||||
try (
|
||||
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITAzureToAzureParallelIndexTest extends AbstractAzureInputSourceSimpleIndexTest
|
||||
{
|
||||
@Test(dataProvider = "resources")
|
||||
public void testAzureIndexData(Pair<String, List> azureInputSource) throws Exception
|
||||
{
|
||||
doTest(azureInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials and hdfs deep storage configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/azure and
|
||||
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||
* 4) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITAzureToHdfsParallelIndexTest extends AbstractAzureInputSourceSimpleIndexTest
|
||||
{
|
||||
@Test(dataProvider = "resources")
|
||||
public void testAzureIndexData(Pair<String, List> azureInputSource) throws Exception
|
||||
{
|
||||
doTest(azureInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your GCS bucket at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
|
||||
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with the folder that contains the GOOGLE_APPLICATION_CREDENTIALS file
|
||||
*/
|
||||
@Test(groups = TestNGGroup.GCS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITGcsToGcsParallelIndexTest extends AbstractGcsInputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the GCS_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractGcsInputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testGcsIndexData(Pair<String, List> gcsInputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(gcsInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your GCS bucket at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs deep storage configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/gcs and
|
||||
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with the folder that contains the GOOGLE_APPLICATION_CREDENTIALS file
|
||||
* 5) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITGcsToHdfsParallelIndexTest extends AbstractGcsInputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the HDFS_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractGcsInputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testGcsIndexData(Pair<String, List> gcsInputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(gcsInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
* 2) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
|
||||
* You will also need to add "druid-hdfs-storage" to druid_extensions_loadList in this file.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITHdfsToAzureSimpleIndexTest extends AbstractHdfsInputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the AZURE_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractHdfsInputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(hdfsInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
* 2) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
|
||||
* You will also need to add "druid-hdfs-storage" to druid_extensions_loadList in this file.
|
||||
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with the folder that contains the GOOGLE_APPLICATION_CREDENTIALS file
|
||||
*/
|
||||
@Test(groups = TestNGGroup.GCS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITHdfsToGcsSimpleIndexTest extends AbstractHdfsInputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the GCS_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractHdfsInputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(hdfsInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
* 2) Provide -Doverride.config.path=<PATH_TO_FILE> with hdfs configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITHdfsToHdfsSimpleIndexTest extends AbstractHdfsInputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the HDFS_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractHdfsInputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(hdfsInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
* 2) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
|
||||
* You will also need to add "druid-hdfs-storage" to druid_extensions_loadList in this file.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.S3_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITHdfsToS3SimpleIndexTest extends AbstractHdfsInputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the S3_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractHdfsInputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(hdfsInputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your S3 bucket at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/s3 and
|
||||
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||
* 4) Run the test with -Dstart.hadoop.docker=true in the mvn command
|
||||
*/
|
||||
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITS3ToHdfsParallelIndexTest extends AbstractS3InputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the HDFS_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractS3InputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testS3IndexData(Pair<String, List> s3InputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(s3InputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.tests.indexer;
|
||||
|
||||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||
import org.apache.druid.tests.TestNGGroup;
|
||||
import org.testng.annotations.Guice;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IMPORTANT:
|
||||
* To run this test, you must:
|
||||
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||
* located in integration-tests/src/test/resources/data/batch_index to your S3 bucket at the location set in step 1.
|
||||
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
|
||||
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
|
||||
*/
|
||||
@Test(groups = TestNGGroup.S3_DEEP_STORAGE)
|
||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||
public class ITS3ToS3ParallelIndexTest extends AbstractS3InputSourceSimpleIndexTest
|
||||
{
|
||||
// The class-level @Test puts this test in the S3_DEEP_STORAGE group; the method-level @Test
// makes it data-driven, so TestNG invokes it once per entry of the "resources" data provider.
// Neither that provider nor doTest is declared in this class; presumably both are inherited
// from AbstractS3InputSourceSimpleIndexTest -- confirm there.
@Test(dataProvider = "resources")
|
||||
public void testS3IndexData(Pair<String, List> s3InputSource) throws Exception
|
||||
{
|
||||
// Pass the provider-supplied input source pair through unchanged.
doTest(s3InputSource);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
{
|
||||
"type": "index",
|
||||
"spec": {
|
||||
"dataSchema": {
|
||||
"dataSource": "%%DATASOURCE%%",
|
||||
"timestampSpec": {
|
||||
"column": "timestamp"
|
||||
},
|
||||
"dimensionsSpec": {
|
||||
"dimensions": [
|
||||
"page",
|
||||
{"type": "string", "name": "language", "createBitmapIndex": false},
|
||||
"user",
|
||||
"unpatrolled",
|
||||
"newPage",
|
||||
"robot",
|
||||
"anonymous",
|
||||
"namespace",
|
||||
"continent",
|
||||
"country",
|
||||
"region",
|
||||
"city"
|
||||
]
|
||||
},
|
||||
"metricsSpec": [
|
||||
{
|
||||
"type": "count",
|
||||
"name": "count"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "added",
|
||||
"fieldName": "added"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "deleted",
|
||||
"fieldName": "deleted"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "delta",
|
||||
"fieldName": "delta"
|
||||
},
|
||||
{
|
||||
"name": "thetaSketch",
|
||||
"type": "thetaSketch",
|
||||
"fieldName": "user"
|
||||
},
|
||||
{
|
||||
"name": "quantilesDoublesSketch",
|
||||
"type": "quantilesDoublesSketch",
|
||||
"fieldName": "delta"
|
||||
},
|
||||
{
|
||||
"name": "HLLSketchBuild",
|
||||
"type": "HLLSketchBuild",
|
||||
"fieldName": "user"
|
||||
}
|
||||
],
|
||||
"granularitySpec": {
|
||||
"segmentGranularity": "DAY",
|
||||
"queryGranularity": "second",
|
||||
"intervals" : [ "2013-08-31/2013-09-02" ]
|
||||
}
|
||||
},
|
||||
"ioConfig": {
|
||||
"type": "index",
|
||||
"inputSource": {
|
||||
"type": "%%INPUT_SOURCE_TYPE%%",
|
||||
"%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
|
||||
},
|
||||
"inputFormat": {
|
||||
"type": "json"
|
||||
}
|
||||
},
|
||||
"tuningConfig": {
|
||||
"type": "index"
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue