diff --git a/integration-tests/README.md b/integration-tests/README.md
index 90c1bd1fd99..baadd3a7bd1 100644
--- a/integration-tests/README.md
+++ b/integration-tests/README.md
@@ -184,7 +184,7 @@ For Google Cloud Storage, Amazon S3, and Microsoft Azure, the following will als
 1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and -Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1.
+located in integration-tests/src/test/resources/data/batch_index/json to your Cloud storage at the location set in step 1.
 
 For Google Cloud Storage, in addition to the above, you will also have to:
 1) Provide -Dresource.file.dir.path= with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
diff --git a/integration-tests/docker/environment-configs/override-examples/hdfs b/integration-tests/docker/environment-configs/override-examples/hdfs
index 1c495c0cea3..63b851e5085 100644
--- a/integration-tests/docker/environment-configs/override-examples/hdfs
+++ b/integration-tests/docker/environment-configs/override-examples/hdfs
@@ -21,4 +21,4 @@ druid_storage_storageDirectory=/druid/segments
 # Depending on the test, additional extension(s) may be required.
 # Please refer to the other integration-tests/docker/environment-configs/override-examples/ files and Druid docs for
 # additional env vars to provide for each extension.
-druid_extensions_loadList=["druid-hdfs-storage"]
\ No newline at end of file
+druid_extensions_loadList=["druid-hdfs-storage", "druid-parquet-extensions", "druid-orc-extensions"]
\ No newline at end of file
diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml
index a93cb95e149..885419623d3 100644
--- a/integration-tests/pom.xml
+++ b/integration-tests/pom.xml
@@ -69,6 +69,18 @@
             <version>${aws.sdk.version}</version>
             <scope>runtime</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.druid.extensions</groupId>
+            <artifactId>druid-orc-extensions</artifactId>
+            <version>${project.parent.version}</version>
+            <scope>runtime</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.druid.extensions</groupId>
+            <artifactId>druid-parquet-extensions</artifactId>
+            <version>${project.parent.version}</version>
+            <scope>runtime</scope>
+        </dependency>
         <dependency>
             <groupId>org.apache.druid.extensions</groupId>
             <artifactId>druid-s3-extensions</artifactId>
diff --git a/integration-tests/run_cluster.sh b/integration-tests/run_cluster.sh
index 0fda1ff9c43..122cf6a4d80 100755
--- a/integration-tests/run_cluster.sh
+++ b/integration-tests/run_cluster.sh
@@ -74,6 +74,12 @@
 # For druid-kinesis-indexing-service
 mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
 mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
+# For druid-parquet-extensions
+mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
+mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
+# For druid-orc-extensions
+mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
+mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
 
 # Pull Hadoop dependency if needed
 if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
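The changes above wire the new formats in at the environment level: the README points at the relocated JSON test data, the example HDFS override loads the Parquet and ORC extensions, the pom adds both extensions as runtime dependencies, and run_cluster.sh unpacks each one into its own directory under the shared Docker volume. A minimal sanity-check sketch for that layout (a hypothetical helper, not part of the change; the "/tmp/shared" fallback is an assumption, run_cluster.sh defines SHARED_DIR itself):

```java
import java.io.File;

// Hypothetical helper: confirm run_cluster.sh produced one directory per
// new extension under $SHARED_DIR/docker/extensions.
public class ExtensionLayoutCheck
{
  public static void main(String[] args)
  {
    String sharedDir = System.getenv().getOrDefault("SHARED_DIR", "/tmp/shared");
    for (String ext : new String[]{"druid-parquet-extensions", "druid-orc-extensions"}) {
      File dir = new File(sharedDir + "/docker/extensions/" + ext);
      System.out.println(dir + " -> " + (dir.isDirectory() ? "present" : "missing"));
    }
  }
}
```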
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java
index 1bc4801ca41..8ca08a0ef1e 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java
@@ -35,7 +35,6 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 {
   private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-  private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
   private static final String INPUT_SOURCE_URIS_KEY = "uris";
   private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
   private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 
   void doTest(Pair azureInputSource) throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try (
-        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
     ) {
       final Function azurePropsTransform = spec -> {
         try {
@@ -87,7 +87,11 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
               "%%PATH%%",
               config.getCloudPath()
           );
-
+          spec = StringUtils.replace(
+              spec,
+              "%%INPUT_FORMAT_TYPE%%",
+              InputFormatDetails.JSON.getInputFormatType()
+          );
           spec = StringUtils.replace(
               spec,
               "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
       };
 
       doIndexTest(
-          INDEX_DATASOURCE,
+          indexDatasource,
          INDEX_TASK,
          azurePropsTransform,
          INDEX_QUERIES_RESOURCE,
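This Azure change establishes the pattern repeated in the GCS, HDFS, and S3 tests below: the datasource name moves from a static field (one UUID per class load) to a local generated per doTest invocation, so concurrent or repeated runs no longer collide on a shared name, and the input format is pinned to JSON through the new %%INPUT_FORMAT_TYPE%% placeholder. A self-contained sketch of the naming-plus-unload pattern; unloader here is a stand-in for the real method inherited from AbstractIndexerTest:

```java
import java.io.Closeable;
import java.util.UUID;

public class UniqueDatasourceDemo
{
  // Stand-in for the suite's unloader(), which drops the datasource on close.
  static Closeable unloader(String datasource)
  {
    return () -> System.out.println("unloading " + datasource);
  }

  public static void main(String[] args) throws Exception
  {
    // One fresh name per invocation instead of one static name per class.
    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
    try (Closeable ignored = unloader(indexDatasource)) {
      System.out.println("indexing into " + indexDatasource);
    }
  }
}
```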
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java
index f5c192dfdd7..805619a2d18 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java
@@ -35,7 +35,6 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 {
   private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-  private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
   private static final String INPUT_SOURCE_URIS_KEY = "uris";
   private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
   private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 
   void doTest(Pair gcsInputSource) throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try (
-        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
     ) {
       final Function gcsPropsTransform = spec -> {
         try {
@@ -87,7 +87,11 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
               "%%PATH%%",
               config.getCloudPath()
           );
-
+          spec = StringUtils.replace(
+              spec,
+              "%%INPUT_FORMAT_TYPE%%",
+              InputFormatDetails.JSON.getInputFormatType()
+          );
           spec = StringUtils.replace(
               spec,
               "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
       };
 
       doIndexTest(
-          INDEX_DATASOURCE,
+          indexDatasource,
          INDEX_TASK,
          gcsPropsTransform,
          INDEX_QUERIES_RESOURCE,
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java
index 874eb78b8a7..742238accb0 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java
@@ -34,7 +34,6 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 {
   private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-  private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
   private static final String INPUT_SOURCE_PATHS_KEY = "paths";
 
   @DataProvider
@@ -42,27 +41,28 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
   {
     return new Object[][]{
         {new Pair<>(INPUT_SOURCE_PATHS_KEY,
-                    "hdfs://druid-it-hadoop:9000/batch_index"
+                    "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
         )},
         {new Pair<>(INPUT_SOURCE_PATHS_KEY,
                     ImmutableList.of(
-                        "hdfs://druid-it-hadoop:9000/batch_index"
+                        "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
                     )
         )},
         {new Pair<>(INPUT_SOURCE_PATHS_KEY,
                     ImmutableList.of(
-                        "hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data1.json",
-                        "hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data2.json",
-                        "hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data3.json"
+                        "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%",
+                        "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data2%%FILE_EXTENSION%%",
+                        "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data3%%FILE_EXTENSION%%"
                     )
         )}
     };
   }
 
-  void doTest(Pair hdfsInputSource) throws Exception
+  void doTest(Pair hdfsInputSource, InputFormatDetails inputFormatDetails) throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try (
-        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
     ) {
       final Function hdfsPropsTransform = spec -> {
         try {
@@ -81,11 +81,27 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
               "%%INPUT_SOURCE_PROPERTY_KEY%%",
               hdfsInputSource.lhs
           );
-          return StringUtils.replace(
+          spec = StringUtils.replace(
+              spec,
+              "%%INPUT_FORMAT_TYPE%%",
+              inputFormatDetails.getInputFormatType()
+          );
+          spec = StringUtils.replace(
               spec,
               "%%INPUT_SOURCE_PROPERTY_VALUE%%",
               jsonMapper.writeValueAsString(hdfsInputSource.rhs)
           );
+          spec = StringUtils.replace(
+              spec,
+              "%%FOLDER_SUFFIX%%",
+              inputFormatDetails.getFolderSuffix()
+          );
+          spec = StringUtils.replace(
+              spec,
+              "%%FILE_EXTENSION%%",
+              inputFormatDetails.getFileExtension()
+          );
+          return spec;
         }
         catch (Exception e) {
           throw new RuntimeException(e);
@@ -93,7 +109,7 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
       };
 
       doIndexTest(
-          INDEX_DATASOURCE,
+          indexDatasource,
          INDEX_TASK,
          hdfsPropsTransform,
          INDEX_QUERIES_RESOURCE,
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java
index 1e8a0e9d59b..beaeac5330f 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java
@@ -49,6 +49,39 @@ import java.util.function.Function;
 
 public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
 {
+  public enum InputFormatDetails
+  {
+    ORC("orc", ".orc", "/orc"),
+    JSON("json", ".json", "/json"),
+    PARQUET("parquet", ".parquet", "/parquet");
+
+    private final String inputFormatType;
+    private final String fileExtension;
+    private final String folderSuffix;
+
+    InputFormatDetails(String inputFormatType, String fileExtension, String folderSuffix)
+    {
+      this.inputFormatType = inputFormatType;
+      this.fileExtension = fileExtension;
+      this.folderSuffix = folderSuffix;
+    }
+
+    public String getInputFormatType()
+    {
+      return inputFormatType;
+    }
+
+    public String getFileExtension()
+    {
+      return fileExtension;
+    }
+
+    public String getFolderSuffix()
+    {
+      return folderSuffix;
+    }
+  }
+
   private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
 
   @Inject
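The new InputFormatDetails enum centralizes the three facts each format needs: the inputFormat type written into the task spec, the file extension, and the data subfolder. The HDFS test above threads all three through its %%...%% placeholders. A self-contained sketch of that substitution; it mirrors the transform functions but uses plain String.replace rather than Druid's StringUtils.replace, and the spec string is a trimmed stand-in for the real task template:

```java
public class SpecTemplateDemo
{
  // Trimmed copy of the enum added to AbstractITBatchIndexTest.
  enum InputFormatDetails
  {
    ORC("orc", ".orc", "/orc"),
    JSON("json", ".json", "/json"),
    PARQUET("parquet", ".parquet", "/parquet");

    final String inputFormatType;
    final String fileExtension;
    final String folderSuffix;

    InputFormatDetails(String inputFormatType, String fileExtension, String folderSuffix)
    {
      this.inputFormatType = inputFormatType;
      this.fileExtension = fileExtension;
      this.folderSuffix = folderSuffix;
    }
  }

  public static void main(String[] args)
  {
    InputFormatDetails details = InputFormatDetails.PARQUET;
    String spec = "{\"paths\": \"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
                  + "/wikipedia_index_data1%%FILE_EXTENSION%%\","
                  + " \"inputFormat\": {\"type\": \"%%INPUT_FORMAT_TYPE%%\"}}";
    spec = spec.replace("%%INPUT_FORMAT_TYPE%%", details.inputFormatType);
    spec = spec.replace("%%FOLDER_SUFFIX%%", details.folderSuffix);
    spec = spec.replace("%%FILE_EXTENSION%%", details.fileExtension);
    // Prints a path ending in /batch_index/parquet/wikipedia_index_data1.parquet
    // and an inputFormat of type "parquet".
    System.out.println(spec);
  }
}
```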
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java
index 077eab49665..3962c8d82b9 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java
@@ -35,7 +35,6 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 {
   private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-  private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
   private static final String INPUT_SOURCE_URIS_KEY = "uris";
   private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
   private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 
   void doTest(Pair s3InputSource) throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try (
-        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
     ) {
       final Function s3PropsTransform = spec -> {
         try {
@@ -87,7 +87,11 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
               "%%PATH%%",
               config.getCloudPath()
           );
-
+          spec = StringUtils.replace(
+              spec,
+              "%%INPUT_FORMAT_TYPE%%",
+              InputFormatDetails.JSON.getInputFormatType()
+          );
           spec = StringUtils.replace(
               spec,
               "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
       };
 
       doIndexTest(
-          INDEX_DATASOURCE,
+          indexDatasource,
          INDEX_TASK,
          s3PropsTransform,
          INDEX_QUERIES_RESOURCE,
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java
index e8f76effd72..e8594ccf515 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToAzureParallelIndexTest.java
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
 * 3) Provide -Doverride.config.path= with Azure credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
 */
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java
index 91ccd612f2a..cd1d76a0e32 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
 * 3) Provide -Doverride.config.path= with Azure credentials and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/azure and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java
index a41afb7103c..75949d7bc09 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToGcsParallelIndexTest.java
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
 * 3) Provide -Doverride.config.path= with gcs configs set. See
 * integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
 * 4) Provide -Dresource.file.dir.path= with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java
index e3420d04f89..ec2e0980326 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
 * 3) Provide -Doverride.config.path= with gcs configs and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/gcs and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java
index 9caf7440549..0abd874d3e4 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java
@@ -42,6 +42,6 @@ public class ITHdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParal
   @Test(dataProvider = "resources")
   public void testHdfsIndexData(Pair hdfsInputSource) throws Exception
   {
-    doTest(hdfsInputSource);
+    doTest(hdfsInputSource, InputFormatDetails.JSON);
   }
 }
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java
index a2e1fb71416..c30e1a54509 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java
@@ -43,6 +43,6 @@ public class ITHdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParalle
   @Test(dataProvider = "resources")
   public void testHdfsIndexData(Pair hdfsInputSource) throws Exception
   {
-    doTest(hdfsInputSource);
+    doTest(hdfsInputSource, InputFormatDetails.JSON);
   }
 }
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java
index c09121e3f86..71f227c8dbe 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java
@@ -39,8 +39,20 @@ import java.util.List;
 public class ITHdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
 {
   @Test(dataProvider = "resources")
-  public void testHdfsIndexData(Pair hdfsInputSource) throws Exception
+  public void testHdfsIndexJsonData(Pair hdfsInputSource) throws Exception
   {
-    doTest(hdfsInputSource);
+    doTest(hdfsInputSource, InputFormatDetails.JSON);
+  }
+
+  @Test(dataProvider = "resources")
+  public void testHdfsIndexOrcData(Pair hdfsInputSource) throws Exception
+  {
+    doTest(hdfsInputSource, InputFormatDetails.ORC);
+  }
+
+  @Test(dataProvider = "resources")
+  public void testHdfsIndexParquetData(Pair hdfsInputSource) throws Exception
+  {
+    doTest(hdfsInputSource, InputFormatDetails.PARQUET);
   }
 }
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java
index 69825412c1d..564c0c19f44 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java
@@ -42,6 +42,6 @@ public class ITHdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallel
   @Test(dataProvider = "resources")
   public void testHdfsIndexData(Pair hdfsInputSource) throws Exception
   {
-    doTest(hdfsInputSource);
+    doTest(hdfsInputSource, InputFormatDetails.JSON);
   }
 }
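Only the HDFS-to-HDFS suite exercises ORC and Parquet; the other suites keep passing InputFormatDetails.JSON because only the JSON data has been uploaded to the cloud buckets. Since doTest now takes the format as a parameter, extending coverage elsewhere is a one-method change. A hypothetical sketch (this class is not part of the change, and real suites also carry @Guice and test-group annotations):

```java
package org.apache.druid.tests.indexer;

import org.apache.druid.java.util.common.Pair;
import org.testng.annotations.Test;

// Hypothetical: an ORC variant for another HDFS-input suite, mirroring
// ITHdfsToHdfsParallelIndexTest. The class name is illustrative only.
public class ITHdfsOrcIndexTestSketch extends AbstractHdfsInputSourceParallelIndexTest
{
  @Test(dataProvider = "resources")
  public void testHdfsIndexOrcData(Pair hdfsInputSource) throws Exception
  {
    doTest(hdfsInputSource, InputFormatDetails.ORC);
  }
}
```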
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java
index ff18b95f05b..ec949f17fed 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3OverrideCredentialsIndexTest.java
@@ -40,7 +40,7 @@ import java.util.function.Function;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path= with s3 credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
 * Note that druid_s3_accessKey and druid_s3_secretKey should be unset or set to credentials that do not have
@@ -54,7 +54,6 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
   private static final String INDEX_TASK_WITH_OVERRIDE = "/indexer/wikipedia_override_credentials_index_task.json";
   private static final String INDEX_TASK_WITHOUT_OVERRIDE = "/indexer/wikipedia_cloud_simple_index_task.json";
   private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-  private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
   private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
   private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
   private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
@@ -69,8 +68,9 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
   @Test
   public void testS3WithValidOverrideCredentialsIndexDataShouldSucceed() throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try (
-        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
     ) {
       final Function s3PropsTransform = spec -> {
         try {
@@ -123,7 +123,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
       };
 
       doIndexTest(
-          INDEX_DATASOURCE,
+          indexDatasource,
          INDEX_TASK_WITH_OVERRIDE,
          s3PropsTransform,
          INDEX_QUERIES_RESOURCE,
@@ -137,6 +137,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
   @Test
   public void testS3WithoutOverrideCredentialsIndexDataShouldFailed() throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try {
       final Function s3PropsTransform = spec -> {
         try {
@@ -172,7 +173,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
           throw new RuntimeException(e);
         }
       };
-      final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
+      final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
       final String taskSpec = s3PropsTransform.apply(
           StringUtils.replace(
               getResourceAsString(INDEX_TASK_WITHOUT_OVERRIDE),
@@ -194,13 +195,14 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
     }
     finally {
       // If the test passes, then there is no datasource to unload
-      closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
+      closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
     }
   }
 
   @Test
   public void testS3WithInvalidOverrideCredentialsIndexDataShouldFailed() throws Exception
   {
+    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
     try {
       final Function s3PropsTransform = spec -> {
         try {
@@ -252,7 +254,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
         }
       };
 
-      final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
+      final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
       final String taskSpec = s3PropsTransform.apply(
           StringUtils.replace(
               getResourceAsString(INDEX_TASK_WITH_OVERRIDE),
@@ -272,7 +274,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
     }
     finally {
       // If the test passes, then there is no datasource to unload
-      closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
+      closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
     }
   }
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java
index 86782f41b3f..b57f7bc975e 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path= with s3 credentials and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java
index dd52c9abe4b..e8f8b361fd6 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToS3ParallelIndexTest.java
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+ * located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path= with s3 credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
 */
diff --git a/integration-tests/src/test/resources/data/batch_index/wikipedia_index_data1.json b/integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data1.json
similarity index 100%
rename from integration-tests/src/test/resources/data/batch_index/wikipedia_index_data1.json
rename to integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data1.json
diff --git a/integration-tests/src/test/resources/data/batch_index/wikipedia_index_data2.json b/integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data2.json
similarity index 100%
rename from integration-tests/src/test/resources/data/batch_index/wikipedia_index_data2.json
rename to integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data2.json
diff --git a/integration-tests/src/test/resources/data/batch_index/wikipedia_index_data3.json b/integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data3.json
similarity index 100%
rename from integration-tests/src/test/resources/data/batch_index/wikipedia_index_data3.json
rename to integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data3.json
diff --git a/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data1.orc b/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data1.orc
new file mode 100644
index 00000000000..897b090e3b9
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data1.orc differ
diff --git a/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data2.orc b/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data2.orc
new file mode 100644
index 00000000000..3f0927403c0
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data2.orc differ
diff --git a/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data3.orc b/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data3.orc
new file mode 100644
index 00000000000..5c5f54486a0
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/orc/wikipedia_index_data3.orc differ
diff --git a/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data1.parquet b/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data1.parquet
new file mode 100644
index 00000000000..aaed7672c5d
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data1.parquet differ
diff --git a/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data2.parquet b/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data2.parquet
new file mode 100644
index 00000000000..6f26f1f07e3
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data2.parquet differ
diff --git a/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data3.parquet b/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data3.parquet
new file mode 100644
index 00000000000..fd318da28b8
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/parquet/wikipedia_index_data3.parquet differ
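The test data now lives in one folder per format (json/, orc/, parquet/), each holding the same three wikipedia files. The enum's folderSuffix and fileExtension compose into those paths mechanically, as this small sketch prints (the /resources/data/batch_index base comes from the task specs below):

```java
public class DataPathDemo
{
  public static void main(String[] args)
  {
    // folderSuffix and fileExtension pairs from InputFormatDetails.
    String[][] formats = {{"/json", ".json"}, {"/orc", ".orc"}, {"/parquet", ".parquet"}};
    for (String[] f : formats) {
      for (int i = 1; i <= 3; i++) {
        // e.g. /resources/data/batch_index/orc/wikipedia_index_data2.orc
        System.out.println("/resources/data/batch_index" + f[0] + "/wikipedia_index_data" + i + f[1]);
      }
    }
  }
}
```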
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json
index 23f93cd1f9f..d38c09ffd0d 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_cloud_index_task.json
@@ -71,7 +71,7 @@
         "%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
       },
       "inputFormat": {
-        "type": "json"
+        "type": "%%INPUT_FORMAT_TYPE%%"
       }
     },
     "tuningConfig": {
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task.json
index c41bee228be..1364567fbff 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task.json
@@ -73,7 +73,7 @@
       "type": "index",
       "firehose": {
         "type": "local",
-        "baseDir": "/resources/data/batch_index",
+        "baseDir": "/resources/data/batch_index/json",
         "filter": "wikipedia_index_data*"
       }
     },
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json
index bef8f015442..e9bcbfd0aae 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json
@@ -86,7 +86,7 @@
       "type" : "index",
       "inputSource" : {
         "type" : "local",
-        "baseDir" : "/resources/data/batch_index",
+        "baseDir" : "/resources/data/batch_index/json",
         "filter" : "wikipedia_index_data*"
       },
       "inputFormat" : {
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json
index 5f289dc5395..9f3128fb303 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json
@@ -91,7 +91,7 @@
       "type": "index",
       "firehose": {
         "type": "local",
-        "baseDir": "/resources/data/batch_index",
+        "baseDir": "/resources/data/batch_index/json",
         "filter": "wikipedia_index_data*"
       }
     },
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json
index ff347d42219..4781d39b024 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_index_task.json
@@ -58,7 +58,7 @@
       "type": "index_parallel",
       "firehose": {
         "type": "local",
-        "baseDir": "/resources/data/batch_index",
+        "baseDir": "/resources/data/batch_index/json",
         "filter": "wikipedia_index_data*"
       }
     },
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json
index e17913415f6..84842b43a23 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_parallel_reindex_task.json
@@ -57,7 +57,7 @@
       "type": "index_parallel",
       "firehose": {
         "type": "local",
-        "baseDir": "/resources/data/batch_index",
+        "baseDir": "/resources/data/batch_index/json",
         "filter": "wikipedia_index_data2*"
       }
     },
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json
index 0d663832f14..4f13b70cb72 100644
--- a/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json
+++ b/integration-tests/src/test/resources/indexer/wikipedia_with_timestamp_index_task.json
@@ -74,7 +74,7 @@
       "type": "index",
       "firehose": {
         "type": "local",
-        "baseDir": "/resources/data/batch_index",
+        "baseDir": "/resources/data/batch_index/json",
         "filter": "wikipedia_index_data*"
       }
     },
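After substitution, the templated inputFormat object in these specs reduces to a bare type tag that the loaded extension resolves at ingestion time. A sketch rendering the resolved block for each format with Jackson (assumed to be on the classpath, as Druid itself parses these specs with Jackson):

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Collections;
import java.util.Map;

public class InputFormatBlockDemo
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    for (String type : new String[]{"json", "orc", "parquet"}) {
      Map<String, Object> inputFormat = Collections.singletonMap("type", type);
      // Prints e.g. {"inputFormat":{"type":"orc"}}
      System.out.println(mapper.writeValueAsString(Collections.singletonMap("inputFormat", inputFormat)));
    }
  }
}
```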