Test file format extensions for inputSource (orc, parquet) (#9632)

* Test file format extensions for inputSource (orc, parquet)

* Test file format extensions for inputSource (orc, parquet)

* fix path

* resolve merge conflict

* fix typo
Maytas Monsereenusorn 2020-04-13 10:03:56 -10:00 committed by GitHub
parent 6a52bdc605
commit d930f04e6a
36 changed files with 143 additions and 50 deletions


@ -184,7 +184,7 @@ For Google Cloud Storage, Amazon S3, and Microsoft Azure, the following will als
1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
-Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1.
located in integration-tests/src/test/resources/data/batch_index/json to your Cloud storage at the location set in step 1.
For Google Cloud Storage, in addition to the above, you will also have to:
1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
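
A minimal sketch of steps 1 and 2 above, assuming a Google Cloud Storage bucket, the gsutil CLI, and the integration-tests Maven profile; the bucket and path names are placeholders:

# Step 2 (hypothetical bucket/path): copy the JSON test data to the location chosen in step 1
gsutil cp integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data*.json gs://<your-bucket>/<your-path>/

# Step 1: pass the bucket and path to the tests as system properties
mvn verify -P integration-tests \
  -Ddruid.test.config.cloudBucket=<your-bucket> \
  -Ddruid.test.config.cloudPath=<your-path>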


@ -21,4 +21,4 @@ druid_storage_storageDirectory=/druid/segments
# Depending on the test, additional extension(s) may be required.
# Please refer to the other integration-tests/docker/environment-configs/override-examples/ files and Druid docs for
# additional env vars to provide for each extension.
druid_extensions_loadList=["druid-hdfs-storage"]
druid_extensions_loadList=["druid-hdfs-storage", "druid-parquet-extensions", "druid-orc-extensions"]


@ -69,6 +69,18 @@
<version>${aws.sdk.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-orc-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-parquet-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>


@ -74,6 +74,12 @@
# For druid-kinesis-indexing-service
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
# For druid-parquet-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
# For druid-orc-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]


@ -35,7 +35,6 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@ -71,8 +70,9 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
void doTest(Pair<String, List> azureInputSource) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> azurePropsTransform = spec -> {
try {
@ -87,7 +87,11 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
InputFormatDetails.JSON.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
@ -115,7 +119,7 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
azurePropsTransform,
INDEX_QUERIES_RESOURCE,


@ -35,7 +35,6 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@ -71,8 +70,9 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
void doTest(Pair<String, List> gcsInputSource) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> gcsPropsTransform = spec -> {
try {
@ -87,7 +87,11 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
InputFormatDetails.JSON.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
@ -115,7 +119,7 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
gcsPropsTransform,
INDEX_QUERIES_RESOURCE,


@ -34,7 +34,6 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_PATHS_KEY = "paths";
@DataProvider
@ -42,27 +41,28 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
{
return new Object[][]{
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
"hdfs://druid-it-hadoop:9000/batch_index"
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
)},
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
ImmutableList.of(
"hdfs://druid-it-hadoop:9000/batch_index"
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
)
)},
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
ImmutableList.of(
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data1.json",
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data2.json",
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data3.json"
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%",
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data2%%FILE_EXTENSION%%",
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data3%%FILE_EXTENSION%%"
)
)}
};
}
void doTest(Pair<String, List> hdfsInputSource) throws Exception
void doTest(Pair<String, List> hdfsInputSource, InputFormatDetails inputFormatDetails) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> hdfsPropsTransform = spec -> {
try {
@ -81,11 +81,27 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
"%%INPUT_SOURCE_PROPERTY_KEY%%",
hdfsInputSource.lhs
);
return StringUtils.replace(
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
inputFormatDetails.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
jsonMapper.writeValueAsString(hdfsInputSource.rhs)
);
spec = StringUtils.replace(
spec,
"%%FOLDER_SUFFIX%%",
inputFormatDetails.getFolderSuffix()
);
spec = StringUtils.replace(
spec,
"%%FILE_EXTENSION%%",
inputFormatDetails.getFileExtension()
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
@ -93,7 +109,7 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
hdfsPropsTransform,
INDEX_QUERIES_RESOURCE,


@ -49,6 +49,39 @@ import java.util.function.Function;
public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
{
public enum InputFormatDetails
{
ORC("orc", ".orc", "/orc"),
JSON("json", ".json", "/json"),
PARQUET("parquet", ".parquet", "/parquet");
private final String inputFormatType;
private final String fileExtension;
private final String folderSuffix;
InputFormatDetails(String inputFormatType, String fileExtension, String folderSuffix)
{
this.inputFormatType = inputFormatType;
this.fileExtension = fileExtension;
this.folderSuffix = folderSuffix;
}
public String getInputFormatType()
{
return inputFormatType;
}
public String getFileExtension()
{
return fileExtension;
}
public String getFolderSuffix()
{
return folderSuffix;
}
}
private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
@Inject


@ -35,7 +35,6 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@ -71,8 +70,9 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
void doTest(Pair<String, List> s3InputSource) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -87,7 +87,11 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
InputFormatDetails.JSON.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
@ -115,7 +119,7 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
s3PropsTransform,
INDEX_QUERIES_RESOURCE,


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
*/


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/azure and
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/gcs and
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.


@ -42,6 +42,6 @@ public class ITHdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParal
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
}


@ -43,6 +43,6 @@ public class ITHdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParalle
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
}


@ -39,8 +39,20 @@ import java.util.List;
public class ITHdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
{
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
public void testHdfsIndexJsonData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
@Test(dataProvider = "resources")
public void testHdfsIndexOrcData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource, InputFormatDetails.ORC);
}
@Test(dataProvider = "resources")
public void testHdfsIndexParquetData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource, InputFormatDetails.PARQUET);
}
}


@ -42,6 +42,6 @@ public class ITHdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallel
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
}


@ -40,7 +40,7 @@ import java.util.function.Function;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
* Note that druid_s3_accessKey and druid_s3_secretKey should be unset or set to credentials that does not have
@ -54,7 +54,6 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
private static final String INDEX_TASK_WITH_OVERRIDE = "/indexer/wikipedia_override_credentials_index_task.json";
private static final String INDEX_TASK_WITHOUT_OVERRIDE = "/indexer/wikipedia_cloud_simple_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
@ -69,8 +68,9 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
@Test
public void testS3WithValidOverrideCredentialsIndexDataShouldSucceed() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -123,7 +123,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK_WITH_OVERRIDE,
s3PropsTransform,
INDEX_QUERIES_RESOURCE,
@ -137,6 +137,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
@Test
public void testS3WithoutOverrideCredentialsIndexDataShouldFailed() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -172,7 +173,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
throw new RuntimeException(e);
}
};
final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
final String taskSpec = s3PropsTransform.apply(
StringUtils.replace(
getResourceAsString(INDEX_TASK_WITHOUT_OVERRIDE),
@ -194,13 +195,14 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
}
finally {
// If the test pass, then there is no datasource to unload
closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
}
}
@Test
public void testS3WithInvalidOverrideCredentialsIndexDataShouldFailed() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -252,7 +254,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
}
};
final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
final String taskSpec = s3PropsTransform.apply(
StringUtils.replace(
getResourceAsString(INDEX_TASK_WITH_OVERRIDE),
@ -272,7 +274,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
}
finally {
// If the test pass, then there is no datasource to unload
closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
}
}


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 and
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
*/


@ -71,7 +71,7 @@
"%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
},
"inputFormat": {
"type": "json"
"type": "%%INPUT_FORMAT_TYPE%%"
}
},
"tuningConfig": {


@ -73,7 +73,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},


@ -86,7 +86,7 @@
"type" : "index",
"inputSource" : {
"type" : "local",
"baseDir" : "/resources/data/batch_index",
"baseDir" : "/resources/data/batch_index/json",
"filter" : "wikipedia_index_data*"
},
"inputFormat" : {


@ -91,7 +91,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},


@ -58,7 +58,7 @@
"type": "index_parallel",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},


@ -57,7 +57,7 @@
"type": "index_parallel",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data2*"
}
},


@ -74,7 +74,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},