Test file format extensions for inputSource (orc, parquet) (#9632)

* Test file format extensions for inputSource (orc, parquet)

* Test file format extensions for inputSource (orc, parquet)

* fix path

* resolve merge conflict

* fix typo
Maytas Monsereenusorn 2020-04-13 10:03:56 -10:00 committed by GitHub
parent 6a52bdc605
commit d930f04e6a
36 changed files with 143 additions and 50 deletions

View File

@@ -184,7 +184,7 @@ For Google Cloud Storage, Amazon S3, and Microsoft Azure, the following will als
 1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
 -Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1.
+located in integration-tests/src/test/resources/data/batch_index/json to your Cloud storage at the location set in step 1.
 For Google Cloud Storage, in addition to the above, you will also have to:
 1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
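
The properties named in the hunk above can all be supplied directly on the mvn command line. A minimal sketch of such an invocation, assuming the usual verify goal and an integration-tests profile (the goal, profile, and example values are illustrative; only the -D property names come from the text above):

# Sketch: point the cloud input source tests at your own bucket/path.
mvn verify -P integration-tests \
  -Ddruid.test.config.cloudBucket=my-test-bucket \
  -Ddruid.test.config.cloudPath=batch_index/json \
  -Doverride.config.path=/path/to/override-config \
  -Dresource.file.dir.path=/path/to/gcs-credentials-dir   # only needed for Google Cloud Storage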

View File

@@ -21,4 +21,4 @@ druid_storage_storageDirectory=/druid/segments
 # Depending on the test, additional extension(s) may be required.
 # Please refer to the other integration-tests/docker/environment-configs/override-examples/ files and Druid docs for
 # additional env vars to provide for each extension.
-druid_extensions_loadList=["druid-hdfs-storage"]
+druid_extensions_loadList=["druid-hdfs-storage", "druid-parquet-extensions", "druid-orc-extensions"]

View File

@@ -69,6 +69,18 @@
 <version>${aws.sdk.version}</version>
 <scope>runtime</scope>
 </dependency>
+<dependency>
+<groupId>org.apache.druid.extensions</groupId>
+<artifactId>druid-orc-extensions</artifactId>
+<version>${project.parent.version}</version>
+<scope>runtime</scope>
+</dependency>
+<dependency>
+<groupId>org.apache.druid.extensions</groupId>
+<artifactId>druid-parquet-extensions</artifactId>
+<version>${project.parent.version}</version>
+<scope>runtime</scope>
+</dependency>
 <dependency>
 <groupId>org.apache.druid.extensions</groupId>
 <artifactId>druid-s3-extensions</artifactId>

View File

@@ -74,6 +74,12 @@
 # For druid-kinesis-indexing-service
 mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
 mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
+# For druid-parquet-extensions
+mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
+mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
+# For druid-orc-extensions
+mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
+mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions

 # Pull Hadoop dependency if needed
 if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]

View File

@@ -35,7 +35,6 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_URIS_KEY = "uris";
 private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 void doTest(Pair<String, List> azureInputSource) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> azurePropsTransform = spec -> {
 try {
@@ -87,7 +87,11 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 "%%PATH%%",
 config.getCloudPath()
 );
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+InputFormatDetails.JSON.getInputFormatType()
+);
 spec = StringUtils.replace(
 spec,
 "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 };
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 azurePropsTransform,
 INDEX_QUERIES_RESOURCE,

View File

@@ -35,7 +35,6 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_URIS_KEY = "uris";
 private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 void doTest(Pair<String, List> gcsInputSource) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> gcsPropsTransform = spec -> {
 try {
@@ -87,7 +87,11 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 "%%PATH%%",
 config.getCloudPath()
 );
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+InputFormatDetails.JSON.getInputFormatType()
+);
 spec = StringUtils.replace(
 spec,
 "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 };
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 gcsPropsTransform,
 INDEX_QUERIES_RESOURCE,

View File

@@ -34,7 +34,6 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_PATHS_KEY = "paths";
 @DataProvider
@@ -42,27 +41,28 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 {
 return new Object[][]{
 {new Pair<>(INPUT_SOURCE_PATHS_KEY,
-"hdfs://druid-it-hadoop:9000/batch_index"
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
 )},
 {new Pair<>(INPUT_SOURCE_PATHS_KEY,
 ImmutableList.of(
-"hdfs://druid-it-hadoop:9000/batch_index"
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
 )
 )},
 {new Pair<>(INPUT_SOURCE_PATHS_KEY,
 ImmutableList.of(
-"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data1.json",
-"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data2.json",
-"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data3.json"
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%",
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data2%%FILE_EXTENSION%%",
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data3%%FILE_EXTENSION%%"
 )
 )}
 };
 }
-void doTest(Pair<String, List> hdfsInputSource) throws Exception
+void doTest(Pair<String, List> hdfsInputSource, InputFormatDetails inputFormatDetails) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> hdfsPropsTransform = spec -> {
 try {
@@ -81,11 +81,27 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 "%%INPUT_SOURCE_PROPERTY_KEY%%",
 hdfsInputSource.lhs
 );
-return StringUtils.replace(
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+inputFormatDetails.getInputFormatType()
+);
+spec = StringUtils.replace(
 spec,
 "%%INPUT_SOURCE_PROPERTY_VALUE%%",
 jsonMapper.writeValueAsString(hdfsInputSource.rhs)
 );
+spec = StringUtils.replace(
+spec,
+"%%FOLDER_SUFFIX%%",
+inputFormatDetails.getFolderSuffix()
+);
+spec = StringUtils.replace(
+spec,
+"%%FILE_EXTENSION%%",
+inputFormatDetails.getFileExtension()
+);
+return spec;
 }
 catch (Exception e) {
 throw new RuntimeException(e);
@@ -93,7 +109,7 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 };
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 hdfsPropsTransform,
 INDEX_QUERIES_RESOURCE,

View File

@@ -49,6 +49,39 @@ import java.util.function.Function;
 public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
 {
+public enum InputFormatDetails
+{
+ORC("orc", ".orc", "/orc"),
+JSON("json", ".json", "/json"),
+PARQUET("parquet", ".parquet", "/parquet");
+
+private final String inputFormatType;
+private final String fileExtension;
+private final String folderSuffix;
+
+InputFormatDetails(String inputFormatType, String fileExtension, String folderSuffix)
+{
+this.inputFormatType = inputFormatType;
+this.fileExtension = fileExtension;
+this.folderSuffix = folderSuffix;
+}
+
+public String getInputFormatType()
+{
+return inputFormatType;
+}
+
+public String getFileExtension()
+{
+return fileExtension;
+}
+
+public String getFolderSuffix()
+{
+return folderSuffix;
+}
+}
+
 private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
 @Inject

View File

@@ -35,7 +35,6 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_URIS_KEY = "uris";
 private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 void doTest(Pair<String, List> s3InputSource) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -87,7 +87,11 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 "%%PATH%%",
 config.getCloudPath()
 );
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+InputFormatDetails.JSON.getInputFormatType()
+);
 spec = StringUtils.replace(
 spec,
 "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 };
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 s3PropsTransform,
 INDEX_QUERIES_RESOURCE,

View File

@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
 */

View File

@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/azure and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.

View File

@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
 * integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
 * 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file

View File

@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/gcs and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.

View File

@@ -42,6 +42,6 @@ public class ITHdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParal
 @Test(dataProvider = "resources")
 public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
 }

View File

@@ -43,6 +43,6 @@ public class ITHdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParalle
 @Test(dataProvider = "resources")
 public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
 }

View File

@@ -39,8 +39,20 @@ import java.util.List;
 public class ITHdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
 {
 @Test(dataProvider = "resources")
-public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
+public void testHdfsIndexJsonData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
+}
+
+@Test(dataProvider = "resources")
+public void testHdfsIndexOrcData(Pair<String, List> hdfsInputSource) throws Exception
+{
+doTest(hdfsInputSource, InputFormatDetails.ORC);
+}
+
+@Test(dataProvider = "resources")
+public void testHdfsIndexParquetData(Pair<String, List> hdfsInputSource) throws Exception
+{
+doTest(hdfsInputSource, InputFormatDetails.PARQUET);
 }
 }

View File

@@ -42,6 +42,6 @@ public class ITHdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallel
 @Test(dataProvider = "resources")
 public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
 }

View File

@@ -40,7 +40,7 @@ import java.util.function.Function;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
 * Note that druid_s3_accessKey and druid_s3_secretKey should be unset or set to credentials that does not have
@@ -54,7 +54,6 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 private static final String INDEX_TASK_WITH_OVERRIDE = "/indexer/wikipedia_override_credentials_index_task.json";
 private static final String INDEX_TASK_WITHOUT_OVERRIDE = "/indexer/wikipedia_cloud_simple_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
 private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
 private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
@@ -69,8 +68,9 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 @Test
 public void testS3WithValidOverrideCredentialsIndexDataShouldSucceed() throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -123,7 +123,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 };
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK_WITH_OVERRIDE,
 s3PropsTransform,
 INDEX_QUERIES_RESOURCE,
@@ -137,6 +137,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 @Test
 public void testS3WithoutOverrideCredentialsIndexDataShouldFailed() throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -172,7 +173,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 throw new RuntimeException(e);
 }
 };
-final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
+final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
 final String taskSpec = s3PropsTransform.apply(
 StringUtils.replace(
 getResourceAsString(INDEX_TASK_WITHOUT_OVERRIDE),
@@ -194,13 +195,14 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 }
 finally {
 // If the test pass, then there is no datasource to unload
-closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
+closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
 }
 }
 @Test
 public void testS3WithInvalidOverrideCredentialsIndexDataShouldFailed() throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -252,7 +254,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 }
 };
-final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
+final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
 final String taskSpec = s3PropsTransform.apply(
 StringUtils.replace(
 getResourceAsString(INDEX_TASK_WITH_OVERRIDE),
@@ -272,7 +274,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 }
 finally {
 // If the test pass, then there is no datasource to unload
-closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
+closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
 }
 }

View File

@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.

View File

@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
 */

View File

@@ -71,7 +71,7 @@
 "%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
 },
 "inputFormat": {
-"type": "json"
+"type": "%%INPUT_FORMAT_TYPE%%"
 }
 },
 "tuningConfig": {

View File

@@ -73,7 +73,7 @@
 "type": "index",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },

View File

@@ -86,7 +86,7 @@
 "type" : "index",
 "inputSource" : {
 "type" : "local",
-"baseDir" : "/resources/data/batch_index",
+"baseDir" : "/resources/data/batch_index/json",
 "filter" : "wikipedia_index_data*"
 },
 "inputFormat" : {

View File

@@ -91,7 +91,7 @@
 "type": "index",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },

View File

@@ -58,7 +58,7 @@
 "type": "index_parallel",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },

View File

@@ -57,7 +57,7 @@
 "type": "index_parallel",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data2*"
 }
 },

View File

@@ -74,7 +74,7 @@
 "type": "index",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },