Test file format extensions for inputSource (orc, parquet) (#9632)

* Test file format extensions for inputSource (orc, parquet)

* Test file format extensions for inputSource (orc, parquet)

* fix path

* resolve merge conflict

* fix typo
Maytas Monsereenusorn 2020-04-13 10:03:56 -10:00 committed by GitHub
parent 6a52bdc605
commit d930f04e6a
36 changed files with 143 additions and 50 deletions


@ -184,7 +184,7 @@ For Google Cloud Storage, Amazon S3, and Microsoft Azure, the following will als
1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
-Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1.
located in integration-tests/src/test/resources/data/batch_index/json to your Cloud storage at the location set in step 1.
For Google Cloud Storage, in addition to the above, you will also have to:
1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
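
A minimal sketch of steps 1 and 2 above, assuming a Google Cloud Storage bucket, the gsutil CLI, and the integration-tests Maven profile; the bucket and path names are placeholders:

# Step 2 (hypothetical bucket/path): copy the JSON test data to the location chosen in step 1
gsutil cp integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data*.json gs://<your-bucket>/<your-path>/

# Step 1: pass the bucket and path to the tests as system properties
mvn verify -P integration-tests \
  -Ddruid.test.config.cloudBucket=<your-bucket> \
  -Ddruid.test.config.cloudPath=<your-path>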


@ -21,4 +21,4 @@ druid_storage_storageDirectory=/druid/segments
# Depending on the test, additional extension(s) may be required.
# Please refer to the other integration-tests/docker/environment-configs/override-examples/ files and Druid docs for
# additional env vars to provide for each extension.
druid_extensions_loadList=["druid-hdfs-storage"]
druid_extensions_loadList=["druid-hdfs-storage", "druid-parquet-extensions", "druid-orc-extensions"]


@ -69,6 +69,18 @@
<version>${aws.sdk.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-orc-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-parquet-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-s3-extensions</artifactId>


@ -74,6 +74,12 @@
# For druid-kinesis-indexing-service
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
# For druid-parquet-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
# For druid-orc-extensions
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]


@ -35,7 +35,6 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@ -71,8 +70,9 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
void doTest(Pair<String, List> azureInputSource) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> azurePropsTransform = spec -> {
try {
@ -87,7 +87,11 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
InputFormatDetails.JSON.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
@ -115,7 +119,7 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
azurePropsTransform,
INDEX_QUERIES_RESOURCE,


@ -35,7 +35,6 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@ -71,8 +70,9 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
void doTest(Pair<String, List> gcsInputSource) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> gcsPropsTransform = spec -> {
try {
@ -87,7 +87,11 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
InputFormatDetails.JSON.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
@ -115,7 +119,7 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
gcsPropsTransform,
INDEX_QUERIES_RESOURCE,


@ -34,7 +34,6 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_PATHS_KEY = "paths";
@DataProvider
@ -42,27 +41,28 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
{
return new Object[][]{
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
"hdfs://druid-it-hadoop:9000/batch_index"
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
)},
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
ImmutableList.of(
"hdfs://druid-it-hadoop:9000/batch_index"
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
)
)},
{new Pair<>(INPUT_SOURCE_PATHS_KEY,
ImmutableList.of(
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data1.json",
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data2.json",
"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data3.json"
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%",
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data2%%FILE_EXTENSION%%",
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data3%%FILE_EXTENSION%%"
)
)}
};
}
void doTest(Pair<String, List> hdfsInputSource) throws Exception
void doTest(Pair<String, List> hdfsInputSource, InputFormatDetails inputFormatDetails) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> hdfsPropsTransform = spec -> {
try {
@ -81,11 +81,27 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
"%%INPUT_SOURCE_PROPERTY_KEY%%",
hdfsInputSource.lhs
);
return StringUtils.replace(
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
inputFormatDetails.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%INPUT_SOURCE_PROPERTY_VALUE%%",
jsonMapper.writeValueAsString(hdfsInputSource.rhs)
);
spec = StringUtils.replace(
spec,
"%%FOLDER_SUFFIX%%",
inputFormatDetails.getFolderSuffix()
);
spec = StringUtils.replace(
spec,
"%%FILE_EXTENSION%%",
inputFormatDetails.getFileExtension()
);
return spec;
}
catch (Exception e) {
throw new RuntimeException(e);
@ -93,7 +109,7 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
hdfsPropsTransform,
INDEX_QUERIES_RESOURCE,


@ -49,6 +49,39 @@ import java.util.function.Function;
public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
{
public enum InputFormatDetails
{
ORC("orc", ".orc", "/orc"),
JSON("json", ".json", "/json"),
PARQUET("parquet", ".parquet", "/parquet");
private final String inputFormatType;
private final String fileExtension;
private final String folderSuffix;
InputFormatDetails(String inputFormatType, String fileExtension, String folderSuffix)
{
this.inputFormatType = inputFormatType;
this.fileExtension = fileExtension;
this.folderSuffix = folderSuffix;
}
public String getInputFormatType()
{
return inputFormatType;
}
public String getFileExtension()
{
return fileExtension;
}
public String getFolderSuffix()
{
return folderSuffix;
}
}
private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
@Inject


@ -35,7 +35,6 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
{
private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_URIS_KEY = "uris";
private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@ -71,8 +70,9 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
void doTest(Pair<String, List> s3InputSource) throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -87,7 +87,11 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
"%%PATH%%",
config.getCloudPath()
);
spec = StringUtils.replace(
spec,
"%%INPUT_FORMAT_TYPE%%",
InputFormatDetails.JSON.getInputFormatType()
);
spec = StringUtils.replace(
spec,
"%%PARTITIONS_SPEC%%",
@ -115,7 +119,7 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK,
s3PropsTransform,
INDEX_QUERIES_RESOURCE,


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
*/


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/azure and
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
* integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
* 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/gcs and
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.


@ -42,6 +42,6 @@ public class ITHdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParal
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
}


@ -43,6 +43,6 @@ public class ITHdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParalle
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
}


@ -39,8 +39,20 @@ import java.util.List;
public class ITHdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
{
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
public void testHdfsIndexJsonData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
@Test(dataProvider = "resources")
public void testHdfsIndexOrcData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource, InputFormatDetails.ORC);
}
@Test(dataProvider = "resources")
public void testHdfsIndexParquetData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource, InputFormatDetails.PARQUET);
}
}


@ -42,6 +42,6 @@ public class ITHdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallel
@Test(dataProvider = "resources")
public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
{
doTest(hdfsInputSource);
doTest(hdfsInputSource, InputFormatDetails.JSON);
}
}


@ -40,7 +40,7 @@ import java.util.function.Function;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
* Note that druid_s3_accessKey and druid_s3_secretKey should be unset or set to credentials that does not have
@ -54,7 +54,6 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
private static final String INDEX_TASK_WITH_OVERRIDE = "/indexer/wikipedia_override_credentials_index_task.json";
private static final String INDEX_TASK_WITHOUT_OVERRIDE = "/indexer/wikipedia_cloud_simple_index_task.json";
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
@ -69,8 +68,9 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
@Test
public void testS3WithValidOverrideCredentialsIndexDataShouldSucceed() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try (
final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
) {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -123,7 +123,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
};
doIndexTest(
INDEX_DATASOURCE,
indexDatasource,
INDEX_TASK_WITH_OVERRIDE,
s3PropsTransform,
INDEX_QUERIES_RESOURCE,
@ -137,6 +137,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
@Test
public void testS3WithoutOverrideCredentialsIndexDataShouldFailed() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -172,7 +173,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
throw new RuntimeException(e);
}
};
final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
final String taskSpec = s3PropsTransform.apply(
StringUtils.replace(
getResourceAsString(INDEX_TASK_WITHOUT_OVERRIDE),
@ -194,13 +195,14 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
}
finally {
// If the test pass, then there is no datasource to unload
closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
}
}
@Test
public void testS3WithInvalidOverrideCredentialsIndexDataShouldFailed() throws Exception
{
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
try {
final Function<String, String> s3PropsTransform = spec -> {
try {
@ -252,7 +254,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
}
};
final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
final String taskSpec = s3PropsTransform.apply(
StringUtils.replace(
getResourceAsString(INDEX_TASK_WITH_OVERRIDE),
@ -272,7 +274,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
}
finally {
// If the test pass, then there is no datasource to unload
closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
}
}


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 and
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.


@ -33,7 +33,7 @@ import java.util.List;
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
* integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
*/


@ -71,7 +71,7 @@
"%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
},
"inputFormat": {
"type": "json"
"type": "%%INPUT_FORMAT_TYPE%%"
}
},
"tuningConfig": {


@ -73,7 +73,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},


@ -86,7 +86,7 @@
"type" : "index",
"inputSource" : {
"type" : "local",
"baseDir" : "/resources/data/batch_index",
"baseDir" : "/resources/data/batch_index/json",
"filter" : "wikipedia_index_data*"
},
"inputFormat" : {


@ -91,7 +91,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},


@ -58,7 +58,7 @@
"type": "index_parallel",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},


@ -57,7 +57,7 @@
"type": "index_parallel",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data2*"
}
},


@ -74,7 +74,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/resources/data/batch_index",
"baseDir": "/resources/data/batch_index/json",
"filter": "wikipedia_index_data*"
}
},