mirror of https://github.com/apache/druid.git
Test file format extensions for inputSource (orc, parquet) (#9632)
* Test file format extensions for inputSource (orc, parquet)
* Test file format extensions for inputSource (orc, parquet)
* fix path
* resolve merge conflict
* fix typo
parent 6a52bdc605
commit d930f04e6a
@@ -184,7 +184,7 @@ For Google Cloud Storage, Amazon S3, and Microsoft Azure, the following will als
 1) Set the bucket and path for your test data. This can be done by setting -Ddruid.test.config.cloudBucket and
 -Ddruid.test.config.cloudPath in the mvn command or setting "cloud_bucket" and "cloud_path" in the config file.
 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-located in integration-tests/src/test/resources/data/batch_index to your Cloud storage at the location set in step 1.
+located in integration-tests/src/test/resources/data/batch_index/json to your Cloud storage at the location set in step 1.
 
 For Google Cloud Storage, in addition to the above, you will also have to:
 1) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
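The step above now points the JSON copies at the batch_index/json subfolder; inside the tests, the configured cloud path and an inputFormat type are spliced into the shared task-spec template. A minimal, hedged sketch of that substitution (the template fragment and literal values here are illustrative; in the tests they come from wikipedia_cloud_index_task.json, config.getCloudPath(), and InputFormatDetails.JSON):

import org.apache.druid.java.util.common.StringUtils;

// Hedged sketch, not part of this commit: how the cloud tests fill their spec placeholders.
public class CloudSpecSketch
{
  public static void main(String[] args)
  {
    // Illustrative fragment only; the real template is wikipedia_cloud_index_task.json.
    String spec = "\"path\": \"%%PATH%%\", \"inputFormat\": {\"type\": \"%%INPUT_FORMAT_TYPE%%\"}";

    // In the tests this value is config.getCloudPath(), i.e. -Ddruid.test.config.cloudPath
    // or "cloud_path" from the config file.
    spec = StringUtils.replace(spec, "%%PATH%%", "batch-test-data/json");
    // The cloud tests currently pass InputFormatDetails.JSON.getInputFormatType(), i.e. "json".
    spec = StringUtils.replace(spec, "%%INPUT_FORMAT_TYPE%%", "json");

    System.out.println(spec);
  }
}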
@@ -21,4 +21,4 @@ druid_storage_storageDirectory=/druid/segments
 # Depending on the test, additional extension(s) may be required.
 # Please refer to the other integration-tests/docker/environment-configs/override-examples/ files and Druid docs for
 # additional env vars to provide for each extension.
-druid_extensions_loadList=["druid-hdfs-storage"]
+druid_extensions_loadList=["druid-hdfs-storage", "druid-parquet-extensions", "druid-orc-extensions"]
@@ -69,6 +69,18 @@
 <version>${aws.sdk.version}</version>
 <scope>runtime</scope>
 </dependency>
+<dependency>
+<groupId>org.apache.druid.extensions</groupId>
+<artifactId>druid-orc-extensions</artifactId>
+<version>${project.parent.version}</version>
+<scope>runtime</scope>
+</dependency>
+<dependency>
+<groupId>org.apache.druid.extensions</groupId>
+<artifactId>druid-parquet-extensions</artifactId>
+<version>${project.parent.version}</version>
+<scope>runtime</scope>
+</dependency>
 <dependency>
 <groupId>org.apache.druid.extensions</groupId>
 <artifactId>druid-s3-extensions</artifactId>
@@ -74,6 +74,12 @@
 # For druid-kinesis-indexing-service
 mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
 mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
+# For druid-parquet-extensions
+mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
+mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
+# For druid-orc-extensions
+mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
+mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
 
 # Pull Hadoop dependency if needed
 if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
@@ -35,7 +35,6 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_URIS_KEY = "uris";
 private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 
 void doTest(Pair<String, List> azureInputSource) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> azurePropsTransform = spec -> {
 try {
@@ -87,7 +87,11 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 "%%PATH%%",
 config.getCloudPath()
 );
-
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+InputFormatDetails.JSON.getInputFormatType()
+);
 spec = StringUtils.replace(
 spec,
 "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractAzureInputSourceParallelIndexTest extends Abstract
 };
 
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 azurePropsTransform,
 INDEX_QUERIES_RESOURCE,
@@ -35,7 +35,6 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_URIS_KEY = "uris";
 private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 
 void doTest(Pair<String, List> gcsInputSource) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> gcsPropsTransform = spec -> {
 try {
@@ -87,7 +87,11 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 "%%PATH%%",
 config.getCloudPath()
 );
-
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+InputFormatDetails.JSON.getInputFormatType()
+);
 spec = StringUtils.replace(
 spec,
 "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractGcsInputSourceParallelIndexTest extends AbstractIT
 };
 
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 gcsPropsTransform,
 INDEX_QUERIES_RESOURCE,
@@ -34,7 +34,6 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_PATHS_KEY = "paths";
 
 @DataProvider
@@ -42,27 +41,28 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 {
 return new Object[][]{
 {new Pair<>(INPUT_SOURCE_PATHS_KEY,
-"hdfs://druid-it-hadoop:9000/batch_index"
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
 )},
 {new Pair<>(INPUT_SOURCE_PATHS_KEY,
 ImmutableList.of(
-"hdfs://druid-it-hadoop:9000/batch_index"
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
 )
 )},
 {new Pair<>(INPUT_SOURCE_PATHS_KEY,
 ImmutableList.of(
-"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data1.json",
-"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data2.json",
-"hdfs://druid-it-hadoop:9000/batch_index/wikipedia_index_data3.json"
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%",
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data2%%FILE_EXTENSION%%",
+"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data3%%FILE_EXTENSION%%"
 )
 )}
 };
 }
 
-void doTest(Pair<String, List> hdfsInputSource) throws Exception
+void doTest(Pair<String, List> hdfsInputSource, InputFormatDetails inputFormatDetails) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> hdfsPropsTransform = spec -> {
 try {
@@ -81,11 +81,27 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 "%%INPUT_SOURCE_PROPERTY_KEY%%",
 hdfsInputSource.lhs
 );
-return StringUtils.replace(
+spec = StringUtils.replace(
 spec,
+"%%INPUT_FORMAT_TYPE%%",
+inputFormatDetails.getInputFormatType()
+);
+spec = StringUtils.replace(
+spec,
 "%%INPUT_SOURCE_PROPERTY_VALUE%%",
 jsonMapper.writeValueAsString(hdfsInputSource.rhs)
 );
+spec = StringUtils.replace(
+spec,
+"%%FOLDER_SUFFIX%%",
+inputFormatDetails.getFolderSuffix()
+);
+spec = StringUtils.replace(
+spec,
+"%%FILE_EXTENSION%%",
+inputFormatDetails.getFileExtension()
+);
+return spec;
 }
 catch (Exception e) {
 throw new RuntimeException(e);
@@ -93,7 +109,7 @@ public abstract class AbstractHdfsInputSourceParallelIndexTest extends AbstractI
 };
 
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 hdfsPropsTransform,
 INDEX_QUERIES_RESOURCE,
@@ -49,6 +49,39 @@ import java.util.function.Function;
 
 public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
 {
+public enum InputFormatDetails
+{
+ORC("orc", ".orc", "/orc"),
+JSON("json", ".json", "/json"),
+PARQUET("parquet", ".parquet", "/parquet");
+
+private final String inputFormatType;
+private final String fileExtension;
+private final String folderSuffix;
+
+InputFormatDetails(String inputFormatType, String fileExtension, String folderSuffix)
+{
+this.inputFormatType = inputFormatType;
+this.fileExtension = fileExtension;
+this.folderSuffix = folderSuffix;
+}
+
+public String getInputFormatType()
+{
+return inputFormatType;
+}
+
+public String getFileExtension()
+{
+return fileExtension;
+}
+
+public String getFolderSuffix()
+{
+return folderSuffix;
+}
+}
+
 private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
 
 @Inject
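The enum added above carries the (inputFormat type, file extension, folder suffix) triple that the HDFS data provider's %%FOLDER_SUFFIX%% and %%FILE_EXTENSION%% placeholders consume. A standalone, JDK-only illustration of how those placeholders expand per format (not part of the commit; the triples mirror InputFormatDetails):

// Hedged sketch of the placeholder expansion used by the HDFS data provider.
public class FolderSuffixSketch
{
  public static void main(String[] args)
  {
    String template = "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%";
    String[][] formats = {
        {"json", ".json", "/json"},
        {"orc", ".orc", "/orc"},
        {"parquet", ".parquet", "/parquet"},
    };
    for (String[] f : formats) {
      String path = template
          .replace("%%FOLDER_SUFFIX%%", f[2])
          .replace("%%FILE_EXTENSION%%", f[1]);
      // e.g. parquet -> hdfs://druid-it-hadoop:9000/batch_index/parquet/wikipedia_index_data1.parquet
      System.out.println(f[0] + " -> " + path);
    }
  }
}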
@@ -35,7 +35,6 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 {
 private static final String INDEX_TASK = "/indexer/wikipedia_cloud_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_URIS_KEY = "uris";
 private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
@@ -71,8 +70,9 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 
 void doTest(Pair<String, List> s3InputSource) throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -87,7 +87,11 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 "%%PATH%%",
 config.getCloudPath()
 );
-
+spec = StringUtils.replace(
+spec,
+"%%INPUT_FORMAT_TYPE%%",
+InputFormatDetails.JSON.getInputFormatType()
+);
 spec = StringUtils.replace(
 spec,
 "%%PARTITIONS_SPEC%%",
@@ -115,7 +119,7 @@ public abstract class AbstractS3InputSourceParallelIndexTest extends AbstractITB
 };
 
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK,
 s3PropsTransform,
 INDEX_QUERIES_RESOURCE,
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/azure for env vars to provide.
 */
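The cloud parallel-index tests above are driven by (input-source property, value) pairs — "uris", "prefixes", or "objects" — which the spec transform serializes into the %%INPUT_SOURCE_PROPERTY_KEY%% / %%INPUT_SOURCE_PROPERTY_VALUE%% placeholders. A hedged, self-contained sketch of that step (the ObjectMapper stands in for the tests' injected jsonMapper; the prefix value and the template fragment are made up):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;

import java.util.List;

// Hedged sketch, not part of this commit: rendering one data-provider entry into the spec.
public class InputSourcePropertySketch
{
  public static void main(String[] args) throws Exception
  {
    // One data-provider entry, e.g. the "prefixes" variant.
    Pair<String, List> inputSource =
        new Pair<String, List>("prefixes", ImmutableList.of("s3://my-test-bucket/batch-test-data/"));

    // Illustrative fragment of the inputSource block in the task-spec template.
    String spec = "\"%%INPUT_SOURCE_PROPERTY_KEY%%\": %%INPUT_SOURCE_PROPERTY_VALUE%%";

    ObjectMapper jsonMapper = new ObjectMapper();
    spec = StringUtils.replace(spec, "%%INPUT_SOURCE_PROPERTY_KEY%%", inputSource.lhs);
    spec = StringUtils.replace(spec, "%%INPUT_SOURCE_PROPERTY_VALUE%%", jsonMapper.writeValueAsString(inputSource.rhs));

    // -> "prefixes": ["s3://my-test-bucket/batch-test-data/"]
    System.out.println(spec);
  }
}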
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your Azure at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your Azure at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/azure and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
 * integration-tests/docker/environment-configs/override-examples/gcs for env vars to provide.
 * 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your GCS at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/gcs and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
@@ -42,6 +42,6 @@ public class ITHdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParal
 @Test(dataProvider = "resources")
 public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
 }
@@ -43,6 +43,6 @@ public class ITHdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParalle
 @Test(dataProvider = "resources")
 public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
 }
@@ -39,8 +39,20 @@ import java.util.List;
 public class ITHdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
 {
 @Test(dataProvider = "resources")
-public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
+public void testHdfsIndexJsonData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
+
+@Test(dataProvider = "resources")
+public void testHdfsIndexOrcData(Pair<String, List> hdfsInputSource) throws Exception
+{
+doTest(hdfsInputSource, InputFormatDetails.ORC);
+}
+
+@Test(dataProvider = "resources")
+public void testHdfsIndexParquetData(Pair<String, List> hdfsInputSource) throws Exception
+{
+doTest(hdfsInputSource, InputFormatDetails.PARQUET);
+}
 }
@@ -42,6 +42,6 @@ public class ITHdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallel
 @Test(dataProvider = "resources")
 public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws Exception
 {
-doTest(hdfsInputSource);
+doTest(hdfsInputSource, InputFormatDetails.JSON);
 }
 }
@@ -40,7 +40,7 @@ import java.util.function.Function;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
 * Note that druid_s3_accessKey and druid_s3_secretKey should be unset or set to credentials that does not have
@@ -54,7 +54,6 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 private static final String INDEX_TASK_WITH_OVERRIDE = "/indexer/wikipedia_override_credentials_index_task.json";
 private static final String INDEX_TASK_WITHOUT_OVERRIDE = "/indexer/wikipedia_cloud_simple_index_task.json";
 private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
-private static final String INDEX_DATASOURCE = "wikipedia_index_test_" + UUID.randomUUID();
 private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
 private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
 private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
@@ -69,8 +68,9 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 @Test
 public void testS3WithValidOverrideCredentialsIndexDataShouldSucceed() throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try (
-final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
 ) {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -123,7 +123,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 };
 
 doIndexTest(
-INDEX_DATASOURCE,
+indexDatasource,
 INDEX_TASK_WITH_OVERRIDE,
 s3PropsTransform,
 INDEX_QUERIES_RESOURCE,
@@ -137,6 +137,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 @Test
 public void testS3WithoutOverrideCredentialsIndexDataShouldFailed() throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -172,7 +173,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 throw new RuntimeException(e);
 }
 };
-final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
+final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
 final String taskSpec = s3PropsTransform.apply(
 StringUtils.replace(
 getResourceAsString(INDEX_TASK_WITHOUT_OVERRIDE),
@@ -194,13 +195,14 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 }
 finally {
 // If the test pass, then there is no datasource to unload
-closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
+closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
 }
 }
 
 @Test
 public void testS3WithInvalidOverrideCredentialsIndexDataShouldFailed() throws Exception
 {
+final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
 try {
 final Function<String, String> s3PropsTransform = spec -> {
 try {
@@ -252,7 +254,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 }
 };
 
-final String fullDatasourceName = INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix();
+final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
 final String taskSpec = s3PropsTransform.apply(
 StringUtils.replace(
 getResourceAsString(INDEX_TASK_WITH_OVERRIDE),
@@ -272,7 +274,7 @@ public class ITS3OverrideCredentialsIndexTest extends AbstractITBatchIndexTest
 }
 finally {
 // If the test pass, then there is no datasource to unload
-closeQuietly(unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()));
+closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
 }
 }
 
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 and
 * integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
@@ -33,7 +33,7 @@ import java.util.List;
 * 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
 * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
 * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
-* located in integration-tests/src/test/resources/data/batch_index to your S3 at the location set in step 1.
+* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
 * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials/configs set. See
 * integration-tests/docker/environment-configs/override-examples/s3 for env vars to provide.
 */
6 binary files not shown.
@@ -71,7 +71,7 @@
 "%%INPUT_SOURCE_PROPERTY_KEY%%": %%INPUT_SOURCE_PROPERTY_VALUE%%
 },
 "inputFormat": {
-"type": "json"
+"type": "%%INPUT_FORMAT_TYPE%%"
 }
 },
 "tuningConfig": {
@@ -73,7 +73,7 @@
 "type": "index",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },
@@ -86,7 +86,7 @@
 "type" : "index",
 "inputSource" : {
 "type" : "local",
-"baseDir" : "/resources/data/batch_index",
+"baseDir" : "/resources/data/batch_index/json",
 "filter" : "wikipedia_index_data*"
 },
 "inputFormat" : {
@@ -91,7 +91,7 @@
 "type": "index",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },
@@ -58,7 +58,7 @@
 "type": "index_parallel",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },
@@ -57,7 +57,7 @@
 "type": "index_parallel",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data2*"
 }
 },
@@ -74,7 +74,7 @@
 "type": "index",
 "firehose": {
 "type": "local",
-"baseDir": "/resources/data/batch_index",
+"baseDir": "/resources/data/batch_index/json",
 "filter": "wikipedia_index_data*"
 }
 },