mirror of https://github.com/apache/druid.git
More Hadoop integration tests (#9714)
* More Hadoop integration tests
* Add missing s3 instructions
* Address PR comments
* Address PR comments
* PR comments
* Fix typo

commit 61295bd002 (parent c61365c1e0)
|
@ -373,7 +373,7 @@ jobs:
|
||||||
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
|
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
|
||||||
jdk: openjdk8
|
jdk: openjdk8
|
||||||
services: *integration_test_services
|
services: *integration_test_services
|
||||||
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=8'
|
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=8'
|
||||||
script: *run_integration_test
|
script: *run_integration_test
|
||||||
after_failure: *integration_test_diags
|
after_failure: *integration_test_diags
|
||||||
# END - Integration tests for Compile with Java 8 and Run with Java 8
|
# END - Integration tests for Compile with Java 8 and Run with Java 8
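The hadoop-*-deep-storage groups added to the exclusion lists above are not run by these CI jobs. A hedged sketch of running one of them locally instead (the -Dgroups flag for selecting a single TestNG group is an assumption here; -Dstart.hadoop.docker, -Doverride.config.path, and -Dextra.datasource.name.suffix come from the README changes further down):
```
mvn verify -P integration-tests -Dgroups=hadoop-s3-to-s3-deep-storage -Dstart.hadoop.docker=true -Doverride.config.path=<PATH_TO_FILE> -Dextra.datasource.name.suffix=''
```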
|
||||||
|
@ -407,7 +407,7 @@ jobs:
|
||||||
- <<: *integration_tests
|
- <<: *integration_tests
|
||||||
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
|
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
|
||||||
jdk: openjdk8
|
jdk: openjdk8
|
||||||
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format' JVM_RUNTIME='-Djvm.runtime=11'
|
env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage' JVM_RUNTIME='-Djvm.runtime=11'
|
||||||
# END - Integration tests for Compile with Java 8 and Run with Java 11
|
# END - Integration tests for Compile with Java 8 and Run with Java 11
|
||||||
|
|
||||||
- name: "security vulnerabilities"
|
- name: "security vulnerabilities"
|
||||||
|
|
|
@ -214,8 +214,8 @@ of the integration test run discussed above. This is because druid
|
||||||
test clusters might not, in general, have access to hadoop.
|
test clusters might not, in general, have access to hadoop.
|
||||||
This also applies to integration tests that use Hadoop HDFS as an inputSource or as deep storage.
|
This also applies to integration tests that use Hadoop HDFS as an inputSource or as deep storage.
|
||||||
To run integration tests that use Hadoop, you will have to run a Hadoop cluster. This can be done in two ways:
|
To run integration tests that use Hadoop, you will have to run a Hadoop cluster. This can be done in two ways:
|
||||||
1) Run your own Druid + Hadoop cluster and specify Hadoop configs in the configuration file (CONFIG_FILE).
|
1) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command.
|
||||||
2) Run Druid Docker test clusters with Hadoop container by passing -Dstart.hadoop.docker=true to the mvn command.
|
2) Run your own Druid + Hadoop cluster and specify Hadoop configs in the configuration file (CONFIG_FILE).
|
||||||
|
|
||||||
Currently, hdfs-deep-storage and other <cloud>-deep-storage integration test groups can only be run with
|
Currently, hdfs-deep-storage and other <cloud>-deep-storage integration test groups can only be run with
|
||||||
Druid Docker test clusters by passing -Dstart.hadoop.docker=true to start the Hadoop container.
|
Druid Docker test clusters by passing -Dstart.hadoop.docker=true to start the Hadoop container.
|
||||||
|
@ -224,21 +224,23 @@ See integration-tests/docker/environment-configs/override-examples/hdfs director
|
||||||
Note that if the integration test you are running also uses other cloud extensions (S3, Azure, GCS), additional
|
Note that if the integration test you are running also uses other cloud extensions (S3, Azure, GCS), additional
|
||||||
credentials/configs may need to be set in the same file where your Druid's Hadoop configs are set.
|
credentials/configs may need to be set in the same file where your Druid's Hadoop configs are set.
|
||||||
|
|
||||||
Currently, ITHadoopIndexTest can only be run with your own Druid + Hadoop cluster by following the below steps:
|
If you are running ITHadoopIndexTest with your own Druid + Hadoop cluster, please follow the below steps:
|
||||||
Create a directory called batchHadoop1 in the hadoop file system
|
- Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
(anywhere you want) and put batch_hadoop.data (integration-tests/src/test/resources/hadoop/batch_hadoop.data)
|
located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
|
||||||
into that directory (as its only file).
|
- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
|
||||||
|
at /batch_index/tsv/
|
||||||
|
If using the Docker-based Hadoop container, the steps above are automatically done by the integration tests.
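A minimal sketch of those copy steps for a self-managed cluster, using the stock `hdfs dfs` CLI (adjust paths and permissions to your environment):
```
hdfs dfs -mkdir -p /batch_index/json /batch_index/tsv
hdfs dfs -put integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data1.json /batch_index/json/
hdfs dfs -put integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data2.json /batch_index/json/
hdfs dfs -put integration-tests/src/test/resources/data/batch_index/json/wikipedia_index_data3.json /batch_index/json/
hdfs dfs -put integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data /batch_index/tsv/
```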
|
||||||
|
|
||||||
Add this keyword to the configuration file (see above):
|
When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788.
|
||||||
|
|
||||||
|
Run the test using mvn (using the bundled Docker-based Hadoop cluster):
|
||||||
```
|
```
|
||||||
"hadoopTestDir": "<name_of_dir_containing_batchHadoop1>"
|
mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Doverride.config.path=docker/environment-configs/override-examples/hdfs -Dextra.datasource.name.suffix=''
|
||||||
```
|
```
|
||||||
|
|
||||||
Run the test using mvn:
|
Run the test using mvn (using config file for existing Hadoop cluster):
|
||||||
|
|
||||||
```
|
```
|
||||||
mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest
|
mvn verify -P int-tests-config-file -Dit.test=ITHadoopIndexTest -Dextra.datasource.name.suffix=''
|
||||||
```
|
```
|
||||||
|
|
||||||
In some test environments, the machine where the tests need to be executed
|
In some test environments, the machine where the tests need to be executed
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example of override config file to provide.
|
||||||
|
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||||
|
#
|
||||||
|
druid_storage_type=azure
|
||||||
|
druid_azure_account=<OVERRIDE_THIS>
|
||||||
|
druid_azure_key=<OVERRIDE_THIS>
|
||||||
|
druid_azure_container=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
|
||||||
|
|
||||||
|
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
|
|
@ -0,0 +1,34 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example of override config file to provide.
|
||||||
|
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||||
|
#
|
||||||
|
druid_storage_type=hdfs
|
||||||
|
druid_storage_storageDirectory=/druid/segments
|
||||||
|
|
||||||
|
druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"]
|
||||||
|
|
||||||
|
# Not used since we have HDFS deep storage, but the Druid Azure extension requires these to be defined
|
||||||
|
druid_azure_account=<OVERRIDE_THIS>
|
||||||
|
druid_azure_key=<OVERRIDE_THIS>
|
||||||
|
druid_azure_container=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"]
|
|
@ -0,0 +1,30 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example of override config file to provide.
|
||||||
|
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
|
||||||
|
#
|
||||||
|
druid_storage_type=google
|
||||||
|
druid_google_bucket=<OVERRIDE_THIS>
|
||||||
|
druid_google_prefix=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"]
|
||||||
|
|
||||||
|
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>
|
|
@ -0,0 +1,30 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example of override config file to provide.
|
||||||
|
# Please replace <OVERRIDE_THIS> and <YOUR_GOOGLE_CREDENTIALS_FILE_NAME> with your cloud configs/credentials
|
||||||
|
#
|
||||||
|
druid_storage_type=hdfs
|
||||||
|
druid_storage_storageDirectory=/druid/segments
|
||||||
|
|
||||||
|
druid_extensions_loadList=["druid-google-extensions","druid-hdfs-storage"]
|
||||||
|
|
||||||
|
GOOGLE_APPLICATION_CREDENTIALS=/shared/docker/credentials/<YOUR_GOOGLE_CREDENTIALS_FILE_NAME>
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example of override config file to provide.
|
||||||
|
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||||
|
#
|
||||||
|
druid_s3_accessKey=<OVERRIDE_THIS>
|
||||||
|
druid_s3_secretKey=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_storage_type=hdfs
|
||||||
|
druid_storage_storageDirectory=/druid/segments
|
||||||
|
|
||||||
|
AWS_REGION=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
|
||||||
|
|
||||||
|
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
|
|
@ -0,0 +1,35 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Example of override config file to provide.
|
||||||
|
# Please replace <OVERRIDE_THIS> with your cloud configs/credentials
|
||||||
|
#
|
||||||
|
druid_s3_accessKey=<OVERRIDE_THIS>
|
||||||
|
druid_s3_secretKey=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_storage_type=s3
|
||||||
|
druid_storage_bucket=<OVERRIDE_THIS>
|
||||||
|
druid_storage_baseKey=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
AWS_REGION=<OVERRIDE_THIS>
|
||||||
|
|
||||||
|
druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"]
|
||||||
|
|
||||||
|
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"]
|
|
@ -27,3 +27,5 @@ SERVICE_DRUID_JAVA_OPTS=-server -Xmx128m -XX:+UseG1GC -agentlib:jdwp=transport=d
|
||||||
druid_auth_basic_common_cacheDirectory=/tmp/authCache/router
|
druid_auth_basic_common_cacheDirectory=/tmp/authCache/router
|
||||||
druid_sql_avatica_enable=true
|
druid_sql_avatica_enable=true
|
||||||
druid_server_https_crlPath=/tls/revocations.crl
|
druid_server_https_crlPath=/tls/revocations.crl
|
||||||
|
druid_router_managementProxy_enabled=true
|
||||||
|
|
||||||
|
|
|
@ -32,5 +32,3 @@ cd client_tls
|
||||||
../docker/tls/generate-to-be-revoked-client-cert.sh
|
../docker/tls/generate-to-be-revoked-client-cert.sh
|
||||||
../docker/tls/generate-untrusted-root-client-cert.sh
|
../docker/tls/generate-untrusted-root-client-cert.sh
|
||||||
../docker/tls/generate-valid-intermediate-client-cert.sh
|
../docker/tls/generate-valid-intermediate-client-cert.sh
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -118,7 +118,7 @@ rm -rf certs.seq
|
||||||
echo 11111115 > certs.seq
|
echo 11111115 > certs.seq
|
||||||
|
|
||||||
# Generate a client certificate for this machine
|
# Generate a client certificate for this machine
|
||||||
openssl genrsa -out expired_client.key 1024 -sha256
|
openssl genrsa -out expired_client.key 1024
|
||||||
openssl req -new -out expired_client.csr -key expired_client.key -reqexts req_ext -config expired_csr.conf
|
openssl req -new -out expired_client.csr -key expired_client.key -reqexts req_ext -config expired_csr.conf
|
||||||
openssl ca -batch -config root_for_expired_client.cnf -policy policy_loose -out expired_client.pem -outdir . -startdate 101010000000Z -enddate 101011000000Z -extensions v3_ca -cert root.pem -keyfile root.key -infiles expired_client.csr
|
openssl ca -batch -config root_for_expired_client.cnf -policy policy_loose -out expired_client.pem -outdir . -startdate 101010000000Z -enddate 101011000000Z -extensions v3_ca -cert root.pem -keyfile root.key -infiles expired_client.csr
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ DNS.2 = localhost
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate a client certificate for this machine
|
# Generate a client certificate for this machine
|
||||||
openssl genrsa -out client.key 1024 -sha256
|
openssl genrsa -out client.key 1024
|
||||||
openssl req -new -out client.csr -key client.key -reqexts req_ext -config csr.conf
|
openssl req -new -out client.csr -key client.key -reqexts req_ext -config csr.conf
|
||||||
openssl x509 -req -days 3650 -in client.csr -CA root.pem -CAkey root.key -set_serial 0x11111111 -out client.pem -sha256 -extfile csr.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in client.csr -CA root.pem -CAkey root.key -set_serial 0x11111111 -out client.pem -sha256 -extfile csr.conf -extensions req_ext
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ DNS.1 = thisisprobablywrongtoo
|
||||||
|
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
openssl genrsa -out invalid_hostname_client.key 1024 -sha256
|
openssl genrsa -out invalid_hostname_client.key 1024
|
||||||
openssl req -new -out invalid_hostname_client.csr -key invalid_hostname_client.key -reqexts req_ext -config invalid_hostname_csr.conf
|
openssl req -new -out invalid_hostname_client.csr -key invalid_hostname_client.key -reqexts req_ext -config invalid_hostname_csr.conf
|
||||||
openssl x509 -req -days 3650 -in invalid_hostname_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111112 -out invalid_hostname_client.pem -sha256 -extfile invalid_hostname_csr.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in invalid_hostname_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111112 -out invalid_hostname_client.pem -sha256 -extfile invalid_hostname_csr.conf -extensions req_ext
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ IP.1 = 9.9.9.9
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate a bad intermediate certificate
|
# Generate a bad intermediate certificate
|
||||||
openssl genrsa -out invalid_ca_intermediate.key 1024 -sha256
|
openssl genrsa -out invalid_ca_intermediate.key 1024
|
||||||
openssl req -new -out invalid_ca_intermediate.csr -key invalid_ca_intermediate.key -reqexts req_ext -config invalid_ca_intermediate.conf
|
openssl req -new -out invalid_ca_intermediate.csr -key invalid_ca_intermediate.key -reqexts req_ext -config invalid_ca_intermediate.conf
|
||||||
openssl x509 -req -days 3650 -in invalid_ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333331 -out invalid_ca_intermediate.pem -sha256 -extfile invalid_ca_intermediate.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in invalid_ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333331 -out invalid_ca_intermediate.pem -sha256 -extfile invalid_ca_intermediate.conf -extensions req_ext
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ DNS.2 = localhost
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate a client certificate for this machine
|
# Generate a client certificate for this machine
|
||||||
openssl genrsa -out invalid_ca_client.key 1024 -sha256
|
openssl genrsa -out invalid_ca_client.key 1024
|
||||||
openssl req -new -out invalid_ca_client.csr -key invalid_ca_client.key -reqexts req_ext -config invalid_ca_client.conf
|
openssl req -new -out invalid_ca_client.csr -key invalid_ca_client.key -reqexts req_ext -config invalid_ca_client.conf
|
||||||
openssl x509 -req -days 3650 -in invalid_ca_client.csr -CA invalid_ca_intermediate.pem -CAkey invalid_ca_intermediate.key -set_serial 0x33333333 -out invalid_ca_client.pem -sha256 -extfile invalid_ca_client.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in invalid_ca_client.csr -CA invalid_ca_intermediate.pem -CAkey invalid_ca_intermediate.key -set_serial 0x33333333 -out invalid_ca_client.pem -sha256 -extfile invalid_ca_client.conf -extensions req_ext
|
||||||
|
|
||||||
|
|
|
@ -52,9 +52,9 @@ DNS.2 = localhost
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate a client certificate for this machine
|
# Generate a client certificate for this machine
|
||||||
openssl genrsa -out revoked_client.key 1024 -sha256
|
openssl genrsa -out revoked_client.key 1024
|
||||||
openssl req -new -out revoked_client.csr -key revoked_client.key -reqexts req_ext -config revoked_csr.conf
|
openssl req -new -out revoked_client.csr -key revoked_client.key -reqexts req_ext -config revoked_csr.conf
|
||||||
openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile csr.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in revoked_client.csr -CA root.pem -CAkey root.key -set_serial 0x11111113 -out revoked_client.pem -sha256 -extfile revoked_csr.conf -extensions req_ext
|
||||||
|
|
||||||
# Create a Java keystore containing the generated certificate
|
# Create a Java keystore containing the generated certificate
|
||||||
openssl pkcs12 -export -in revoked_client.pem -inkey revoked_client.key -out revoked_client.p12 -name revoked_druid -CAfile root.pem -caname druid-it-root -password pass:druid123
|
openssl pkcs12 -export -in revoked_client.pem -inkey revoked_client.key -out revoked_client.p12 -name revoked_druid -CAfile root.pem -caname druid-it-root -password pass:druid123
|
||||||
|
|
|
@ -50,7 +50,7 @@ DNS.2 = localhost
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate a client certificate for this machine
|
# Generate a client certificate for this machine
|
||||||
openssl genrsa -out client_another_root.key 1024 -sha256
|
openssl genrsa -out client_another_root.key 1024
|
||||||
openssl req -new -out client_another_root.csr -key client_another_root.key -reqexts req_ext -config csr_another_root.conf
|
openssl req -new -out client_another_root.csr -key client_another_root.key -reqexts req_ext -config csr_another_root.conf
|
||||||
openssl x509 -req -days 3650 -in client_another_root.csr -CA untrusted_root.pem -CAkey untrusted_root.key -set_serial 0x11111114 -out client_another_root.pem -sha256 -extfile csr_another_root.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in client_another_root.csr -CA untrusted_root.pem -CAkey untrusted_root.key -set_serial 0x11111114 -out client_another_root.pem -sha256 -extfile csr_another_root.conf -extensions req_ext
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ IP.1 = 9.9.9.9
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate an intermediate certificate
|
# Generate an intermediate certificate
|
||||||
openssl genrsa -out ca_intermediate.key 1024 -sha256
|
openssl genrsa -out ca_intermediate.key 1024
|
||||||
openssl req -new -out ca_intermediate.csr -key ca_intermediate.key -reqexts req_ext -config ca_intermediate.conf
|
openssl req -new -out ca_intermediate.csr -key ca_intermediate.key -reqexts req_ext -config ca_intermediate.conf
|
||||||
openssl x509 -req -days 3650 -in ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333332 -out ca_intermediate.pem -sha256 -extfile ca_intermediate.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in ca_intermediate.csr -CA root.pem -CAkey root.key -set_serial 0x33333332 -out ca_intermediate.pem -sha256 -extfile ca_intermediate.conf -extensions req_ext
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ DNS.2 = localhost
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Generate a client certificate for this machine
|
# Generate a client certificate for this machine
|
||||||
openssl genrsa -out intermediate_ca_client.key 1024 -sha256
|
openssl genrsa -out intermediate_ca_client.key 1024
|
||||||
openssl req -new -out intermediate_ca_client.csr -key intermediate_ca_client.key -reqexts req_ext -config intermediate_ca_client.conf
|
openssl req -new -out intermediate_ca_client.csr -key intermediate_ca_client.key -reqexts req_ext -config intermediate_ca_client.conf
|
||||||
openssl x509 -req -days 3650 -in intermediate_ca_client.csr -CA ca_intermediate.pem -CAkey ca_intermediate.key -set_serial 0x33333333 -out intermediate_ca_client.pem -sha256 -extfile intermediate_ca_client.conf -extensions req_ext
|
openssl x509 -req -days 3650 -in intermediate_ca_client.csr -CA ca_intermediate.pem -CAkey ca_intermediate.key -set_serial 0x33333333 -out intermediate_ca_client.pem -sha256 -extfile intermediate_ca_client.conf -extensions req_ext
|
||||||
|
|
||||||
|
|
|
@ -365,6 +365,7 @@
|
||||||
<skip.stop.docker>false</skip.stop.docker>
|
<skip.stop.docker>false</skip.stop.docker>
|
||||||
<override.config.path></override.config.path>
|
<override.config.path></override.config.path>
|
||||||
<resource.file.dir.path></resource.file.dir.path>
|
<resource.file.dir.path></resource.file.dir.path>
|
||||||
|
<extra.datasource.name.suffix>\ Россия\ 한국\ 中国!?</extra.datasource.name.suffix>
|
||||||
</properties>
|
</properties>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
|
@ -430,7 +431,7 @@
|
||||||
-Dfile.encoding=UTF-8
|
-Dfile.encoding=UTF-8
|
||||||
-Ddruid.test.config.dockerIp=${env.DOCKER_IP}
|
-Ddruid.test.config.dockerIp=${env.DOCKER_IP}
|
||||||
-Ddruid.test.config.hadoopDir=${env.HADOOP_DIR}
|
-Ddruid.test.config.hadoopDir=${env.HADOOP_DIR}
|
||||||
-Ddruid.test.config.extraDatasourceNameSuffix=\ Россия\ 한국\ 中国!?
|
-Ddruid.test.config.extraDatasourceNameSuffix=${extra.datasource.name.suffix}
|
||||||
-Ddruid.zk.service.host=${env.DOCKER_IP}
|
-Ddruid.zk.service.host=${env.DOCKER_IP}
|
||||||
-Ddruid.client.https.trustStorePath=client_tls/truststore.jks
|
-Ddruid.client.https.trustStorePath=client_tls/truststore.jks
|
||||||
-Ddruid.client.https.trustStorePassword=druid123
|
-Ddruid.client.https.trustStorePassword=druid123
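Because the datasource suffix above is now the Maven property ${extra.datasource.name.suffix} rather than a hard-coded value, it can be overridden from the command line; the Hadoop tests rely on this, as the README change earlier notes. For example:
```
mvn verify -P integration-tests -Dit.test=ITHadoopIndexTest -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix=''
```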
|
||||||
|
|
|
@ -80,17 +80,18 @@ if [ -n "$DRUID_INTEGRATION_TEST_SKIP_START_DOCKER" ] && [ "$DRUID_INTEGRATION_T
|
||||||
# For druid-kinesis-indexing-service
|
# For druid-kinesis-indexing-service
|
||||||
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
|
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
|
||||||
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
|
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
|
||||||
$ For druid-parquet-extensions
|
# For druid-parquet-extensions
|
||||||
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
|
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
|
||||||
mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
|
mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
|
||||||
$ For druid-orc-extensions
|
# For druid-orc-extensions
|
||||||
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
|
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
|
||||||
mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
|
mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
|
||||||
|
|
||||||
# Pull Hadoop dependency if needed
|
# Pull Hadoop dependency if needed
|
||||||
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
|
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
|
||||||
then
|
then
|
||||||
java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5
|
java -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:2.8.5 -h org.apache.hadoop:hadoop-aws:2.8.5 -h org.apache.hadoop:hadoop-azure:2.8.5
|
||||||
|
curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $SHARED_DIR/docker/lib/gcs-connector-hadoop2-latest.jar
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# install logging config
|
# install logging config
|
||||||
|
|
|
@ -57,6 +57,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
||||||
private String password;
|
private String password;
|
||||||
private String cloudBucket;
|
private String cloudBucket;
|
||||||
private String cloudPath;
|
private String cloudPath;
|
||||||
|
private String cloudRegion;
|
||||||
|
private String hadoopGcsCredentialsPath;
|
||||||
|
private String azureKey;
|
||||||
private String streamEndpoint;
|
private String streamEndpoint;
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
|
@ -193,6 +196,9 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
||||||
|
|
||||||
cloudBucket = props.get("cloud_bucket");
|
cloudBucket = props.get("cloud_bucket");
|
||||||
cloudPath = props.get("cloud_path");
|
cloudPath = props.get("cloud_path");
|
||||||
|
cloudRegion = props.get("cloud_region");
|
||||||
|
hadoopGcsCredentialsPath = props.get("hadoopGcsCredentialsPath");
|
||||||
|
azureKey = props.get("azureKey");
|
||||||
streamEndpoint = props.get("stream_endpoint");
|
streamEndpoint = props.get("stream_endpoint");
|
||||||
|
|
||||||
LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl);
|
LOG.info("router: [%s], [%s]", routerUrl, routerTLSUrl);
|
||||||
|
@ -356,6 +362,24 @@ public class ConfigFileConfigProvider implements IntegrationTestingConfigProvide
|
||||||
return cloudPath;
|
return cloudPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCloudRegion()
|
||||||
|
{
|
||||||
|
return cloudRegion;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAzureKey()
|
||||||
|
{
|
||||||
|
return azureKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHadoopGcsCredentialsPath()
|
||||||
|
{
|
||||||
|
return hadoopGcsCredentialsPath;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getStreamEndpoint()
|
public String getStreamEndpoint()
|
||||||
{
|
{
|
||||||
|
|
|
@ -33,10 +33,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
||||||
@NotNull
|
@NotNull
|
||||||
private String dockerIp;
|
private String dockerIp;
|
||||||
|
|
||||||
@JsonProperty
|
|
||||||
@NotNull
|
|
||||||
private String hadoopDir;
|
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private String extraDatasourceNameSuffix = "";
|
private String extraDatasourceNameSuffix = "";
|
||||||
|
|
||||||
|
@ -46,6 +42,15 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private String cloudBucket;
|
private String cloudBucket;
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
private String cloudRegion;
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
private String hadoopGcsCredentialsPath;
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
private String azureKey;
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private String streamEndpoint;
|
private String streamEndpoint;
|
||||||
|
|
||||||
|
@ -185,9 +190,6 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
||||||
@Override
|
@Override
|
||||||
public String getProperty(String prop)
|
public String getProperty(String prop)
|
||||||
{
|
{
|
||||||
if ("hadoopTestDir".equals(prop)) {
|
|
||||||
return hadoopDir;
|
|
||||||
}
|
|
||||||
throw new UnsupportedOperationException("DockerConfigProvider does not support property " + prop);
|
throw new UnsupportedOperationException("DockerConfigProvider does not support property " + prop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,6 +235,24 @@ public class DockerConfigProvider implements IntegrationTestingConfigProvider
|
||||||
return cloudPath;
|
return cloudPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCloudRegion()
|
||||||
|
{
|
||||||
|
return cloudRegion;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAzureKey()
|
||||||
|
{
|
||||||
|
return azureKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHadoopGcsCredentialsPath()
|
||||||
|
{
|
||||||
|
return hadoopGcsCredentialsPath;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getStreamEndpoint()
|
public String getStreamEndpoint()
|
||||||
{
|
{
|
||||||
|
|
|
@ -89,5 +89,11 @@ public interface IntegrationTestingConfig
|
||||||
|
|
||||||
String getCloudPath();
|
String getCloudPath();
|
||||||
|
|
||||||
|
String getCloudRegion();
|
||||||
|
|
||||||
|
String getAzureKey();
|
||||||
|
|
||||||
|
String getHadoopGcsCredentialsPath();
|
||||||
|
|
||||||
String getStreamEndpoint();
|
String getStreamEndpoint();
|
||||||
}
|
}
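A hedged sketch of supplying the new configuration values when running against the Docker test cluster: cloudBucket, cloudPath, and azureKey are referenced as -Ddruid.test.config.* properties in the test javadocs further down, while druid.test.config.cloudRegion is inferred from the new field name and should be treated as an assumption (-Dgroups as in the earlier sketch):
```
mvn verify -P integration-tests -Dgroups=hadoop-azure-to-azure-deep-storage -Dstart.hadoop.docker=true \
  -Ddruid.test.config.cloudBucket=<AZURE_ACCOUNT> \
  -Ddruid.test.config.cloudPath=<AZURE_CONTAINER> \
  -Ddruid.test.config.azureKey=<AZURE_KEY> \
  -Dextra.datasource.name.suffix=''
```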
|
||||||
|
|
|
@ -27,8 +27,6 @@ public class TestNGGroup
|
||||||
{
|
{
|
||||||
public static final String BATCH_INDEX = "batch-index";
|
public static final String BATCH_INDEX = "batch-index";
|
||||||
|
|
||||||
public static final String HADOOP_INDEX = "hadoop-index";
|
|
||||||
|
|
||||||
public static final String KAFKA_INDEX = "kafka-index";
|
public static final String KAFKA_INDEX = "kafka-index";
|
||||||
|
|
||||||
public static final String KAFKA_INDEX_SLOW = "kafka-index-slow";
|
public static final String KAFKA_INDEX_SLOW = "kafka-index-slow";
|
||||||
|
@ -86,6 +84,15 @@ public class TestNGGroup
|
||||||
*/
|
*/
|
||||||
public static final String HDFS_DEEP_STORAGE = "hdfs-deep-storage";
|
public static final String HDFS_DEEP_STORAGE = "hdfs-deep-storage";
|
||||||
|
|
||||||
|
public static final String HADOOP_S3_TO_S3 = "hadoop-s3-to-s3-deep-storage";
|
||||||
|
public static final String HADOOP_S3_TO_HDFS = "hadoop-s3-to-hdfs-deep-storage";
|
||||||
|
|
||||||
|
public static final String HADOOP_AZURE_TO_AZURE = "hadoop-azure-to-azure-deep-storage";
|
||||||
|
public static final String HADOOP_AZURE_TO_HDFS = "hadoop-azure-to-hdfs-deep-storage";
|
||||||
|
|
||||||
|
public static final String HADOOP_GCS_TO_GCS = "hadoop-gcs-to-gcs-deep-storage";
|
||||||
|
public static final String HADOOP_GCS_TO_HDFS = "hadoop-gcs-to-hdfs-deep-storage";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file.
|
* This group is not part of CI. To run this group, s3 configs/credentials for your s3 must be provided in a file.
|
||||||
* The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
* The path of the file must then be passed to mvn with -Doverride.config.path=<PATH_TO_FILE>
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.java.util.common.StringUtils;
|
||||||
|
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public abstract class AbstractAzureInputHadoopIndexTest extends AbstractITBatchIndexTest
|
||||||
|
{
|
||||||
|
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_azure_input_index_task.json";
|
||||||
|
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||||
|
|
||||||
|
void doTest() throws Exception
|
||||||
|
{
|
||||||
|
final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
|
||||||
|
try (
|
||||||
|
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||||
|
) {
|
||||||
|
final Function<String, String> azurePropsTransform = spec -> {
|
||||||
|
try {
|
||||||
|
String path = StringUtils.format(
|
||||||
|
"wasbs://%s@%s.blob.core.windows.net/",
|
||||||
|
config.getCloudPath(),
|
||||||
|
config.getCloudBucket()
|
||||||
|
);
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%INPUT_PATHS%%",
|
||||||
|
path
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%AZURE_ACCOUNT%%",
|
||||||
|
config.getCloudBucket()
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%AZURE_KEY%%",
|
||||||
|
config.getAzureKey()
|
||||||
|
);
|
||||||
|
|
||||||
|
return spec;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
doIndexTest(
|
||||||
|
indexDatasource,
|
||||||
|
INDEX_TASK,
|
||||||
|
azurePropsTransform,
|
||||||
|
INDEX_QUERIES_RESOURCE,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.java.util.common.StringUtils;
|
||||||
|
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public abstract class AbstractGcsInputHadoopIndexTest extends AbstractITBatchIndexTest
|
||||||
|
{
|
||||||
|
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_gcs_input_index_task.json";
|
||||||
|
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||||
|
|
||||||
|
void doTest() throws Exception
|
||||||
|
{
|
||||||
|
final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID();
|
||||||
|
try (
|
||||||
|
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||||
|
) {
|
||||||
|
final Function<String, String> gcsPropsTransform = spec -> {
|
||||||
|
try {
|
||||||
|
|
||||||
|
String path = StringUtils.format(
|
||||||
|
"gs://%s/%s",
|
||||||
|
config.getCloudBucket(),
|
||||||
|
config.getCloudPath()
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%INPUT_PATHS%%",
|
||||||
|
path
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%GCS_KEYFILE_PATH%%",
|
||||||
|
config.getHadoopGcsCredentialsPath()
|
||||||
|
);
|
||||||
|
|
||||||
|
return spec;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
doIndexTest(
|
||||||
|
indexDatasource,
|
||||||
|
INDEX_TASK,
|
||||||
|
gcsPropsTransform,
|
||||||
|
INDEX_QUERIES_RESOURCE,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,96 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import org.apache.druid.common.aws.AWSCredentialsConfig;
|
||||||
|
import org.apache.druid.java.util.common.StringUtils;
|
||||||
|
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public abstract class AbstractS3InputHadoopIndexTest extends AbstractITBatchIndexTest
|
||||||
|
{
|
||||||
|
@Inject
|
||||||
|
protected AWSCredentialsConfig awsCredentialsConfig;
|
||||||
|
|
||||||
|
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_s3_input_index_task.json";
|
||||||
|
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||||
|
|
||||||
|
void doTest() throws Exception
|
||||||
|
{
|
||||||
|
final String indexDatasource = "wikipedia_hadoop_index_test_" + UUID.randomUUID();
|
||||||
|
try (
|
||||||
|
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||||
|
) {
|
||||||
|
final Function<String, String> s3PropsTransform = spec -> {
|
||||||
|
try {
|
||||||
|
|
||||||
|
String path = StringUtils.format(
|
||||||
|
"s3a://%s/%s",
|
||||||
|
config.getCloudBucket(),
|
||||||
|
config.getCloudPath()
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%INPUT_PATHS%%",
|
||||||
|
path
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%AWS_ACCESS_KEY%%",
|
||||||
|
awsCredentialsConfig.getAccessKey().getPassword()
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%AWS_SECRET_KEY%%",
|
||||||
|
awsCredentialsConfig.getSecretKey().getPassword()
|
||||||
|
);
|
||||||
|
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%AWS_REGION%%",
|
||||||
|
config.getCloudRegion()
|
||||||
|
);
|
||||||
|
|
||||||
|
return spec;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
doIndexTest(
|
||||||
|
indexDatasource,
|
||||||
|
INDEX_TASK,
|
||||||
|
s3PropsTransform,
|
||||||
|
INDEX_QUERIES_RESOURCE,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
|
import org.testng.annotations.Guice;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IMPORTANT:
|
||||||
|
* To run this test, you must:
|
||||||
|
* 1) Set the account, container, and key for your data.
|
||||||
|
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath,
|
||||||
|
* and -Ddruid.test.config.azureKey.
|
||||||
|
* - `cloudBucket` should be set to your Azure account name
|
||||||
|
* - `cloudPath` should be set to the Azure container where the input data resides
|
||||||
|
* - `azureKey` should be set to an Azure access key that can read from the container above
|
||||||
|
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
|
* located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1.
|
||||||
|
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure+Hadoop configs set. See
|
||||||
|
* integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure for env vars to provide.
|
||||||
|
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||||
|
*/
|
||||||
|
@Test(groups = TestNGGroup.HADOOP_AZURE_TO_AZURE)
|
||||||
|
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||||
|
public class ITAzureInputToAzureHadoopIndexTest extends AbstractAzureInputHadoopIndexTest
|
||||||
|
{
|
||||||
|
public void testGcsIndexData() throws Exception
|
||||||
|
{
|
||||||
|
doTest();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
|
import org.testng.annotations.Guice;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IMPORTANT:
|
||||||
|
* To run this test, you must:
|
||||||
|
* 1) Set the account, container, and key for your data.
|
||||||
|
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath,
|
||||||
|
* and -Ddruid.test.config.azureKey.
|
||||||
|
* - `cloudBucket` should be set to your Azure account name
|
||||||
|
* - `cloudPath` should be set to the Azure container where the input data resides
|
||||||
|
* - `azureKey` should be set to an Azure access key that can read from the container above
|
||||||
|
* 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
|
* located in integration-tests/src/test/resources/data/batch_index/json to the Azure container set in step 1.
|
||||||
|
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure+Hadoop configs set. See
|
||||||
|
* integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs for env vars to provide.
|
||||||
|
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||||
|
*/
|
||||||
|
@Test(groups = TestNGGroup.HADOOP_AZURE_TO_HDFS)
|
||||||
|
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||||
|
public class ITAzureInputToHdfsHadoopIndexTest extends AbstractAzureInputHadoopIndexTest
|
||||||
|
{
|
||||||
|
public void testGcsIndexData() throws Exception
|
||||||
|
{
|
||||||
|
doTest();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
|
import org.testng.annotations.Guice;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IMPORTANT:
|
||||||
|
* To run this test, you must:
|
||||||
|
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
|
||||||
|
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
|
||||||
|
* 2) Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it
|
||||||
|
* exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the
|
||||||
|
* shared folder used by the integration test containers if running the Docker-based Hadoop container,
|
||||||
|
* in which case this property can be set to /shared/<path_of_your_credentials_file>
|
||||||
|
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that contains GOOGLE_APPLICATION_CREDENTIALS file
|
||||||
|
* 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
|
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
|
||||||
|
* 5) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
|
||||||
|
* integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_gcs for env vars to provide.
|
||||||
|
* 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||||
|
*/
|
||||||
|
@Test(groups = TestNGGroup.HADOOP_GCS_TO_GCS)
|
||||||
|
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||||
|
public class ITGcsInputToGcsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest
|
||||||
|
{
|
||||||
|
public void testGcsIndexData() throws Exception
|
||||||
|
{
|
||||||
|
doTest();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
|
import org.testng.annotations.Guice;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
* IMPORTANT:
* To run this test, you must:
* 1) Set the bucket and path for your data. This can be done by setting -Ddruid.test.config.cloudBucket and
* -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path" in the config file.
* 2) Set -Ddruid.test.config.hadoopGcsCredentialsPath to the location of your Google credentials file as it
* exists within the Hadoop cluster that will ingest the data. The credentials file can be placed in the
* shared folder used by the integration test containers if running the Docker-based Hadoop container,
* in which case this property can be set to /shared/<path_of_your_credentials_file>
* 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> pointing to the folder that contains the GOOGLE_APPLICATION_CREDENTIALS file
* 4) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
* located in integration-tests/src/test/resources/data/batch_index/json to your GCS at the location set in step 1.
* 5) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs deep storage configs set. See
* integration-tests/docker/environment-configs/override-examples/hadoop/gcs_to_hdfs for env vars to provide.
* 6) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
*/
@Test(groups = TestNGGroup.HADOOP_GCS_TO_HDFS)
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITGcsInputToHdfsHadoopIndexTest extends AbstractGcsInputHadoopIndexTest
{
public void testGcsIndexData() throws Exception
{
doTest();
}
}
@ -19,87 +19,150 @@
|
||||||
|
|
||||||
package org.apache.druid.tests.hadoop;
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
|
||||||
|
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
|
||||||
|
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
|
||||||
import org.apache.druid.java.util.common.StringUtils;
|
import org.apache.druid.java.util.common.StringUtils;
|
||||||
import org.apache.druid.java.util.common.logger.Logger;
|
import org.apache.druid.java.util.common.logger.Logger;
|
||||||
import org.apache.druid.testing.IntegrationTestingConfig;
|
|
||||||
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
import org.apache.druid.testing.utils.ITRetryUtil;
|
|
||||||
import org.apache.druid.tests.TestNGGroup;
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
import org.apache.druid.tests.indexer.AbstractIndexerTest;
|
import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
|
||||||
import org.testng.annotations.AfterClass;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.BeforeClass;
|
|
||||||
import org.testng.annotations.Guice;
|
import org.testng.annotations.Guice;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
@Test(groups = TestNGGroup.HADOOP_INDEX)
|
import java.io.Closeable;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IMPORTANT:
|
||||||
|
* To run this test, you must:
|
||||||
|
* 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
|
* located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
|
||||||
|
* If using the Docker-based Hadoop container, this is automatically done by the integration tests.
|
||||||
|
* 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
|
||||||
|
* at /batch_index/tsv/
|
||||||
|
* If using the Docker-based Hadoop container, this is automatically done by the integration tests.
|
||||||
|
* 3) Provide -Doverride.config.path=<PATH_TO_FILE> with HDFS configs set. See
|
||||||
|
* integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
|
||||||
|
* 4) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||||
|
*/
|
||||||
|
@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
|
||||||
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||||
public class ITHadoopIndexTest extends AbstractIndexerTest
|
public class ITHadoopIndexTest extends AbstractITBatchIndexTest
|
||||||
{
|
{
|
||||||
private static final Logger LOG = new Logger(ITHadoopIndexTest.class);
|
private static final Logger LOG = new Logger(ITHadoopIndexTest.class);
|
||||||
|
|
||||||
private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json";
|
private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json";
|
||||||
private static final String BATCH_QUERIES_RESOURCE = "/hadoop/batch_hadoop_queries.json";
|
private static final String BATCH_QUERIES_RESOURCE = "/hadoop/batch_hadoop_queries.json";
|
||||||
private static final String BATCH_DATASOURCE = "batchHadoop";
|
private static final String BATCH_DATASOURCE = "batchHadoop";
|
||||||
private boolean dataLoaded = false;
|
|
||||||
|
|
||||||
@Inject
|
private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_index_task.json";
|
||||||
private IntegrationTestingConfig config;
|
private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
|
||||||
|
private static final String INDEX_DATASOURCE = "wikipedia_hadoop_index_test";
|
||||||
|
|
||||||
@BeforeClass
|
private static final String REINDEX_TASK = "/hadoop/wikipedia_hadoop_reindex_task.json";
|
||||||
public void beforeClass()
|
private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json";
|
||||||
|
private static final String REINDEX_DATASOURCE = "wikipedia_hadoop_reindex_test";
|
||||||
|
|
||||||
|
@DataProvider
|
||||||
|
public static Object[][] resources()
|
||||||
{
|
{
|
||||||
loadData(config.getProperty("hadoopTestDir") + "/batchHadoop1");
|
return new Object[][]{
|
||||||
dataLoaded = true;
|
{new HashedPartitionsSpec(3, null, null)},
|
||||||
|
{new HashedPartitionsSpec(null, 3, ImmutableList.of("page"))},
|
||||||
|
{new HashedPartitionsSpec(null, 3, ImmutableList.of("page", "user"))},
|
||||||
|
{new SingleDimensionPartitionsSpec(1000, null, null, false)},
|
||||||
|
{new SingleDimensionPartitionsSpec(1000, null, "page", false)},
|
||||||
|
{new SingleDimensionPartitionsSpec(1000, null, null, true)},
|
||||||
|
|
||||||
|
//{new HashedPartitionsSpec(null, 3, null)} // this results in a bug where the segments have 0 rows
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHadoopIndex() throws Exception
|
public void testLegacyITHadoopIndexTest() throws Exception
|
||||||
{
|
{
|
||||||
queryHelper.testQueriesFromFile(BATCH_QUERIES_RESOURCE, 2);
|
try (
|
||||||
}
|
final Closeable ignored0 = unloader(BATCH_DATASOURCE + config.getExtraDatasourceNameSuffix());
|
||||||
|
) {
|
||||||
private void loadData(String hadoopDir)
|
final Function<String, String> specPathsTransform = spec -> {
|
||||||
{
|
|
||||||
String indexerSpec;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
LOG.info("indexerFile name: [%s]", BATCH_TASK);
|
String path = "/batch_index/tsv";
|
||||||
indexerSpec = getResourceAsString(BATCH_TASK);
|
spec = StringUtils.replace(
|
||||||
indexerSpec = StringUtils.replace(indexerSpec, "%%HADOOP_TEST_PATH%%", hadoopDir);
|
spec,
|
||||||
|
"%%INPUT_PATHS%%",
|
||||||
|
path
|
||||||
|
);
|
||||||
|
|
||||||
|
return spec;
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
LOG.error("could not read and modify indexer file: %s", e.getMessage());
|
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
doIndexTest(
|
||||||
final String taskID = indexer.submitTask(indexerSpec);
|
BATCH_DATASOURCE,
|
||||||
LOG.info("TaskID for loading index task %s", taskID);
|
BATCH_TASK,
|
||||||
indexer.waitUntilTaskCompletes(taskID, 10000, 120);
|
specPathsTransform,
|
||||||
ITRetryUtil.retryUntil(
|
BATCH_QUERIES_RESOURCE,
|
||||||
() -> coordinator.areSegmentsLoaded(BATCH_DATASOURCE),
|
false,
|
||||||
true,
|
true,
|
||||||
20000,
|
true
|
||||||
10,
|
|
||||||
"Segment-Load-Task-" + taskID
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
|
||||||
LOG.error("data could not be loaded: %s", e.getMessage());
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass
|
@Test(dataProvider = "resources")
|
||||||
public void afterClass()
|
public void testIndexData(DimensionBasedPartitionsSpec partitionsSpec) throws Exception
|
||||||
{
|
{
|
||||||
if (dataLoaded) {
|
String indexDatasource = INDEX_DATASOURCE + "_" + UUID.randomUUID();
|
||||||
|
String reindexDatasource = REINDEX_DATASOURCE + "_" + UUID.randomUUID();
|
||||||
|
|
||||||
|
try (
|
||||||
|
final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
|
||||||
|
final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix());
|
||||||
|
) {
|
||||||
|
final Function<String, String> specPathsTransform = spec -> {
|
||||||
try {
|
try {
|
||||||
unloadAndKillData(BATCH_DATASOURCE);
|
String path = "/batch_index/json";
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%INPUT_PATHS%%",
|
||||||
|
path
|
||||||
|
);
|
||||||
|
spec = StringUtils.replace(
|
||||||
|
spec,
|
||||||
|
"%%PARTITIONS_SPEC%%",
|
||||||
|
jsonMapper.writeValueAsString(partitionsSpec)
|
||||||
|
);
|
||||||
|
|
||||||
|
return spec;
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
LOG.warn(e, "exception while removing segments");
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
doIndexTest(
|
||||||
|
indexDatasource,
|
||||||
|
INDEX_TASK,
|
||||||
|
specPathsTransform,
|
||||||
|
INDEX_QUERIES_RESOURCE,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
|
doReindexTest(
|
||||||
|
indexDatasource,
|
||||||
|
reindexDatasource,
|
||||||
|
REINDEX_TASK,
|
||||||
|
REINDEX_QUERIES_RESOURCE
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
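For reference, the data-provider entries above are turned into JSON and spliced into the task spec by plain string replacement. The following is a hedged sketch of that expansion, using a bare Jackson ObjectMapper in place of the test's injected jsonMapper; the resource placeholders, input path, and partitions spec values are taken from the test code in this hunk, while the class and method names are illustrative:

import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.java.util.common.StringUtils;

public class PartitionsSpecRenderingSketch
{
  public static String render(String rawTaskSpec) throws Exception
  {
    ObjectMapper jsonMapper = new ObjectMapper();
    // Same substitutions testIndexData() applies before the task is submitted.
    String spec = StringUtils.replace(rawTaskSpec, "%%INPUT_PATHS%%", "/batch_index/json");
    return StringUtils.replace(
        spec,
        "%%PARTITIONS_SPEC%%",
        jsonMapper.writeValueAsString(new HashedPartitionsSpec(3, null, null))
    );
  }
}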
@ -0,0 +1,49 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
|
import org.testng.annotations.Guice;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IMPORTANT:
|
||||||
|
* To run this test, you must:
|
||||||
|
* 1) Set the bucket, path, and region for your data.
|
||||||
|
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath
|
||||||
|
* and -Ddruid.test.config.cloudRegion or setting "cloud_bucket","cloud_path", and "cloud_region" in the config file.
|
||||||
|
* 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey when running the tests to your access/secret keys.
|
||||||
|
* 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
|
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
|
||||||
|
* 4) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and hdfs deep storage configs set. See
|
||||||
|
* integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs for env vars to provide.
|
||||||
|
* 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||||
|
*/
|
||||||
|
@Test(groups = TestNGGroup.HADOOP_S3_TO_HDFS)
|
||||||
|
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||||
|
public class ITS3InputToHdfsHadoopIndexTest extends AbstractS3InputHadoopIndexTest
|
||||||
|
{
|
||||||
|
@Test()
|
||||||
|
public void testS3IndexData() throws Exception
|
||||||
|
{
|
||||||
|
doTest();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.tests.hadoop;
|
||||||
|
|
||||||
|
import org.apache.druid.testing.guice.DruidTestModuleFactory;
|
||||||
|
import org.apache.druid.tests.TestNGGroup;
|
||||||
|
import org.testng.annotations.Guice;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IMPORTANT:
|
||||||
|
* To run this test, you must:
|
||||||
|
* 1) Set the bucket, path, and region for your data.
|
||||||
|
* This can be done by setting -Ddruid.test.config.cloudBucket, -Ddruid.test.config.cloudPath
|
||||||
|
* and -Ddruid.test.config.cloudRegion or setting "cloud_bucket","cloud_path", and "cloud_region" in the config file.
|
||||||
|
* 2) Set -Ddruid.s3.accessKey and -Ddruid.s3.secretKey when running the tests to your access/secret keys.
|
||||||
|
* 3) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
|
||||||
|
* located in integration-tests/src/test/resources/data/batch_index/json to your S3 at the location set in step 1.
|
||||||
|
* 4) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and s3 deep storage configs set. See
|
||||||
|
* integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 for env vars to provide.
|
||||||
|
* 5) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
|
||||||
|
*/
|
||||||
|
@Test(groups = TestNGGroup.HADOOP_S3_TO_S3)
|
||||||
|
@Guice(moduleFactory = DruidTestModuleFactory.class)
|
||||||
|
public class ITS3InputToS3HadoopIndexTest extends AbstractS3InputHadoopIndexTest
|
||||||
|
{
|
||||||
|
@Test()
|
||||||
|
public void testS3IndexData() throws Exception
|
||||||
|
{
|
||||||
|
doTest();
|
||||||
|
}
|
||||||
|
}
|
|
@ -85,14 +85,14 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
|
||||||
private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
|
private static final Logger LOG = new Logger(AbstractITBatchIndexTest.class);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
IntegrationTestingConfig config;
|
protected IntegrationTestingConfig config;
|
||||||
@Inject
|
@Inject
|
||||||
protected SqlTestQueryHelper sqlQueryHelper;
|
protected SqlTestQueryHelper sqlQueryHelper;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
ClientInfoResourceTestClient clientInfoResourceTestClient;
|
ClientInfoResourceTestClient clientInfoResourceTestClient;
|
||||||
|
|
||||||
void doIndexTest(
|
protected void doIndexTest(
|
||||||
String dataSource,
|
String dataSource,
|
||||||
String indexTaskFilePath,
|
String indexTaskFilePath,
|
||||||
String queryFilePath,
|
String queryFilePath,
|
||||||
|
@ -104,7 +104,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
|
||||||
doIndexTest(dataSource, indexTaskFilePath, Function.identity(), queryFilePath, waitForNewVersion, runTestQueries, waitForSegmentsToLoad);
|
doIndexTest(dataSource, indexTaskFilePath, Function.identity(), queryFilePath, waitForNewVersion, runTestQueries, waitForSegmentsToLoad);
|
||||||
}
|
}
|
||||||
|
|
||||||
void doIndexTest(
|
protected void doIndexTest(
|
||||||
String dataSource,
|
String dataSource,
|
||||||
String indexTaskFilePath,
|
String indexTaskFilePath,
|
||||||
Function<String, String> taskSpecTransform,
|
Function<String, String> taskSpecTransform,
|
||||||
|
@ -151,7 +151,7 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void doReindexTest(
|
protected void doReindexTest(
|
||||||
String baseDataSource,
|
String baseDataSource,
|
||||||
String reindexDataSource,
|
String reindexDataSource,
|
||||||
String reindexTaskFilePath,
|
String reindexTaskFilePath,
|
||||||
|
|
|
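The visibility changes above are what let the new Hadoop tests, which live in org.apache.druid.tests.hadoop rather than org.apache.druid.tests.indexer, reuse this base class. A hypothetical subclass illustrating the point (class and datasource names are made up; the doIndexTest arguments mirror ITHadoopIndexTest above; the class is declared abstract so the sketch does not depend on any other abstract members of the base class):

package org.apache.druid.tests.hadoop;

import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;

import java.util.function.Function;

public abstract class ExampleHadoopIndexTest extends AbstractITBatchIndexTest
{
  public void runExample() throws Exception
  {
    // Compiles from this package only because doIndexTest() is now protected;
    // package-private access would not be visible outside org.apache.druid.tests.indexer.
    doIndexTest(
        "example_hadoop_datasource",
        "/hadoop/wikipedia_hadoop_index_task.json",
        Function.identity(),
        "/indexer/wikipedia_index_queries.json",
        false,
        true,
        true
    );
  }
}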
@ -2,9 +2,9 @@
|
||||||
"type": "index_hadoop",
|
"type": "index_hadoop",
|
||||||
"spec": {
|
"spec": {
|
||||||
"dataSchema": {
|
"dataSchema": {
|
||||||
"dataSource": "batchHadoop",
|
"dataSource": "%%DATASOURCE%%",
|
||||||
"parser": {
|
"parser": {
|
||||||
"type": "string",
|
"type": "hadoopyString",
|
||||||
"parseSpec": {
|
"parseSpec": {
|
||||||
"format": "tsv",
|
"format": "tsv",
|
||||||
"timestampSpec": {
|
"timestampSpec": {
|
||||||
|
@ -53,7 +53,7 @@
|
||||||
"type": "hadoop",
|
"type": "hadoop",
|
||||||
"inputSpec": {
|
"inputSpec": {
|
||||||
"type": "static",
|
"type": "static",
|
||||||
"paths": "%%HADOOP_TEST_PATH%%"
|
"paths": "/batch_index/tsv/batch_hadoop.data"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tuningConfig": {
|
"tuningConfig": {
|
||||||
|
@ -64,7 +64,19 @@
|
||||||
"type": "hashed"
|
"type": "hashed"
|
||||||
},
|
},
|
||||||
"jobProperties": {
|
"jobProperties": {
|
||||||
"fs.permissions.umask-mode": "022"
|
"fs.permissions.umask-mode": "022",
|
||||||
|
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"dfs.datanode.address" : "druid-it-hadoop",
|
||||||
|
"dfs.client.use.datanode.hostname" : "true",
|
||||||
|
"dfs.datanode.use.datanode.hostname" : "true",
|
||||||
|
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
|
||||||
|
"yarn.nodemanager.vmem-check-enabled" : "false",
|
||||||
|
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.job.user.classpath.first" : "true",
|
||||||
|
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.map.memory.mb" : 1024,
|
||||||
|
"mapreduce.reduce.memory.mb" : 1024
|
||||||
},
|
},
|
||||||
"rowFlushBoundary": 10000
|
"rowFlushBoundary": 10000
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
{
|
||||||
|
"type": "index_hadoop",
|
||||||
|
"hadoopDependencyCoordinates" : ["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-azure:2.8.5"],
|
||||||
|
"spec": {
|
||||||
|
"dataSchema": {
|
||||||
|
"dataSource": "%%DATASOURCE%%",
|
||||||
|
"parser": {
|
||||||
|
"type": "hadoopyString",
|
||||||
|
"parseSpec": {
|
||||||
|
"format" : "json",
|
||||||
|
"timestampSpec": {
|
||||||
|
"column": "timestamp"
|
||||||
|
},
|
||||||
|
"dimensionsSpec": {
|
||||||
|
"dimensions": [
|
||||||
|
"page",
|
||||||
|
{"type": "string", "name": "language", "createBitmapIndex": false},
|
||||||
|
"user",
|
||||||
|
"unpatrolled",
|
||||||
|
"newPage",
|
||||||
|
"robot",
|
||||||
|
"anonymous",
|
||||||
|
"namespace",
|
||||||
|
"continent",
|
||||||
|
"country",
|
||||||
|
"region",
|
||||||
|
"city"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metricsSpec": [
|
||||||
|
{
|
||||||
|
"type": "count",
|
||||||
|
"name": "count"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "added",
|
||||||
|
"fieldName": "added"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "deleted",
|
||||||
|
"fieldName": "deleted"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "delta",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "thetaSketch",
|
||||||
|
"type": "thetaSketch",
|
||||||
|
"fieldName": "user"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "quantilesDoublesSketch",
|
||||||
|
"type": "quantilesDoublesSketch",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "HLLSketchBuild",
|
||||||
|
"type": "HLLSketchBuild",
|
||||||
|
"fieldName": "user"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"granularitySpec": {
|
||||||
|
"segmentGranularity": "DAY",
|
||||||
|
"queryGranularity": "second",
|
||||||
|
"intervals" : [ "2013-08-31/2013-09-02" ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ioConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"inputSpec": {
|
||||||
|
"type": "static",
|
||||||
|
"paths": "%%INPUT_PATHS%%"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tuningConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"partitionsSpec": {
|
||||||
|
"assumeGrouped": true,
|
||||||
|
"targetPartitionSize": 75000,
|
||||||
|
"type": "hashed"
|
||||||
|
},
|
||||||
|
"jobProperties": {
|
||||||
|
"fs.permissions.umask-mode": "022",
|
||||||
|
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"dfs.datanode.address" : "druid-it-hadoop",
|
||||||
|
"dfs.client.use.datanode.hostname" : "true",
|
||||||
|
"dfs.datanode.use.datanode.hostname" : "true",
|
||||||
|
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
|
||||||
|
"yarn.nodemanager.vmem-check-enabled" : "false",
|
||||||
|
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D",
|
||||||
|
"mapreduce.job.user.classpath.first" : "true",
|
||||||
|
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.map.memory.mb" : 1024,
|
||||||
|
"mapreduce.reduce.memory.mb" : 1024,
|
||||||
|
"fs.azure.account.key.%%AZURE_ACCOUNT%%.blob.core.windows.net":"%%AZURE_KEY%%"
|
||||||
|
},
|
||||||
|
"rowFlushBoundary": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
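The only Azure-specific parts of the spec above are the hadoop-azure dependency coordinate and the per-account storage key job property, whose name embeds the storage account. A small illustrative sketch (not part of this change) of how that property name is composed from the account placeholder:

import org.apache.druid.java.util.common.StringUtils;

public class AzureJobPropertySketch
{
  // Produces e.g. "fs.azure.account.key.myaccount.blob.core.windows.net", the key that
  // %%AZURE_ACCOUNT%% / %%AZURE_KEY%% are substituted into in the spec above.
  public static String accountKeyProperty(String azureAccount)
  {
    return StringUtils.format("fs.azure.account.key.%s.blob.core.windows.net", azureAccount);
  }
}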
@ -0,0 +1,113 @@
|
||||||
|
{
|
||||||
|
"type": "index_hadoop",
|
||||||
|
"spec": {
|
||||||
|
"dataSchema": {
|
||||||
|
"dataSource": "%%DATASOURCE%%",
|
||||||
|
"parser": {
|
||||||
|
"type": "hadoopyString",
|
||||||
|
"parseSpec": {
|
||||||
|
"format" : "json",
|
||||||
|
"timestampSpec": {
|
||||||
|
"column": "timestamp"
|
||||||
|
},
|
||||||
|
"dimensionsSpec": {
|
||||||
|
"dimensions": [
|
||||||
|
"page",
|
||||||
|
{"type": "string", "name": "language", "createBitmapIndex": false},
|
||||||
|
"user",
|
||||||
|
"unpatrolled",
|
||||||
|
"newPage",
|
||||||
|
"robot",
|
||||||
|
"anonymous",
|
||||||
|
"namespace",
|
||||||
|
"continent",
|
||||||
|
"country",
|
||||||
|
"region",
|
||||||
|
"city"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metricsSpec": [
|
||||||
|
{
|
||||||
|
"type": "count",
|
||||||
|
"name": "count"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "added",
|
||||||
|
"fieldName": "added"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "deleted",
|
||||||
|
"fieldName": "deleted"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "delta",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "thetaSketch",
|
||||||
|
"type": "thetaSketch",
|
||||||
|
"fieldName": "user"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "quantilesDoublesSketch",
|
||||||
|
"type": "quantilesDoublesSketch",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "HLLSketchBuild",
|
||||||
|
"type": "HLLSketchBuild",
|
||||||
|
"fieldName": "user"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"granularitySpec": {
|
||||||
|
"segmentGranularity": "DAY",
|
||||||
|
"queryGranularity": "second",
|
||||||
|
"intervals" : [ "2013-08-31/2013-09-02" ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ioConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"inputSpec": {
|
||||||
|
"type": "static",
|
||||||
|
"paths": "%%INPUT_PATHS%%"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tuningConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"partitionsSpec": {
|
||||||
|
"assumeGrouped": true,
|
||||||
|
"targetPartitionSize": 75000,
|
||||||
|
"type": "hashed"
|
||||||
|
},
|
||||||
|
"jobProperties": {
|
||||||
|
"fs.permissions.umask-mode": "022",
|
||||||
|
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"dfs.datanode.address" : "druid-it-hadoop",
|
||||||
|
"dfs.client.use.datanode.hostname" : "true",
|
||||||
|
"dfs.datanode.use.datanode.hostname" : "true",
|
||||||
|
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
|
||||||
|
"yarn.nodemanager.vmem-check-enabled" : "false",
|
||||||
|
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -D",
|
||||||
|
"mapreduce.job.user.classpath.first" : "true",
|
||||||
|
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.map.memory.mb" : 1024,
|
||||||
|
"mapreduce.reduce.memory.mb" : 1024,
|
||||||
|
"fs.gs.auth.service.account.json.keyfile":"%%GCS_KEYFILE_PATH%%",
|
||||||
|
"fs.gs.working.dir":"/",
|
||||||
|
"fs.gs.path.encoding":"uri-path",
|
||||||
|
"fs.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem",
|
||||||
|
"fs.AbstractFileSystem.gs.impl": "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS",
|
||||||
|
"fs.gs.auth.service.account.enable": "true",
|
||||||
|
"mapred.child.env":"GOOGLE_APPLICATION_CREDENTIALS=%%GCS_KEYFILE_PATH%%",
|
||||||
|
"fs.gs.reported.permissions":"777"
|
||||||
|
},
|
||||||
|
"rowFlushBoundary": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,101 @@
|
||||||
|
{
|
||||||
|
"type": "index_hadoop",
|
||||||
|
"spec": {
|
||||||
|
"dataSchema": {
|
||||||
|
"dataSource": "%%DATASOURCE%%",
|
||||||
|
"parser": {
|
||||||
|
"type": "hadoopyString",
|
||||||
|
"parseSpec": {
|
||||||
|
"format" : "json",
|
||||||
|
"timestampSpec": {
|
||||||
|
"column": "timestamp"
|
||||||
|
},
|
||||||
|
"dimensionsSpec": {
|
||||||
|
"dimensions": [
|
||||||
|
"page",
|
||||||
|
{"type": "string", "name": "language", "createBitmapIndex": false},
|
||||||
|
"user",
|
||||||
|
"unpatrolled",
|
||||||
|
"newPage",
|
||||||
|
"robot",
|
||||||
|
"anonymous",
|
||||||
|
"namespace",
|
||||||
|
"continent",
|
||||||
|
"country",
|
||||||
|
"region",
|
||||||
|
"city"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metricsSpec": [
|
||||||
|
{
|
||||||
|
"type": "count",
|
||||||
|
"name": "count"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "added",
|
||||||
|
"fieldName": "added"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "deleted",
|
||||||
|
"fieldName": "deleted"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "delta",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "thetaSketch",
|
||||||
|
"type": "thetaSketch",
|
||||||
|
"fieldName": "user"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "quantilesDoublesSketch",
|
||||||
|
"type": "quantilesDoublesSketch",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "HLLSketchBuild",
|
||||||
|
"type": "HLLSketchBuild",
|
||||||
|
"fieldName": "user"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"granularitySpec": {
|
||||||
|
"segmentGranularity": "DAY",
|
||||||
|
"queryGranularity": "second",
|
||||||
|
"intervals" : [ "2013-08-31/2013-09-02" ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ioConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"inputSpec": {
|
||||||
|
"type": "static",
|
||||||
|
"paths": "%%INPUT_PATHS%%"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tuningConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"partitionsSpec": %%PARTITIONS_SPEC%%,
|
||||||
|
"jobProperties": {
|
||||||
|
"fs.permissions.umask-mode": "022",
|
||||||
|
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"dfs.datanode.address" : "druid-it-hadoop",
|
||||||
|
"dfs.client.use.datanode.hostname" : "true",
|
||||||
|
"dfs.datanode.use.datanode.hostname" : "true",
|
||||||
|
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
|
||||||
|
"yarn.nodemanager.vmem-check-enabled" : "false",
|
||||||
|
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.job.user.classpath.first" : "true",
|
||||||
|
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.map.memory.mb" : 1024,
|
||||||
|
"mapreduce.reduce.memory.mb" : 1024
|
||||||
|
},
|
||||||
|
"rowFlushBoundary": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
{
|
||||||
|
"type": "index_hadoop",
|
||||||
|
"spec": {
|
||||||
|
"dataSchema": {
|
||||||
|
"dataSource": "%%REINDEX_DATASOURCE%%",
|
||||||
|
"parser": {
|
||||||
|
"type": "hadoopyString",
|
||||||
|
"parseSpec": {
|
||||||
|
"format" : "json",
|
||||||
|
"timestampSpec": {
|
||||||
|
"column": "timestamp",
|
||||||
|
"format": "iso"
|
||||||
|
},
|
||||||
|
"dimensionsSpec": {
|
||||||
|
"dimensionExclusions" : ["robot", "continent"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metricsSpec": [
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "added",
|
||||||
|
"fieldName": "added"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "deleted",
|
||||||
|
"fieldName": "deleted"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "delta",
|
||||||
|
"fieldName": "delta"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"granularitySpec": {
|
||||||
|
"segmentGranularity": "DAY",
|
||||||
|
"queryGranularity": "second",
|
||||||
|
"intervals" : [ "2013-08-31/2013-09-01" ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ioConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"inputSpec": {
|
||||||
|
"type": "dataSource",
|
||||||
|
"ingestionSpec": {
|
||||||
|
"dataSource": "%%DATASOURCE%%",
|
||||||
|
"intervals": ["2013-08-31/2013-09-01"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tuningConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"partitionsSpec": {
|
||||||
|
"assumeGrouped": true,
|
||||||
|
"targetPartitionSize": 75000,
|
||||||
|
"type": "hashed"
|
||||||
|
},
|
||||||
|
"jobProperties": {
|
||||||
|
"fs.permissions.umask-mode": "022",
|
||||||
|
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"dfs.datanode.address" : "druid-it-hadoop",
|
||||||
|
"dfs.client.use.datanode.hostname" : "true",
|
||||||
|
"dfs.datanode.use.datanode.hostname" : "true",
|
||||||
|
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
|
||||||
|
"yarn.nodemanager.vmem-check-enabled" : "false",
|
||||||
|
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.job.user.classpath.first" : "true",
|
||||||
|
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8",
|
||||||
|
"mapreduce.map.memory.mb" : 1024,
|
||||||
|
"mapreduce.reduce.memory.mb" : 1024
|
||||||
|
},
|
||||||
|
"rowFlushBoundary": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,114 @@
|
||||||
|
{
|
||||||
|
"type": "index_hadoop",
|
||||||
|
"spec": {
|
||||||
|
"dataSchema": {
|
||||||
|
"dataSource": "%%DATASOURCE%%",
|
||||||
|
"parser": {
|
||||||
|
"type": "hadoopyString",
|
||||||
|
"parseSpec": {
|
||||||
|
"format" : "json",
|
||||||
|
"timestampSpec": {
|
||||||
|
"column": "timestamp"
|
||||||
|
},
|
||||||
|
"dimensionsSpec": {
|
||||||
|
"dimensions": [
|
||||||
|
"page",
|
||||||
|
{"type": "string", "name": "language", "createBitmapIndex": false},
|
||||||
|
"user",
|
||||||
|
"unpatrolled",
|
||||||
|
"newPage",
|
||||||
|
"robot",
|
||||||
|
"anonymous",
|
||||||
|
"namespace",
|
||||||
|
"continent",
|
||||||
|
"country",
|
||||||
|
"region",
|
||||||
|
"city"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metricsSpec": [
|
||||||
|
{
|
||||||
|
"type": "count",
|
||||||
|
"name": "count"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "added",
|
||||||
|
"fieldName": "added"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "deleted",
|
||||||
|
"fieldName": "deleted"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "doubleSum",
|
||||||
|
"name": "delta",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "thetaSketch",
|
||||||
|
"type": "thetaSketch",
|
||||||
|
"fieldName": "user"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "quantilesDoublesSketch",
|
||||||
|
"type": "quantilesDoublesSketch",
|
||||||
|
"fieldName": "delta"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "HLLSketchBuild",
|
||||||
|
"type": "HLLSketchBuild",
|
||||||
|
"fieldName": "user"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"granularitySpec": {
|
||||||
|
"segmentGranularity": "DAY",
|
||||||
|
"queryGranularity": "second",
|
||||||
|
"intervals" : [ "2013-08-31/2013-09-02" ]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ioConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"inputSpec": {
|
||||||
|
"type": "static",
|
||||||
|
"paths": "%%INPUT_PATHS%%"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tuningConfig": {
|
||||||
|
"type": "hadoop",
|
||||||
|
"partitionsSpec": {
|
||||||
|
"assumeGrouped": true,
|
||||||
|
"targetPartitionSize": 75000,
|
||||||
|
"type": "hashed"
|
||||||
|
},
|
||||||
|
"jobProperties": {
|
||||||
|
"fs.permissions.umask-mode": "022",
|
||||||
|
"fs.default.name" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"fs.defaultFS" : "hdfs://druid-it-hadoop:9000",
|
||||||
|
"dfs.datanode.address" : "druid-it-hadoop",
|
||||||
|
"dfs.client.use.datanode.hostname" : "true",
|
||||||
|
"dfs.datanode.use.datanode.hostname" : "true",
|
||||||
|
"yarn.resourcemanager.hostname" : "druid-it-hadoop",
|
||||||
|
"yarn.nodemanager.vmem-check-enabled" : "false",
|
||||||
|
"mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
|
||||||
|
"mapreduce.job.user.classpath.first" : "true",
|
||||||
|
"mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8 -Daws.region=%%AWS_REGION%%",
|
||||||
|
"mapreduce.map.memory.mb" : 1024,
|
||||||
|
"mapreduce.reduce.memory.mb" : 1024,
|
||||||
|
"fs.s3.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
|
||||||
|
"fs.s3.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
|
||||||
|
"fs.s3.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
|
||||||
|
"fs.s3n.awsAccessKeyId" : "%%AWS_ACCESS_KEY%%",
|
||||||
|
"fs.s3n.awsSecretAccessKey" : "%%AWS_SECRET_KEY%%",
|
||||||
|
"fs.s3n.impl" : "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
|
||||||
|
"fs.s3a.access.key" : "%%AWS_ACCESS_KEY%%",
|
||||||
|
"fs.s3a.secret.key" : "%%AWS_SECRET_KEY%%",
|
||||||
|
"fs.s3a.impl" : "org.apache.hadoop.fs.s3a.S3AFileSystem"
|
||||||
|
},
|
||||||
|
"rowFlushBoundary": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
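The jobProperties block above wires the same AWS credentials into all three Hadoop S3 filesystem bindings (s3, s3n, s3a). A hedged equivalent expressed as Hadoop Configuration calls, using the property names from the spec with placeholder values (the class and method are illustrative only):

import org.apache.hadoop.conf.Configuration;

public class S3JobPropertiesSketch
{
  public static Configuration withS3Credentials(String accessKey, String secretKey)
  {
    Configuration conf = new Configuration();
    // s3a binding
    conf.set("fs.s3a.access.key", accessKey);
    conf.set("fs.s3a.secret.key", secretKey);
    conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
    // s3n binding
    conf.set("fs.s3n.awsAccessKeyId", accessKey);
    conf.set("fs.s3n.awsSecretAccessKey", secretKey);
    conf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
    // legacy s3 binding
    conf.set("fs.s3.awsAccessKeyId", accessKey);
    conf.set("fs.s3.awsSecretAccessKey", secretKey);
    conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
    return conf;
  }
}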
@ -28,7 +28,6 @@
<test name="AllSerializedTests">
<test name="AllSerializedTests">
<packages>
<packages>
<package name="org.apache.druid.tests.*">
<package name="org.apache.druid.tests.*">
<exclude name="org.apache.druid.tests.hadoop"/>
<exclude name="org.apache.druid.tests.parallelized"/>
<exclude name="org.apache.druid.tests.parallelized"/>
</package>
</package>
</packages>
</packages>