BAEL-398 introduction to genie (#8217)
This commit is contained in:
parent
9b801298ca
commit
b6826f8c6e
|
@ -0,0 +1,7 @@
|
|||
## Netflix
|
||||
|
||||
This module contains articles about Netflix.
|
||||
|
||||
### Relevant Articles
|
||||
|
||||
- [Introduction to Netflix Genie](https://github.com/eugenp/tutorials/tree/master/netflix/genie)
|
|
@ -0,0 +1,18 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>genie</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
|
||||
<name>Genie</name>
|
||||
<description>Sample project for Netflix Genie</description>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>netflix</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
</project>
|
|
@ -0,0 +1,42 @@
|
|||
version: "2"
|
||||
services:
|
||||
genie:
|
||||
image: netflixoss/genie-app:3.3.9
|
||||
ports:
|
||||
- "8080:8080"
|
||||
depends_on:
|
||||
- genie-hadoop-prod
|
||||
- genie-hadoop-test
|
||||
- genie-apache
|
||||
tty: true
|
||||
container_name: genie_demo_app_3.3.9
|
||||
genie-apache:
|
||||
image: netflixoss/genie-demo-apache:3.3.9
|
||||
tty: true
|
||||
container_name: genie_demo_apache_3.3.9
|
||||
genie-client:
|
||||
image: netflixoss/genie-demo-client:3.3.9
|
||||
depends_on:
|
||||
- genie
|
||||
tty: true
|
||||
container_name: genie_demo_client_3.3.9
|
||||
genie-hadoop-prod:
|
||||
image: sequenceiq/hadoop-docker:2.7.1
|
||||
command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash"
|
||||
ports:
|
||||
- "19888:19888"
|
||||
- "19070:50070"
|
||||
- "19075:50075"
|
||||
- "8088:8088"
|
||||
tty: true
|
||||
container_name: genie_demo_hadoop_prod_3.3.9
|
||||
genie-hadoop-test:
|
||||
image: sequenceiq/hadoop-docker:2.7.1
|
||||
command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash"
|
||||
ports:
|
||||
- "19889:19888"
|
||||
- "19071:50070"
|
||||
- "19076:50075"
|
||||
- "8089:8088"
|
||||
tty: true
|
||||
container_name: genie_demo_hadoop_test_3.3.9
|
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/python2.7

# Copyright 2016 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Register the demo applications, commands and clusters with a running
Genie instance (reachable at http://genie:8080, see docker-compose.yml)."""

import logging

import yaml
from pygenie.client import Genie
from pygenie.conf import GenieConf

logging.basicConfig(level=logging.WARNING)

LOGGER = logging.getLogger(__name__)


def load_yaml(yaml_file):
    """Parse a YAML resource-specification file and return its contents."""
    with open(yaml_file) as _file:
        # safe_load: calling yaml.load without an explicit Loader is
        # deprecated and can construct arbitrary Python objects.
        return yaml.safe_load(_file)


genie_conf = GenieConf()
genie_conf.genie.url = "http://genie:8080"

genie = Genie(genie_conf)

# --- Applications ----------------------------------------------------------
# Logger.warn is deprecated; warning() with lazy %-style args is used instead
# of eager "%"-formatting throughout.
hadoop_application = load_yaml("applications/hadoop271.yml")
hadoop_application_id = genie.create_application(hadoop_application)
LOGGER.warning("Created Hadoop 2.7.1 application with id = [%s]", hadoop_application_id)

spark_163_application = load_yaml("applications/spark163.yml")
spark_163_application_id = genie.create_application(spark_163_application)
LOGGER.warning("Created Spark 1.6.3 application with id = [%s]", spark_163_application_id)

spark_201_application = load_yaml("applications/spark201.yml")
spark_201_application_id = genie.create_application(spark_201_application)
LOGGER.warning("Created Spark 2.0.1 application with id = [%s]", spark_201_application_id)

# --- Commands --------------------------------------------------------------
hadoop_command = load_yaml("commands/hadoop271.yml")
hadoop_command_id = genie.create_command(hadoop_command)
LOGGER.warning("Created Hadoop command with id = [%s]", hadoop_command_id)

hdfs_command = load_yaml("commands/hdfs271.yml")
hdfs_command_id = genie.create_command(hdfs_command)
LOGGER.warning("Created HDFS command with id = [%s]", hdfs_command_id)

yarn_command = load_yaml("commands/yarn271.yml")
yarn_command_id = genie.create_command(yarn_command)
LOGGER.warning("Created Yarn command with id = [%s]", yarn_command_id)

spark_163_shell_command = load_yaml("commands/sparkShell163.yml")
spark_163_shell_command_id = genie.create_command(spark_163_shell_command)
LOGGER.warning("Created Spark 1.6.3 Shell command with id = [%s]", spark_163_shell_command_id)

spark_163_submit_command = load_yaml("commands/sparkSubmit163.yml")
spark_163_submit_command_id = genie.create_command(spark_163_submit_command)
LOGGER.warning("Created Spark 1.6.3 Submit command with id = [%s]", spark_163_submit_command_id)

spark_201_shell_command = load_yaml("commands/sparkShell201.yml")
spark_201_shell_command_id = genie.create_command(spark_201_shell_command)
LOGGER.warning("Created Spark 2.0.1 Shell command with id = [%s]", spark_201_shell_command_id)

spark_201_submit_command = load_yaml("commands/sparkSubmit201.yml")
spark_201_submit_command_id = genie.create_command(spark_201_submit_command)
LOGGER.warning("Created Spark 2.0.1 Submit command with id = [%s]", spark_201_submit_command_id)

# --- Link applications to commands -----------------------------------------
genie.set_application_for_command(hadoop_command_id, [hadoop_application_id])
LOGGER.warning("Set applications for Hadoop command to = [%s]", hadoop_application_id)

genie.set_application_for_command(hdfs_command_id, [hadoop_application_id])
# "[[%s]]" in the original HDFS/Yarn messages was a typo; normalized to the
# "[%s]" form used by every other message in this script.
LOGGER.warning("Set applications for HDFS command to = [%s]", hadoop_application_id)

genie.set_application_for_command(yarn_command_id, [hadoop_application_id])
LOGGER.warning("Set applications for Yarn command to = [%s]", hadoop_application_id)

genie.set_application_for_command(spark_163_shell_command_id, [hadoop_application_id, spark_163_application_id])
LOGGER.warning("Set applications for Spark 1.6.3 Shell command to = [%s]",
               [hadoop_application_id, spark_163_application_id])

genie.set_application_for_command(spark_163_submit_command_id, [hadoop_application_id, spark_163_application_id])
LOGGER.warning("Set applications for Spark 1.6.3 Submit command to = [%s]",
               [hadoop_application_id, spark_163_application_id])

genie.set_application_for_command(spark_201_shell_command_id, [hadoop_application_id, spark_201_application_id])
LOGGER.warning("Set applications for Spark 2.0.1 Shell command to = [%s]",
               [hadoop_application_id, spark_201_application_id])

genie.set_application_for_command(spark_201_submit_command_id, [hadoop_application_id, spark_201_application_id])
LOGGER.warning("Set applications for Spark 2.0.1 Submit command to = [%s]",
               [hadoop_application_id, spark_201_application_id])

# --- Clusters --------------------------------------------------------------
prod_cluster = load_yaml("clusters/prod.yml")
prod_cluster_id = genie.create_cluster(prod_cluster)
LOGGER.warning("Created prod cluster with id = [%s]", prod_cluster_id)

test_cluster = load_yaml("clusters/test.yml")
test_cluster_id = genie.create_cluster(test_cluster)
LOGGER.warning("Created test cluster with id = [%s]", test_cluster_id)

# Both clusters get the identical command set; hoisted to one list so the
# two registrations can never drift apart.
all_command_ids = [hadoop_command_id, hdfs_command_id, yarn_command_id,
                   spark_163_shell_command_id, spark_201_shell_command_id,
                   spark_163_submit_command_id, spark_201_submit_command_id]

genie.set_commands_for_cluster(prod_cluster_id, all_command_ids)
LOGGER.warning("Added all commands to the prod cluster with id = [%s]", prod_cluster_id)

genie.set_commands_for_cluster(test_cluster_id, all_command_ids)
LOGGER.warning("Added all commands to the test cluster with id = [%s]", test_cluster_id)
|
|
@ -0,0 +1,80 @@
|
|||
# Copyright 2016 Netflix, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
##################################################################################
|
||||
# This script assumes setup.py has already been run to configure Genie and that
|
||||
# this script is executed on the host where Genie is running. If it's executed on
|
||||
# another host change the localhost line below.
|
||||
##################################################################################
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import pygenie
|
||||
|
||||
logging.basicConfig(level=logging.ERROR)
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
pygenie.conf.DEFAULT_GENIE_URL = "http://genie:8080"
|
||||
|
||||
# Create a job instance and fill in the required parameters
|
||||
job = pygenie.jobs.GenieJob() \
|
||||
.genie_username('root') \
|
||||
.job_version('3.0.0')
|
||||
|
||||
# Set cluster criteria which determine the cluster to run the job on
|
||||
job.cluster_tags(['sched:' + str(sys.argv[1]), 'type:yarn'])
|
||||
|
||||
# Set command criteria which will determine what command Genie executes for the job
|
||||
if len(sys.argv) == 2:
|
||||
# Use the default spark
|
||||
job.command_tags(['type:spark-submit'])
|
||||
job.job_name('Genie Demo Spark Submit Job')
|
||||
else:
|
||||
# Use the spark version passed in
|
||||
job.command_tags(['type:spark-submit', 'ver:' + str(sys.argv[2])])
|
||||
job.job_name('Genie Demo Spark ' + str(sys.argv[2]) + ' Submit Job')
|
||||
|
||||
# Any command line arguments to run along with the command. In this case it holds
|
||||
# the actual query but this could also be done via an attachment or file dependency.
|
||||
# This jar location is where it is installed on the Genie node but could also pass
|
||||
# the jar as attachment and use it locally
|
||||
if len(sys.argv) == 2:
|
||||
# Default is spark 1.6.3
|
||||
job.command_arguments(
|
||||
"--class org.apache.spark.examples.SparkPi "
|
||||
"${SPARK_HOME}/lib/spark-examples*.jar "
|
||||
"10"
|
||||
)
|
||||
else:
|
||||
# Override with Spark 2.x location
|
||||
job.command_arguments(
|
||||
"--class org.apache.spark.examples.SparkPi "
|
||||
"${SPARK_HOME}/examples/jars/spark-examples*.jar "
|
||||
"10"
|
||||
)
|
||||
|
||||
# Submit the job to Genie
|
||||
running_job = job.execute()
|
||||
|
||||
print('Job {} is {}'.format(running_job.job_id, running_job.status))
|
||||
print(running_job.job_link)
|
||||
|
||||
# Block and wait until job is done
|
||||
running_job.wait()
|
||||
|
||||
print('Job {} finished with status {}'.format(running_job.job_id, running_job.status))
|
|
@ -0,0 +1,22 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>netflix</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
|
||||
<name>Netflix</name>
|
||||
<description>Module for Netflix projects</description>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>parent-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<modules>
|
||||
<module>genie</module>
|
||||
</modules>
|
||||
</project>
|
4
pom.xml
4
pom.xml
|
@ -590,6 +590,7 @@
|
|||
<module>mustache</module>
|
||||
<module>mybatis</module>
|
||||
|
||||
<module>netflix</module>
|
||||
|
||||
<module>optaplanner</module>
|
||||
<module>orika</module>
|
||||
|
@ -669,6 +670,8 @@
|
|||
</build>
|
||||
|
||||
<modules>
|
||||
<module>netflix</module>
|
||||
|
||||
<module>parent-boot-1</module>
|
||||
<module>parent-boot-2</module>
|
||||
<module>parent-spring-4</module>
|
||||
|
@ -1220,6 +1223,7 @@
|
|||
<module>mustache</module>
|
||||
<module>mybatis</module>
|
||||
|
||||
<module>netflix</module>
|
||||
|
||||
<module>optaplanner</module>
|
||||
<module>orika</module>
|
||||
|
|
Loading…
Reference in New Issue