BAEL-398 introduction to genie

binary-joe 2019-11-20 00:17:33 +01:00
parent 576d4d80ab
commit 53ee65d82c
7 changed files with 295 additions and 0 deletions

netflix/README.md (new file)

@@ -0,0 +1,7 @@
## Netflix
This module contains articles about Netflix.
### Relevant Articles
- [Introduction to Netflix Genie](https://github.com/eugenp/tutorials/tree/master/netflix/genie)

netflix/genie/pom.xml (new file)

@@ -0,0 +1,18 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.baeldung</groupId>
    <artifactId>genie</artifactId>
    <packaging>jar</packaging>
    <version>1.0.0-SNAPSHOT</version>
    <name>Genie</name>
    <description>Sample project for Netflix Genie</description>

    <parent>
        <groupId>com.baeldung</groupId>
        <artifactId>netflix</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>
</project>
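
The module declares no dependencies at build time; the Genie demo itself runs entirely in Docker. Building the new module is therefore the usual Maven flow. A minimal sketch, assuming it is run from the repository root:

# build just the new Genie module
cd netflix/genie
mvn clean install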

netflix/genie/docker-compose.yml (new file)

@@ -0,0 +1,42 @@
version: "2"
services:
  genie:
    image: netflixoss/genie-app:3.3.9
    ports:
      - "8080:8080"
    depends_on:
      - genie-hadoop-prod
      - genie-hadoop-test
      - genie-apache
    tty: true
    container_name: genie_demo_app_3.3.9
  genie-apache:
    image: netflixoss/genie-demo-apache:3.3.9
    tty: true
    container_name: genie_demo_apache_3.3.9
  genie-client:
    image: netflixoss/genie-demo-client:3.3.9
    depends_on:
      - genie
    tty: true
    container_name: genie_demo_client_3.3.9
  genie-hadoop-prod:
    image: sequenceiq/hadoop-docker:2.7.1
    command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash"
    ports:
      - "19888:19888"
      - "19070:50070"
      - "19075:50075"
      - "8088:8088"
    tty: true
    container_name: genie_demo_hadoop_prod_3.3.9
  genie-hadoop-test:
    image: sequenceiq/hadoop-docker:2.7.1
    command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash"
    ports:
      - "19889:19888"
      - "19071:50070"
      - "19076:50075"
      - "8089:8088"
    tty: true
    container_name: genie_demo_hadoop_test_3.3.9
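
With the file saved as docker-compose.yml, the whole demo environment starts locally in one command. A minimal usage sketch (plain Docker Compose, nothing Genie-specific assumed):

# start Genie, the Apache file server, and both Hadoop clusters in the background
docker-compose up -d

# tail the Genie server log until it reports startup
docker-compose logs -f genie

# the Genie UI then answers on http://localhost:8080
curl -s http://localhost:8080

The two sequenceiq/hadoop-docker containers play the role of a "prod" and a "test" cluster; their Hadoop web UIs are remapped (50070 to 19070/19071, 8088 to 8088/8089) so both clusters can coexist on one host.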

netflix/genie/setup.py (new file)

@@ -0,0 +1,122 @@
#!/usr/bin/python2.7
# Copyright 2016 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

import yaml

from pygenie.client import Genie
from pygenie.conf import GenieConf

logging.basicConfig(level=logging.WARNING)

LOGGER = logging.getLogger(__name__)


def load_yaml(yaml_file):
    with open(yaml_file) as _file:
        # safe_load avoids executing arbitrary tags from the YAML resource files
        return yaml.safe_load(_file)


genie_conf = GenieConf()
genie_conf.genie.url = "http://genie:8080"

genie = Genie(genie_conf)

# Register the application binaries: Hadoop and two Spark versions
hadoop_application = load_yaml("applications/hadoop271.yml")
hadoop_application_id = genie.create_application(hadoop_application)
LOGGER.warning("Created Hadoop 2.7.1 application with id = [%s]", hadoop_application_id)

spark_163_application = load_yaml("applications/spark163.yml")
spark_163_application_id = genie.create_application(spark_163_application)
LOGGER.warning("Created Spark 1.6.3 application with id = [%s]", spark_163_application_id)

spark_201_application = load_yaml("applications/spark201.yml")
spark_201_application_id = genie.create_application(spark_201_application)
LOGGER.warning("Created Spark 2.0.1 application with id = [%s]", spark_201_application_id)

# Register the commands users will be able to run against a cluster
hadoop_command = load_yaml("commands/hadoop271.yml")
hadoop_command_id = genie.create_command(hadoop_command)
LOGGER.warning("Created Hadoop command with id = [%s]", hadoop_command_id)

hdfs_command = load_yaml("commands/hdfs271.yml")
hdfs_command_id = genie.create_command(hdfs_command)
LOGGER.warning("Created HDFS command with id = [%s]", hdfs_command_id)

yarn_command = load_yaml("commands/yarn271.yml")
yarn_command_id = genie.create_command(yarn_command)
LOGGER.warning("Created Yarn command with id = [%s]", yarn_command_id)

spark_163_shell_command = load_yaml("commands/sparkShell163.yml")
spark_163_shell_command_id = genie.create_command(spark_163_shell_command)
LOGGER.warning("Created Spark 1.6.3 Shell command with id = [%s]", spark_163_shell_command_id)

spark_163_submit_command = load_yaml("commands/sparkSubmit163.yml")
spark_163_submit_command_id = genie.create_command(spark_163_submit_command)
LOGGER.warning("Created Spark 1.6.3 Submit command with id = [%s]", spark_163_submit_command_id)

spark_201_shell_command = load_yaml("commands/sparkShell201.yml")
spark_201_shell_command_id = genie.create_command(spark_201_shell_command)
LOGGER.warning("Created Spark 2.0.1 Shell command with id = [%s]", spark_201_shell_command_id)

spark_201_submit_command = load_yaml("commands/sparkSubmit201.yml")
spark_201_submit_command_id = genie.create_command(spark_201_submit_command)
LOGGER.warning("Created Spark 2.0.1 Submit command with id = [%s]", spark_201_submit_command_id)

# Tell Genie which application binaries each command depends on
genie.set_application_for_command(hadoop_command_id, [hadoop_application_id])
LOGGER.warning("Set applications for Hadoop command to = [%s]", hadoop_application_id)

genie.set_application_for_command(hdfs_command_id, [hadoop_application_id])
LOGGER.warning("Set applications for HDFS command to = [%s]", hadoop_application_id)

genie.set_application_for_command(yarn_command_id, [hadoop_application_id])
LOGGER.warning("Set applications for Yarn command to = [%s]", hadoop_application_id)

genie.set_application_for_command(spark_163_shell_command_id, [hadoop_application_id, spark_163_application_id])
LOGGER.warning("Set applications for Spark 1.6.3 Shell command to = [%s]",
               [hadoop_application_id, spark_163_application_id])

genie.set_application_for_command(spark_163_submit_command_id, [hadoop_application_id, spark_163_application_id])
LOGGER.warning("Set applications for Spark 1.6.3 Submit command to = [%s]",
               [hadoop_application_id, spark_163_application_id])

genie.set_application_for_command(spark_201_shell_command_id, [hadoop_application_id, spark_201_application_id])
LOGGER.warning("Set applications for Spark 2.0.1 Shell command to = [%s]",
               [hadoop_application_id, spark_201_application_id])

genie.set_application_for_command(spark_201_submit_command_id, [hadoop_application_id, spark_201_application_id])
LOGGER.warning("Set applications for Spark 2.0.1 Submit command to = [%s]",
               [hadoop_application_id, spark_201_application_id])

# Register the two clusters, then attach every command to both of them
prod_cluster = load_yaml("clusters/prod.yml")
prod_cluster_id = genie.create_cluster(prod_cluster)
LOGGER.warning("Created prod cluster with id = [%s]", prod_cluster_id)

test_cluster = load_yaml("clusters/test.yml")
test_cluster_id = genie.create_cluster(test_cluster)
LOGGER.warning("Created test cluster with id = [%s]", test_cluster_id)

genie.set_commands_for_cluster(
    prod_cluster_id,
    [hadoop_command_id, hdfs_command_id, yarn_command_id, spark_163_shell_command_id, spark_201_shell_command_id,
     spark_163_submit_command_id, spark_201_submit_command_id]
)
LOGGER.warning("Added all commands to the prod cluster with id = [%s]", prod_cluster_id)

genie.set_commands_for_cluster(
    test_cluster_id,
    [hadoop_command_id, hdfs_command_id, yarn_command_id, spark_163_shell_command_id, spark_201_shell_command_id,
     spark_163_submit_command_id, spark_201_submit_command_id]
)
LOGGER.warning("Added all commands to the test cluster with id = [%s]", test_cluster_id)

Spark job submission script (new file; filename not shown in the diff)

@@ -0,0 +1,80 @@
# Copyright 2016 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##################################################################################
# This script assumes setup.py has already been run to configure Genie and that
# this script is executed on the host where Genie is running. If it's executed on
# another host change the localhost line below.
##################################################################################
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import sys

import pygenie

logging.basicConfig(level=logging.ERROR)

LOGGER = logging.getLogger(__name__)

pygenie.conf.DEFAULT_GENIE_URL = "http://genie:8080"

# Create a job instance and fill in the required parameters
job = pygenie.jobs.GenieJob() \
    .genie_username('root') \
    .job_version('3.0.0')

# Set cluster criteria, which determine the cluster to run the job on;
# argv[1] must match a sched tag on one of the registered clusters
# (defined in clusters/prod.yml and clusters/test.yml)
job.cluster_tags(['sched:' + str(sys.argv[1]), 'type:yarn'])

# Set command criteria, which determine what command Genie executes for the job
if len(sys.argv) == 2:
    # Use the default Spark
    job.command_tags(['type:spark-submit'])
    job.job_name('Genie Demo Spark Submit Job')
else:
    # Use the Spark version passed in as the optional second argument
    job.command_tags(['type:spark-submit', 'ver:' + str(sys.argv[2])])
    job.job_name('Genie Demo Spark ' + str(sys.argv[2]) + ' Submit Job')

# Any command-line arguments to run along with the command. In this case they hold
# the actual workload, but this could also be done via an attachment or file
# dependency. The jar location is where it is installed on the Genie node, but the
# jar could also be passed as an attachment and used locally.
if len(sys.argv) == 2:
    # Default is Spark 1.6.3
    job.command_arguments(
        "--class org.apache.spark.examples.SparkPi "
        "${SPARK_HOME}/lib/spark-examples*.jar "
        "10"
    )
else:
    # Override with the Spark 2.x examples jar location
    job.command_arguments(
        "--class org.apache.spark.examples.SparkPi "
        "${SPARK_HOME}/examples/jars/spark-examples*.jar "
        "10"
    )

# Submit the job to Genie
running_job = job.execute()

print('Job {} is {}'.format(running_job.job_id, running_job.status))
print(running_job.job_link)

# Block and wait until the job is done
running_job.wait()

print('Job {} finished with status {}'.format(running_job.job_id, running_job.status))
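
A hedged usage sketch: the script name (run_spark_submit_job.py) and the sched tag values (prod, test) below are illustrative, since neither is shown in this diff; substitute whatever the module actually uses:

# inside the genie-client container: run SparkPi on the cluster tagged sched:test, with Spark 2.0.1
python ./run_spark_submit_job.py test 2.0.1

# same job on the sched:prod cluster, using the default Spark 1.6.3 command
python ./run_spark_submit_job.py prod

The job id and a link to its page on http://localhost:8080 print immediately; the script then blocks in running_job.wait() until Genie reports a terminal status.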

netflix/pom.xml (new file)

@@ -0,0 +1,22 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.baeldung</groupId>
    <artifactId>netflix</artifactId>
    <packaging>pom</packaging>
    <version>1.0.0-SNAPSHOT</version>
    <name>Netflix</name>
    <description>Module for Netflix projects</description>

    <parent>
        <groupId>com.baeldung</groupId>
        <artifactId>parent-modules</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>

    <modules>
        <module>genie</module>
    </modules>
</project>

pom.xml (modified)

@@ -583,6 +583,7 @@
         <module>mustache</module>
         <module>mybatis</module>
+        <module>netflix</module>
         <module>optaplanner</module>
         <module>orika</module>
@@ -662,6 +663,8 @@
     </build>
     <modules>
+        <module>netflix</module>
         <module>parent-boot-1</module>
         <module>parent-boot-2</module>
         <module>parent-spring-4</module>
@@ -1348,6 +1351,7 @@
         <module>mustache</module>
         <module>mybatis</module>
+        <module>netflix</module>
         <module>optaplanner</module>
         <module>orika</module>