From b6826f8c6e77fca2e3c739a9bf4f55de8d42f160 Mon Sep 17 00:00:00 2001 From: binary-joe <48189951+binary-joe@users.noreply.github.com> Date: Fri, 29 Nov 2019 20:26:30 +0100 Subject: [PATCH] BAEL-398 introduction to genie (#8217) --- netflix/README.md | 7 + netflix/genie/pom.xml | 18 +++ .../netflix/genie/docker/docker-compose.yml | 42 ++++++ .../netflix/genie/docker/init_demo.py | 122 ++++++++++++++++++ .../genie/docker/run_spark_submit_job.py | 80 ++++++++++++ netflix/pom.xml | 22 ++++ pom.xml | 4 + 7 files changed, 295 insertions(+) create mode 100644 netflix/README.md create mode 100644 netflix/genie/pom.xml create mode 100644 netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml create mode 100644 netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py create mode 100644 netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py create mode 100644 netflix/pom.xml diff --git a/netflix/README.md b/netflix/README.md new file mode 100644 index 0000000000..0af07d37a8 --- /dev/null +++ b/netflix/README.md @@ -0,0 +1,7 @@ +## Netflix + +This module contains articles about Netflix. + +### Relevant articles + +- [Introduction to Netflix Genie](https://github.com/eugenp/tutorials/tree/master/netflix/genie) diff --git a/netflix/genie/pom.xml b/netflix/genie/pom.xml new file mode 100644 index 0000000000..9c78a11464 --- /dev/null +++ b/netflix/genie/pom.xml @@ -0,0 +1,18 @@ + + 4.0.0 + + com.baeldung + genie + jar + 1.0.0-SNAPSHOT + + Genie + Sample project for Netflix Genie + + + com.baeldung + netflix + 1.0.0-SNAPSHOT + + \ No newline at end of file diff --git a/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml new file mode 100644 index 0000000000..db7a76a014 --- /dev/null +++ b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml @@ -0,0 +1,42 @@ +version: "2" +services: + genie: + image: netflixoss/genie-app:3.3.9 + ports: + - "8080:8080" + depends_on: + - genie-hadoop-prod + - genie-hadoop-test + - genie-apache + tty: true + container_name: genie_demo_app_3.3.9 + genie-apache: + image: netflixoss/genie-demo-apache:3.3.9 + tty: true + container_name: genie_demo_apache_3.3.9 + genie-client: + image: netflixoss/genie-demo-client:3.3.9 + depends_on: + - genie + tty: true + container_name: genie_demo_client_3.3.9 + genie-hadoop-prod: + image: sequenceiq/hadoop-docker:2.7.1 + command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash" + ports: + - "19888:19888" + - "19070:50070" + - "19075:50075" + - "8088:8088" + tty: true + container_name: genie_demo_hadoop_prod_3.3.9 + genie-hadoop-test: + image: sequenceiq/hadoop-docker:2.7.1 + command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash" + ports: + - "19889:19888" + - "19071:50070" + - "19076:50075" + - "8089:8088" + tty: true + container_name: genie_demo_hadoop_test_3.3.9 diff --git a/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py new file mode 100644 index 0000000000..548b056fd8 --- /dev/null +++ b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py @@ -0,0 +1,122 @@ +#!/usr/bin/python2.7 + +# Copyright 2016 Netflix, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import yaml +from pygenie.client import Genie +from pygenie.conf import GenieConf + +logging.basicConfig(level=logging.WARNING) + +LOGGER = logging.getLogger(__name__) + + +def load_yaml(yaml_file): + with open(yaml_file) as _file: + return yaml.load(_file) + + +genie_conf = GenieConf() +genie_conf.genie.url = "http://genie:8080" + +genie = Genie(genie_conf) + +hadoop_application = load_yaml("applications/hadoop271.yml") +hadoop_application_id = genie.create_application(hadoop_application) +LOGGER.warn("Created Hadoop 2.7.1 application with id = [%s]" % hadoop_application_id) + +spark_163_application = load_yaml("applications/spark163.yml") +spark_163_application_id = genie.create_application(spark_163_application) +LOGGER.warn("Created Spark 1.6.3 application with id = [%s]" % spark_163_application_id) + +spark_201_application = load_yaml("applications/spark201.yml") +spark_201_application_id = genie.create_application(spark_201_application) +LOGGER.warn("Created Spark 2.0.1 application with id = [%s]" % spark_201_application_id) + +hadoop_command = load_yaml("commands/hadoop271.yml") +hadoop_command_id = genie.create_command(hadoop_command) +LOGGER.warn("Created Hadoop command with id = [%s]" % hadoop_command_id) + +hdfs_command = load_yaml("commands/hdfs271.yml") +hdfs_command_id = genie.create_command(hdfs_command) +LOGGER.warn("Created HDFS command with id = [%s]" % hdfs_command_id) + +yarn_command = load_yaml("commands/yarn271.yml") +yarn_command_id = genie.create_command(yarn_command) +LOGGER.warn("Created Yarn command with id = [%s]" % yarn_command_id) + +spark_163_shell_command = load_yaml("commands/sparkShell163.yml") +spark_163_shell_command_id = genie.create_command(spark_163_shell_command) +LOGGER.warn("Created Spark 1.6.3 Shell command with id = [%s]" % spark_163_shell_command_id) + +spark_163_submit_command = load_yaml("commands/sparkSubmit163.yml") +spark_163_submit_command_id = genie.create_command(spark_163_submit_command) +LOGGER.warn("Created Spark 1.6.3 Submit command with id = [%s]" % spark_163_submit_command_id) + +spark_201_shell_command = load_yaml("commands/sparkShell201.yml") +spark_201_shell_command_id = genie.create_command(spark_201_shell_command) +LOGGER.warn("Created Spark 2.0.1 Shell command with id = [%s]" % spark_201_shell_command_id) + +spark_201_submit_command = load_yaml("commands/sparkSubmit201.yml") +spark_201_submit_command_id = genie.create_command(spark_201_submit_command) +LOGGER.warn("Created Spark 2.0.1 Submit command with id = [%s]" % spark_201_submit_command_id) + +genie.set_application_for_command(hadoop_command_id, [hadoop_application_id]) +LOGGER.warn("Set applications for Hadoop command to = [%s]" % hadoop_application_id) + +genie.set_application_for_command(hdfs_command_id, [hadoop_application_id]) +LOGGER.warn("Set applications for HDFS command to = [[%s]]" % hadoop_application_id) + +genie.set_application_for_command(yarn_command_id, [hadoop_application_id]) +LOGGER.warn("Set applications for Yarn command to = [[%s]]" % hadoop_application_id) + +genie.set_application_for_command(spark_163_shell_command_id, [hadoop_application_id, spark_163_application_id]) +LOGGER.warn("Set applications for Spark 1.6.3 Shell command to = [%s]" % + [hadoop_application_id, spark_163_application_id]) + +genie.set_application_for_command(spark_163_submit_command_id, [hadoop_application_id, spark_163_application_id]) +LOGGER.warn("Set applications for Spark 1.6.3 Submit command to = [%s]" % + [hadoop_application_id, spark_163_application_id]) + +genie.set_application_for_command(spark_201_shell_command_id, [hadoop_application_id, spark_201_application_id]) +LOGGER.warn("Set applications for Spark 2.0.1 Shell command to = [%s]" % + [hadoop_application_id, spark_201_application_id]) + +genie.set_application_for_command(spark_201_submit_command_id, [hadoop_application_id, spark_201_application_id]) +LOGGER.warn("Set applications for Spark 2.0.1 Submit command to = [%s]" % + [hadoop_application_id, spark_201_application_id]) + +prod_cluster = load_yaml("clusters/prod.yml") +prod_cluster_id = genie.create_cluster(prod_cluster) +LOGGER.warn("Created prod cluster with id = [%s]" % prod_cluster_id) + +test_cluster = load_yaml("clusters/test.yml") +test_cluster_id = genie.create_cluster(test_cluster) +LOGGER.warn("Created test cluster with id = [%s]" % test_cluster_id) + +genie.set_commands_for_cluster( + prod_cluster_id, + [hadoop_command_id, hdfs_command_id, yarn_command_id, spark_163_shell_command_id, spark_201_shell_command_id, + spark_163_submit_command_id, spark_201_submit_command_id] +) +LOGGER.warn("Added all commands to the prod cluster with id = [%s]" % prod_cluster_id) +genie.set_commands_for_cluster( + test_cluster_id, + [hadoop_command_id, hdfs_command_id, yarn_command_id, spark_163_shell_command_id, spark_201_shell_command_id, + spark_163_submit_command_id, spark_201_submit_command_id] +) +LOGGER.warn("Added all commands to the test cluster with id = [%s]" % test_cluster_id) \ No newline at end of file diff --git a/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py new file mode 100644 index 0000000000..67539a0908 --- /dev/null +++ b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py @@ -0,0 +1,80 @@ +# Copyright 2016 Netflix, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +################################################################################## +# This script assumes setup.py has already been run to configure Genie and that +# this script is executed on the host where Genie is running. If it's executed on +# another host change the localhost line below. +################################################################################## + +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import sys + +import pygenie + +logging.basicConfig(level=logging.ERROR) + +LOGGER = logging.getLogger(__name__) + +pygenie.conf.DEFAULT_GENIE_URL = "http://genie:8080" + +# Create a job instance and fill in the required parameters +job = pygenie.jobs.GenieJob() \ + .genie_username('root') \ + .job_version('3.0.0') + +# Set cluster criteria which determine the cluster to run the job on +job.cluster_tags(['sched:' + str(sys.argv[1]), 'type:yarn']) + +# Set command criteria which will determine what command Genie executes for the job +if len(sys.argv) == 2: + # Use the default spark + job.command_tags(['type:spark-submit']) + job.job_name('Genie Demo Spark Submit Job') +else: + # Use the spark version passed in + job.command_tags(['type:spark-submit', 'ver:' + str(sys.argv[2])]) + job.job_name('Genie Demo Spark ' + str(sys.argv[2]) + ' Submit Job') + +# Any command line arguments to run along with the command. In this case it holds +# the actual query but this could also be done via an attachment or file dependency. +# This jar location is where it is installed on the Genie node but could also pass +# the jar as attachment and use it locally +if len(sys.argv) == 2: + # Default is spark 1.6.3 + job.command_arguments( + "--class org.apache.spark.examples.SparkPi " + "${SPARK_HOME}/lib/spark-examples*.jar " + "10" + ) +else: + # Override with Spark 2.x location + job.command_arguments( + "--class org.apache.spark.examples.SparkPi " + "${SPARK_HOME}/examples/jars/spark-examples*.jar " + "10" + ) + +# Submit the job to Genie +running_job = job.execute() + +print('Job {} is {}'.format(running_job.job_id, running_job.status)) +print(running_job.job_link) + +# Block and wait until job is done +running_job.wait() + +print('Job {} finished with status {}'.format(running_job.job_id, running_job.status)) \ No newline at end of file diff --git a/netflix/pom.xml b/netflix/pom.xml new file mode 100644 index 0000000000..4b0954d777 --- /dev/null +++ b/netflix/pom.xml @@ -0,0 +1,22 @@ + + 4.0.0 + + com.baeldung + netflix + pom + 1.0.0-SNAPSHOT + + Netflix + Module for Netflix projects + + + com.baeldung + parent-modules + 1.0.0-SNAPSHOT + + + + genie + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 05999f0926..25848f4e83 100644 --- a/pom.xml +++ b/pom.xml @@ -590,6 +590,7 @@ mustache mybatis + netflix optaplanner orika @@ -669,6 +670,8 @@ + netflix + parent-boot-1 parent-boot-2 parent-spring-4 @@ -1220,6 +1223,7 @@ mustache mybatis + netflix optaplanner orika