From b6826f8c6e77fca2e3c739a9bf4f55de8d42f160 Mon Sep 17 00:00:00 2001
From: binary-joe <48189951+binary-joe@users.noreply.github.com>
Date: Fri, 29 Nov 2019 20:26:30 +0100
Subject: [PATCH] BAEL-398 introduction to genie (#8217)
---
netflix/README.md | 7 +
netflix/genie/pom.xml | 18 +++
.../netflix/genie/docker/docker-compose.yml | 42 ++++++
.../netflix/genie/docker/init_demo.py | 122 ++++++++++++++++++
.../genie/docker/run_spark_submit_job.py | 80 ++++++++++++
netflix/pom.xml | 22 ++++
pom.xml | 4 +
7 files changed, 295 insertions(+)
create mode 100644 netflix/README.md
create mode 100644 netflix/genie/pom.xml
create mode 100644 netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml
create mode 100644 netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py
create mode 100644 netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py
create mode 100644 netflix/pom.xml
diff --git a/netflix/README.md b/netflix/README.md
new file mode 100644
index 0000000000..0af07d37a8
--- /dev/null
+++ b/netflix/README.md
@@ -0,0 +1,7 @@
+## Netflix
+
+This module contains articles about Netflix.
+
+### Relevant articles
+
+- [Introduction to Netflix Genie](https://github.com/eugenp/tutorials/tree/master/netflix/genie)
diff --git a/netflix/genie/pom.xml b/netflix/genie/pom.xml
new file mode 100644
index 0000000000..9c78a11464
--- /dev/null
+++ b/netflix/genie/pom.xml
@@ -0,0 +1,18 @@
+
+ 4.0.0
+
+ com.baeldung
+ genie
+ jar
+ 1.0.0-SNAPSHOT
+
+ Genie
+ Sample project for Netflix Genie
+
+
+ com.baeldung
+ netflix
+ 1.0.0-SNAPSHOT
+
+
\ No newline at end of file
diff --git a/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml
new file mode 100644
index 0000000000..db7a76a014
--- /dev/null
+++ b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/docker-compose.yml
@@ -0,0 +1,42 @@
+version: "2"
+services:
+ genie:
+ image: netflixoss/genie-app:3.3.9
+ ports:
+ - "8080:8080"
+ depends_on:
+ - genie-hadoop-prod
+ - genie-hadoop-test
+ - genie-apache
+ tty: true
+ container_name: genie_demo_app_3.3.9
+ genie-apache:
+ image: netflixoss/genie-demo-apache:3.3.9
+ tty: true
+ container_name: genie_demo_apache_3.3.9
+ genie-client:
+ image: netflixoss/genie-demo-client:3.3.9
+ depends_on:
+ - genie
+ tty: true
+ container_name: genie_demo_client_3.3.9
+ genie-hadoop-prod:
+ image: sequenceiq/hadoop-docker:2.7.1
+ command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash"
+ ports:
+ - "19888:19888"
+ - "19070:50070"
+ - "19075:50075"
+ - "8088:8088"
+ tty: true
+ container_name: genie_demo_hadoop_prod_3.3.9
+ genie-hadoop-test:
+ image: sequenceiq/hadoop-docker:2.7.1
+ command: /bin/bash -c "/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver && /etc/bootstrap.sh -bash"
+ ports:
+ - "19889:19888"
+ - "19071:50070"
+ - "19076:50075"
+ - "8089:8088"
+ tty: true
+ container_name: genie_demo_hadoop_test_3.3.9
diff --git a/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py
new file mode 100644
index 0000000000..548b056fd8
--- /dev/null
+++ b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/init_demo.py
@@ -0,0 +1,122 @@
+#!/usr/bin/python2.7
+
+# Copyright 2016 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+import yaml
+from pygenie.client import Genie
+from pygenie.conf import GenieConf
+
+logging.basicConfig(level=logging.WARNING)
+
+LOGGER = logging.getLogger(__name__)
+
+
+def load_yaml(yaml_file):
+ with open(yaml_file) as _file:
+ return yaml.load(_file)
+
+
+genie_conf = GenieConf()
+genie_conf.genie.url = "http://genie:8080"
+
+genie = Genie(genie_conf)
+
+hadoop_application = load_yaml("applications/hadoop271.yml")
+hadoop_application_id = genie.create_application(hadoop_application)
+LOGGER.warn("Created Hadoop 2.7.1 application with id = [%s]" % hadoop_application_id)
+
+spark_163_application = load_yaml("applications/spark163.yml")
+spark_163_application_id = genie.create_application(spark_163_application)
+LOGGER.warn("Created Spark 1.6.3 application with id = [%s]" % spark_163_application_id)
+
+spark_201_application = load_yaml("applications/spark201.yml")
+spark_201_application_id = genie.create_application(spark_201_application)
+LOGGER.warn("Created Spark 2.0.1 application with id = [%s]" % spark_201_application_id)
+
+hadoop_command = load_yaml("commands/hadoop271.yml")
+hadoop_command_id = genie.create_command(hadoop_command)
+LOGGER.warn("Created Hadoop command with id = [%s]" % hadoop_command_id)
+
+hdfs_command = load_yaml("commands/hdfs271.yml")
+hdfs_command_id = genie.create_command(hdfs_command)
+LOGGER.warn("Created HDFS command with id = [%s]" % hdfs_command_id)
+
+yarn_command = load_yaml("commands/yarn271.yml")
+yarn_command_id = genie.create_command(yarn_command)
+LOGGER.warn("Created Yarn command with id = [%s]" % yarn_command_id)
+
+spark_163_shell_command = load_yaml("commands/sparkShell163.yml")
+spark_163_shell_command_id = genie.create_command(spark_163_shell_command)
+LOGGER.warn("Created Spark 1.6.3 Shell command with id = [%s]" % spark_163_shell_command_id)
+
+spark_163_submit_command = load_yaml("commands/sparkSubmit163.yml")
+spark_163_submit_command_id = genie.create_command(spark_163_submit_command)
+LOGGER.warn("Created Spark 1.6.3 Submit command with id = [%s]" % spark_163_submit_command_id)
+
+spark_201_shell_command = load_yaml("commands/sparkShell201.yml")
+spark_201_shell_command_id = genie.create_command(spark_201_shell_command)
+LOGGER.warn("Created Spark 2.0.1 Shell command with id = [%s]" % spark_201_shell_command_id)
+
+spark_201_submit_command = load_yaml("commands/sparkSubmit201.yml")
+spark_201_submit_command_id = genie.create_command(spark_201_submit_command)
+LOGGER.warn("Created Spark 2.0.1 Submit command with id = [%s]" % spark_201_submit_command_id)
+
+genie.set_application_for_command(hadoop_command_id, [hadoop_application_id])
+LOGGER.warn("Set applications for Hadoop command to = [%s]" % hadoop_application_id)
+
+genie.set_application_for_command(hdfs_command_id, [hadoop_application_id])
+LOGGER.warn("Set applications for HDFS command to = [[%s]]" % hadoop_application_id)
+
+genie.set_application_for_command(yarn_command_id, [hadoop_application_id])
+LOGGER.warn("Set applications for Yarn command to = [[%s]]" % hadoop_application_id)
+
+genie.set_application_for_command(spark_163_shell_command_id, [hadoop_application_id, spark_163_application_id])
+LOGGER.warn("Set applications for Spark 1.6.3 Shell command to = [%s]" %
+ [hadoop_application_id, spark_163_application_id])
+
+genie.set_application_for_command(spark_163_submit_command_id, [hadoop_application_id, spark_163_application_id])
+LOGGER.warn("Set applications for Spark 1.6.3 Submit command to = [%s]" %
+ [hadoop_application_id, spark_163_application_id])
+
+genie.set_application_for_command(spark_201_shell_command_id, [hadoop_application_id, spark_201_application_id])
+LOGGER.warn("Set applications for Spark 2.0.1 Shell command to = [%s]" %
+ [hadoop_application_id, spark_201_application_id])
+
+genie.set_application_for_command(spark_201_submit_command_id, [hadoop_application_id, spark_201_application_id])
+LOGGER.warn("Set applications for Spark 2.0.1 Submit command to = [%s]" %
+ [hadoop_application_id, spark_201_application_id])
+
+prod_cluster = load_yaml("clusters/prod.yml")
+prod_cluster_id = genie.create_cluster(prod_cluster)
+LOGGER.warn("Created prod cluster with id = [%s]" % prod_cluster_id)
+
+test_cluster = load_yaml("clusters/test.yml")
+test_cluster_id = genie.create_cluster(test_cluster)
+LOGGER.warn("Created test cluster with id = [%s]" % test_cluster_id)
+
+genie.set_commands_for_cluster(
+ prod_cluster_id,
+ [hadoop_command_id, hdfs_command_id, yarn_command_id, spark_163_shell_command_id, spark_201_shell_command_id,
+ spark_163_submit_command_id, spark_201_submit_command_id]
+)
+LOGGER.warn("Added all commands to the prod cluster with id = [%s]" % prod_cluster_id)
+genie.set_commands_for_cluster(
+ test_cluster_id,
+ [hadoop_command_id, hdfs_command_id, yarn_command_id, spark_163_shell_command_id, spark_201_shell_command_id,
+ spark_163_submit_command_id, spark_201_submit_command_id]
+)
+LOGGER.warn("Added all commands to the test cluster with id = [%s]" % test_cluster_id)
\ No newline at end of file
diff --git a/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py
new file mode 100644
index 0000000000..67539a0908
--- /dev/null
+++ b/netflix/genie/src/main/resources/com/baeldung/netflix/genie/docker/run_spark_submit_job.py
@@ -0,0 +1,80 @@
+# Copyright 2016 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+##################################################################################
+# This script assumes setup.py has already been run to configure Genie and that
+# this script is executed on the host where Genie is running. If it's executed on
+# another host change the localhost line below.
+##################################################################################
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+import sys
+
+import pygenie
+
+logging.basicConfig(level=logging.ERROR)
+
+LOGGER = logging.getLogger(__name__)
+
+pygenie.conf.DEFAULT_GENIE_URL = "http://genie:8080"
+
+# Create a job instance and fill in the required parameters
+job = pygenie.jobs.GenieJob() \
+ .genie_username('root') \
+ .job_version('3.0.0')
+
+# Set cluster criteria which determine the cluster to run the job on
+job.cluster_tags(['sched:' + str(sys.argv[1]), 'type:yarn'])
+
+# Set command criteria which will determine what command Genie executes for the job
+if len(sys.argv) == 2:
+ # Use the default spark
+ job.command_tags(['type:spark-submit'])
+ job.job_name('Genie Demo Spark Submit Job')
+else:
+ # Use the spark version passed in
+ job.command_tags(['type:spark-submit', 'ver:' + str(sys.argv[2])])
+ job.job_name('Genie Demo Spark ' + str(sys.argv[2]) + ' Submit Job')
+
+# Any command line arguments to run along with the command. In this case it holds
+# the actual query but this could also be done via an attachment or file dependency.
+# This jar location is where it is installed on the Genie node but could also pass
+# the jar as attachment and use it locally
+if len(sys.argv) == 2:
+ # Default is spark 1.6.3
+ job.command_arguments(
+ "--class org.apache.spark.examples.SparkPi "
+ "${SPARK_HOME}/lib/spark-examples*.jar "
+ "10"
+ )
+else:
+ # Override with Spark 2.x location
+ job.command_arguments(
+ "--class org.apache.spark.examples.SparkPi "
+ "${SPARK_HOME}/examples/jars/spark-examples*.jar "
+ "10"
+ )
+
+# Submit the job to Genie
+running_job = job.execute()
+
+print('Job {} is {}'.format(running_job.job_id, running_job.status))
+print(running_job.job_link)
+
+# Block and wait until job is done
+running_job.wait()
+
+print('Job {} finished with status {}'.format(running_job.job_id, running_job.status))
\ No newline at end of file
diff --git a/netflix/pom.xml b/netflix/pom.xml
new file mode 100644
index 0000000000..4b0954d777
--- /dev/null
+++ b/netflix/pom.xml
@@ -0,0 +1,22 @@
+
+ 4.0.0
+
+ com.baeldung
+ netflix
+ pom
+ 1.0.0-SNAPSHOT
+
+ Netflix
+ Module for Netflix projects
+
+
+ com.baeldung
+ parent-modules
+ 1.0.0-SNAPSHOT
+
+
+
+ genie
+
+
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 05999f0926..25848f4e83 100644
--- a/pom.xml
+++ b/pom.xml
@@ -590,6 +590,7 @@
mustache
mybatis
+ netflix
optaplanner
orika
@@ -669,6 +670,8 @@
+ netflix
+
parent-boot-1
parent-boot-2
parent-spring-4
@@ -1220,6 +1223,7 @@
mustache
mybatis
+ netflix
optaplanner
orika