Merge pull request #1130 from housejester/jde-building

Various improvements to the tutorial and building experience.
2015-03-02 14:48:32 -08:00 · 2015-03-02 14:48:32 -08:00 · 9578304d6f
parent d685e2ab04 9cb5d9bd00
commit 9578304d6f
6 changed files with 121 additions and 93 deletions
--- a/docs/content/Tutorial:-A-First-Look-at-Druid.md
+++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md
@ -43,12 +43,18 @@ Metrics (things to aggregate over):
 Setting Up
 ----------

-There are two ways to setup Druid: download a tarball, or [Build From Source](Build-from-source.html). You only need to do one of these.
+To start, we need to get our hands on a Druid build. There are two ways to get Druid: download a tarball, or [Build From Source](Build-from-source.html). You only need to do one of these.

 ### Download a Tarball

 We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz). Download this file to a directory of your choosing.

+### Build From Source
+
+Follow the [Build From Source](Build-from-source.html) guide, then grab the tarball from `services/target/druid-0.7.0-bin.tar.gz`.
+
+### Unpack the Tarball
+
 You can extract the content within by issuing:

 ```
@ -70,13 +76,13 @@ You should see a bunch of files:
 Running Example Scripts
 -----------------------

-Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:
+Let's start doing stuff. You can start an example Druid [Realtime](Realtime.html) node by issuing:

 ```
 ./run_example_server.sh
 ```

-Select "wikipedia".
+Select "2" for the "wikipedia" example.

 Note that the first time you start the example, it may take some extra time due to its fetching various dependencies. Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below.

@ -168,7 +174,7 @@ If you issue the query again, you should notice your results updating.

 Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis?

-We can change granularity our the results to minute. To specify different granularities to bucket our results, we change our query like so:
+We can change the granularity of the results to "minute". To bucket our results at a different granularity, we change our query like so:

 ```json
 {
@ -256,7 +262,7 @@ You should see an answer to our question. As an example, some results are shown
 ]
 ```

-Feel free to tweak other query parameters to answer other questions you may have about the data. Druid also includes more complex query types such as [groupBy queries](GroupByQuery.html).
+Feel free to tweak other query parameters to answer other questions you may have about the data. Druid also includes more complex query types such as [groupBy queries](GroupByQuery.html). For more information on querying, see the [Querying](Querying.html) documentation.

 Next Steps
 ----------
--- a/docs/content/Tutorial:-The-Druid-Cluster.md
+++ b/docs/content/Tutorial:-The-Druid-Cluster.md
@ -13,17 +13,15 @@ In this tutorial, we will set up other types of Druid nodes and external depende

 If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.

-You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz)
+You can download the latest version of Druid [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz). You can also [Build From Source](Build-from-source.html) and grab the tarball from `services/target/druid-0.7.0-bin.tar.gz`.

-and untar the contents within by issuing:
+Either way, once you have the tarball, untar the contents within by issuing:

 ```bash
 tar -zxvf druid-0.7.0-bin.tar.gz
 cd druid-0.7.0
 ```

-You can also [Build From Source](Build-from-source.html).
-
 ## External Dependencies

 Druid requires 3 external dependencies. A "deep storage" that acts as a backup data repository, a "metadata storage" such as MySQL to hold configuration and metadata information, and [Apache Zookeeper](http://zookeeper.apache.org/) for coordination among different pieces of the cluster.
--- a/examples/bin/run_example_client.sh
+++ b/examples/bin/run_example_client.sh
@ -12,33 +12,10 @@ cd ${SCRIPT_DIR}
 SCRIPT_DIR=`pwd`
 cd ${CURR_DIR}

-EXAMPLES_DIR=${SCRIPT_DIR}/examples
+source $SCRIPT_DIR/select_example.sh

-EXAMPLE=$1
-if [ -z ${EXAMPLE} ] ; then
-    echo "Please specify an example type."
-    echo "Examples availables:"
-    echo `ls ${EXAMPLES_DIR} | grep -v indexing`
-    read -p "> " EXAMPLE
-    echo " "
-fi
+select_example QUERY_FILE "${SCRIPT_DIR}/examples" "*query.body" "${1}" "query.body"

-EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
-
-while [[ ! -e ${EXAMPLE_LOC} ]] ; do
-    echo "Unknown example ${EXAMPLE}, please specify a known example."
-    echo "Known examples:"
-    echo `ls ${EXAMPLES_DIR}`
-    read -p "> " EXAMPLE
-    EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
-    echo " "
-done
-
-QUERY_FILE=${EXAMPLE_LOC}/query.body
-
-[ ! -e ${QUERY_FILE} ]  &&  echo "expecting file ${QUERY_FILE} to be in current directory"  &&  exit 2
-
-echo "Running ${EXAMPLE} query:"
 cat ${QUERY_FILE}
 for delay in 5 30 30 30 30 30 30 30 30 30 30
 do
--- a/examples/bin/run_example_server.sh
+++ b/examples/bin/run_example_server.sh
@ -6,42 +6,27 @@ shopt -s expand_aliases
 trap "exit 1" 1 2 3 15

 SCRIPT_DIR=`dirname $0`
+
+if [[ ! -d "${SCRIPT_DIR}/lib" || ! -d "${SCRIPT_DIR}/config" ]]; then
+  echo "This script appears to be running from the source location. It must be run from its deployed location."
+  echo "After building, unpack services/target/druid-services-*-SNAPSHOT-bin.tar.gz, and run the script unpacked there."
+  exit 2
+fi
+
 CURR_DIR=`pwd`
 cd ${SCRIPT_DIR}
 SCRIPT_DIR=`pwd`
 cd ${CURR_DIR}

-EXAMPLES_DIR=${SCRIPT_DIR}/examples
-
 [ -d /tmp/example ]  &&  echo "Cleaning up from previous run.."  &&  /bin/rm -fr /tmp/example

-EXAMPLE=$1
-if [ -z ${EXAMPLE} ] ; then
-    echo "Please specify an example type."
-    echo "Examples availables:"
-    echo `ls ${EXAMPLES_DIR} | grep -v indexing`
-    read -p "> " EXAMPLE
-    echo " "
-fi
+source $SCRIPT_DIR/select_example.sh

-EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
-
-while [[ ! -e ${EXAMPLE_LOC} ]] ; do
-    echo "Unknown example ${EXAMPLE}, please specify a known example."
-    echo "Known examples:"
-    echo `ls ${EXAMPLES_DIR}`
-    read -p "> " EXAMPLE
-    EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
-    echo " "
-done
-
-SPEC_FILE=${EXAMPLE_LOC}/${EXAMPLE}_realtime.spec
-
-# check spec file exists
-[ ! -e ${SPEC_FILE} ]  &&  echo "Expecting file ${SPEC_FILE} to exist, it didn't"  &&  exit 3
+select_example SPEC_FILE "${SCRIPT_DIR}/examples" "*_realtime.spec" "${1}" "${1}_realtime.spec"

+EXAMPLE_LOC=$(dirname $SPEC_FILE)
 # run before script if it exists
-if [ -e ${EXAMPLE_LOC}/before.sh ]; then
+if [ -x ${EXAMPLE_LOC}/before.sh ]; then
    trap "set +x; cd ${EXAMPLE_LOC} && ./after.sh && cd ${CURR_DIR}; exit 1" EXIT
    cd ${EXAMPLE_LOC}
    ./before.sh
@ -52,7 +37,6 @@ fi
 JAVA_ARGS="-Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8"
 JAVA_ARGS="${JAVA_ARGS} -Ddruid.realtime.specFile=${SPEC_FILE}"

-
 DRUID_CP=${EXAMPLE_LOC}
 #For a pull
 DRUID_CP=${SCRIPT_DIR}/../config/realtime:${DRUID_CP}
--- a/examples/bin/select_example.sh
+++ b/examples/bin/select_example.sh
@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+function select_example() {
+  example_outvar=$1
+  examples_dir=$2
+  find_pattern=$3
+  example_arg=$4
+  default_file=$5
+
+  if [[ -n ${example_arg} ]]; then
+    if [[ -f ${example_arg} ]]; then
+      example_file=${example_arg}
+    else
+      example_file="${examples_dir}/${example_arg}/${default_file}"
+    fi
+  fi
+
+  all_examples=($(find ${examples_dir} -name "${find_pattern}"))
+  while [[ -z ${example_file} || ! -f ${example_file} ]] ; do
+    if [[ -n ${example_file} ]]; then
+      echo "No example found at ${example_file}."
+    fi
+    echo "Please specify an example by its number."
+    echo "Examples available:"
+    LINE=0
+    for e in ${all_examples[@]}; do
+      LINE=$((LINE+1))
+      REL_FILE=${e#${examples_dir}/}
+      DESC=`grep 'description' $e | tail -1 | sed 's/"description"[^"]*"\([^"]*\)".*/\1/' `
+      echo "${LINE} - ${REL_FILE} - ${DESC:-No Description}"
+    done
+    read -p "[1] > " NUM_SELECTED
+    echo " "
+    NUM_SELECTED=${NUM_SELECTED:-1}
+    example_file=${all_examples[$((NUM_SELECTED-1))]}
+  done
+  eval $example_outvar="'$example_file'"
+}
--- a/server/src/main/java/io/druid/segment/realtime/firehose/WikipediaIrcDecoder.java
+++ b/server/src/main/java/io/druid/segment/realtime/firehose/WikipediaIrcDecoder.java
@ -91,17 +91,51 @@ class WikipediaIrcDecoder implements IrcDecoder
    this.namespaces = namespaces;
    this.geoIpDatabase = geoIpDatabase;

-    File geoDb;
    if (geoIpDatabase != null) {
-      geoDb = new File(geoIpDatabase);
+      this.geoLookup = openGeoIpDb(new File(geoIpDatabase));
    } else {
+      this.geoLookup = openDefaultGeoIpDb();
+    }
+  }
+
+  private DatabaseReader openDefaultGeoIpDb() {
+    File geoDb = new File(System.getProperty("java.io.tmpdir"),
+                          this.getClass().getCanonicalName() + ".GeoLite2-City.mmdb");
    try {
-        String tmpDir = System.getProperty("java.io.tmpdir");
+      return openDefaultGeoIpDb(geoDb);
+    }
+    catch (RuntimeException e) {
+      log.warn(e.getMessage()+" Attempting to re-download.", e);
+      if (geoDb.exists() && !geoDb.delete()) {
+        throw new RuntimeException("Could not delete geo db file ["+ geoDb.getAbsolutePath() +"].");
+      }
+      // local download may be corrupt, will retry once.
+      return openDefaultGeoIpDb(geoDb);
+    }
+  }

-        geoDb = new File(tmpDir, this.getClass().getCanonicalName() + ".GeoLite2-City.mmdb");
+  private DatabaseReader openDefaultGeoIpDb(File geoDb) {
+    downloadGeoLiteDbToFile(geoDb);
+    return openGeoIpDb(geoDb);
+  }

-        if (!geoDb.exists()) {
-          log.info("Downloading geo ip database to [%s]. This may take a few minutes.", geoDb);
+  private DatabaseReader openGeoIpDb(File geoDb) {
+    try {
+      DatabaseReader reader = new DatabaseReader(geoDb);
+      log.info("Using geo ip database at [%s].", geoDb);
+      return reader;
+    } catch (IOException e) {
+      throw new RuntimeException("Could not open geo db at ["+ geoDb.getAbsolutePath() +"].", e);
+    }
+  }
+
+  private void downloadGeoLiteDbToFile(File geoDb) {
+    if (geoDb.exists()) {
+      return;
+    }
+
+    try {
+      log.info("Downloading geo ip database to [%s]. This may take a few minutes.", geoDb.getAbsolutePath());

      File tmpFile = File.createTempFile("druid", "geo");

@ -111,22 +145,13 @@ class WikipediaIrcDecoder implements IrcDecoder
        ),
        tmpFile
      );
+
      if (!tmpFile.renameTo(geoDb)) {
-            throw new RuntimeException("Unable to move geo file!");
-          }
-        } else {
-          log.info("Using geo ip database at [%s].", geoDb);
+        throw new RuntimeException("Unable to move geo file to ["+geoDb.getAbsolutePath()+"]!");
      }
    }
    catch (IOException e) {
-        throw new RuntimeException("Unable to download geo ip database [%s]", e);
-      }
-    }
-    try {
-      geoLookup = new DatabaseReader(geoDb);
-    }
-    catch (IOException e) {
-      throw new RuntimeException("Unable to open geo ip lookup database", e);
+      throw new RuntimeException("Unable to download geo ip database.", e);
    }
  }