diff --git a/docs/content/Tutorial:-A-First-Look-at-Druid.md b/docs/content/Tutorial:-A-First-Look-at-Druid.md index 1ed6ff85293..0928b23d1bd 100644 --- a/docs/content/Tutorial:-A-First-Look-at-Druid.md +++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md @@ -43,12 +43,18 @@ Metrics (things to aggregate over): Setting Up ---------- -There are two ways to setup Druid: download a tarball, or [Build From Source](Build-from-source.html). You only need to do one of these. +To start, we need to get our hands on a Druid build. There are two ways to get Druid: download a tarball, or [Build From Source](Build-from-source.html). You only need to do one of these. ### Download a Tarball We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz). Download this file to a directory of your choosing. +### Build From Source + +Follow the [Build From Source](Build-from-source.html) guide to build from source. Then grab the tarball from services/target/druid-0.7.0-bin.tar.gz. + +### Unpack the Tarball + You can extract the content within by issuing: ``` @@ -70,13 +76,13 @@ You should see a bunch of files: Running Example Scripts ----------------------- -Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing: +Let's start doing stuff. You can start an example Druid [Realtime](Realtime.html) node by issuing: ``` ./run_example_server.sh ``` -Select "wikipedia". +Select "2" for the "wikipedia" example. Note that the first time you start the example, it may take some extra time due to its fetching various dependencies. Once the node starts up you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below. @@ -168,7 +174,7 @@ If you issue the query again, you should notice your results updating. Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis? -We can change granularity our the results to minute. To specify different granularities to bucket our results, we change our query like so: +We can change granularity for the results to "minute". To specify different granularities to bucket our results, we change our query like so: ```json { @@ -256,7 +262,7 @@ You should see an answer to our question. As an example, some results are shown ] ``` -Feel free to tweak other query parameters to answer other questions you may have about the data. Druid also includes more complex query types such as [groupBy queries](GroupByQuery.html). +Feel free to tweak other query parameters to answer other questions you may have about the data. Druid also includes more complex query types such as [groupBy queries](GroupByQuery.html). For more information on querying, see this [link](Querying.html). Next Steps ---------- diff --git a/docs/content/Tutorial:-The-Druid-Cluster.md b/docs/content/Tutorial:-The-Druid-Cluster.md index 72d5413cae1..04490786e85 100644 --- a/docs/content/Tutorial:-The-Druid-Cluster.md +++ b/docs/content/Tutorial:-The-Druid-Cluster.md @@ -13,17 +13,15 @@ In this tutorial, we will set up other types of Druid nodes and external depende If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first. -You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz) +You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz). You can also [Build From Source](Build-from-source.html) and grab the tarball from services/target/druid-0.7.0-bin.tar.gz. -and untar the contents within by issuing: +Either way, once you have the tarball, untar the contents within by issuing: ```bash tar -zxvf druid-0.7.0-bin.tar.gz cd druid-0.7.0 ``` -You can also [Build From Source](Build-from-source.html). - ## External Dependencies Druid requires 3 external dependencies. A "deep storage" that acts as a backup data repository, a "metadata storage" such as MySQL to hold configuration and metadata information, and [Apache Zookeeper](http://zookeeper.apache.org/) for coordination among different pieces of the cluster. diff --git a/examples/bin/run_example_client.sh b/examples/bin/run_example_client.sh index 2747b3a0395..8e64a6d0de8 100755 --- a/examples/bin/run_example_client.sh +++ b/examples/bin/run_example_client.sh @@ -12,38 +12,15 @@ cd ${SCRIPT_DIR} SCRIPT_DIR=`pwd` cd ${CURR_DIR} -EXAMPLES_DIR=${SCRIPT_DIR}/examples +source $SCRIPT_DIR/select_example.sh -EXAMPLE=$1 -if [ -z ${EXAMPLE} ] ; then - echo "Please specify an example type." - echo "Examples availables:" - echo `ls ${EXAMPLES_DIR} | grep -v indexing` - read -p "> " EXAMPLE - echo " " -fi +select_example QUERY_FILE "${SCRIPT_DIR}/examples" "*query.body" "${1}" "query.body" -EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE} - -while [[ ! -e ${EXAMPLE_LOC} ]] ; do - echo "Unknown example ${EXAMPLE}, please specify a known example." - echo "Known examples:" - echo `ls ${EXAMPLES_DIR}` - read -p "> " EXAMPLE - EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE} - echo " " -done - -QUERY_FILE=${EXAMPLE_LOC}/query.body - -[ ! -e ${QUERY_FILE} ] && echo "expecting file ${QUERY_FILE} to be in current directory" && exit 2 - -echo "Running ${EXAMPLE} query:" cat ${QUERY_FILE} -for delay in 5 30 30 30 30 30 30 30 30 30 30 +for delay in 5 30 30 30 30 30 30 30 30 30 30 do echo "sleep for $delay seconds..." - echo " " + echo " " sleep $delay curl -X POST 'http://localhost:8084/druid/v2/?pretty' -H 'content-type: application/json' -d "`cat ${QUERY_FILE}`" echo " " diff --git a/examples/bin/run_example_server.sh b/examples/bin/run_example_server.sh index bbbe26566b7..51dccf1d78e 100755 --- a/examples/bin/run_example_server.sh +++ b/examples/bin/run_example_server.sh @@ -6,42 +6,27 @@ shopt -s expand_aliases trap "exit 1" 1 2 3 15 SCRIPT_DIR=`dirname $0` + +if [[ ! -d "${SCRIPT_DIR}/lib" || ! -d "${SCRIPT_DIR}/config" ]]; then + echo "This script appears to be running from the source location. It must be run from its deployed location." + echo "After building, unpack services/target/druid-services-*-SNAPSHOT-bin.tar.gz, and run the script unpacked there." + exit 2 +fi + CURR_DIR=`pwd` cd ${SCRIPT_DIR} SCRIPT_DIR=`pwd` cd ${CURR_DIR} -EXAMPLES_DIR=${SCRIPT_DIR}/examples - [ -d /tmp/example ] && echo "Cleaning up from previous run.." && /bin/rm -fr /tmp/example -EXAMPLE=$1 -if [ -z ${EXAMPLE} ] ; then - echo "Please specify an example type." - echo "Examples availables:" - echo `ls ${EXAMPLES_DIR} | grep -v indexing` - read -p "> " EXAMPLE - echo " " -fi +source $SCRIPT_DIR/select_example.sh -EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE} - -while [[ ! -e ${EXAMPLE_LOC} ]] ; do - echo "Unknown example ${EXAMPLE}, please specify a known example." - echo "Known examples:" - echo `ls ${EXAMPLES_DIR}` - read -p "> " EXAMPLE - EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE} - echo " " -done - -SPEC_FILE=${EXAMPLE_LOC}/${EXAMPLE}_realtime.spec - -# check spec file exists -[ ! -e ${SPEC_FILE} ] && echo "Expecting file ${SPEC_FILE} to exist, it didn't" && exit 3 +select_example SPEC_FILE "${SCRIPT_DIR}/examples" "*_realtime.spec" "${1}" "${1}_realtime.spec" +EXAMPLE_LOC=$(dirname $SPEC_FILE) # run before script if it exists -if [ -e ${EXAMPLE_LOC}/before.sh ]; then +if [ -x ${EXAMPLE_LOC}/before.sh ]; then trap "set +x; cd ${EXAMPLE_LOC} && ./after.sh && cd ${CURR_DIR}; exit 1" EXIT cd ${EXAMPLE_LOC} ./before.sh @@ -52,7 +37,6 @@ fi JAVA_ARGS="-Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8" JAVA_ARGS="${JAVA_ARGS} -Ddruid.realtime.specFile=${SPEC_FILE}" - DRUID_CP=${EXAMPLE_LOC} #For a pull DRUID_CP=${SCRIPT_DIR}/../config/realtime:${DRUID_CP} diff --git a/examples/bin/select_example.sh b/examples/bin/select_example.sh new file mode 100755 index 00000000000..f90111e99f5 --- /dev/null +++ b/examples/bin/select_example.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +function select_example() { + example_outvar=$1 + examples_dir=$2 + find_pattern=$3 + example_arg=$4 + default_file=$5 + + if [[ -n ${example_arg} ]]; then + if [[ -f ${example_arg} ]]; then + example_file=${example_arg} + else + example_file="${examples_dir}/${example_arg}/${default_file}" + fi + fi + + all_examples=($(find ${examples_dir} -name "${find_pattern}")) + while [[ -z ${example_file} || ! -f ${example_file} ]] ; do + if [[ -n ${example_file} ]]; then + echo "No example found at ${example_file}." + fi + echo "Please specify an example by its number." + echo "Examples available:" + LINE=0 + for e in ${all_examples[@]}; do + LINE=$((LINE+1)) + REL_FILE=${e#${examples_dir}/} + DESC=`grep 'description' $e | tail -1 | sed 's/"description"[^"]*"\([^"]*\)".*/\1/' ` + echo "${LINE} - ${REL_FILE} - ${DESC:-No Description}" + done + read -p "[1] > " NUM_SELECTED + echo " " + NUM_SELECTED=${NUM_SELECTED:-1} + example_file=${all_examples[$((NUM_SELECTED-1))]} + done + eval $example_outvar="'$example_file'" +} diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/WikipediaIrcDecoder.java b/server/src/main/java/io/druid/segment/realtime/firehose/WikipediaIrcDecoder.java index 35cf8f14af8..829902e6805 100644 --- a/server/src/main/java/io/druid/segment/realtime/firehose/WikipediaIrcDecoder.java +++ b/server/src/main/java/io/druid/segment/realtime/firehose/WikipediaIrcDecoder.java @@ -91,42 +91,67 @@ class WikipediaIrcDecoder implements IrcDecoder this.namespaces = namespaces; this.geoIpDatabase = geoIpDatabase; - File geoDb; if (geoIpDatabase != null) { - geoDb = new File(geoIpDatabase); + this.geoLookup = openGeoIpDb(new File(geoIpDatabase)); } else { - try { - String tmpDir = System.getProperty("java.io.tmpdir"); - - geoDb = new File(tmpDir, this.getClass().getCanonicalName() + ".GeoLite2-City.mmdb"); - - if (!geoDb.exists()) { - log.info("Downloading geo ip database to [%s]. This may take a few minutes.", geoDb); - - File tmpFile = File.createTempFile("druid", "geo"); - - FileUtils.copyInputStreamToFile( - new GZIPInputStream( - new URL("http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz").openStream() - ), - tmpFile - ); - if (!tmpFile.renameTo(geoDb)) { - throw new RuntimeException("Unable to move geo file!"); - } - } else { - log.info("Using geo ip database at [%s].", geoDb); - } - } - catch (IOException e) { - throw new RuntimeException("Unable to download geo ip database [%s]", e); - } + this.geoLookup = openDefaultGeoIpDb(); } + } + + private DatabaseReader openDefaultGeoIpDb() { + File geoDb = new File(System.getProperty("java.io.tmpdir"), + this.getClass().getCanonicalName() + ".GeoLite2-City.mmdb"); try { - geoLookup = new DatabaseReader(geoDb); + return openDefaultGeoIpDb(geoDb); + } + catch (RuntimeException e) { + log.warn(e.getMessage()+" Attempting to re-download.", e); + if (geoDb.exists() && !geoDb.delete()) { + throw new RuntimeException("Could not delete geo db file ["+ geoDb.getAbsolutePath() +"]."); + } + // local download may be corrupt, will retry once. + return openDefaultGeoIpDb(geoDb); + } + } + + private DatabaseReader openDefaultGeoIpDb(File geoDb) { + downloadGeoLiteDbToFile(geoDb); + return openGeoIpDb(geoDb); + } + + private DatabaseReader openGeoIpDb(File geoDb) { + try { + DatabaseReader reader = new DatabaseReader(geoDb); + log.info("Using geo ip database at [%s].", geoDb); + return reader; + } catch (IOException e) { + throw new RuntimeException("Could not open geo db at ["+ geoDb.getAbsolutePath() +"].", e); + } + } + + private void downloadGeoLiteDbToFile(File geoDb) { + if (geoDb.exists()) { + return; + } + + try { + log.info("Downloading geo ip database to [%s]. This may take a few minutes.", geoDb.getAbsolutePath()); + + File tmpFile = File.createTempFile("druid", "geo"); + + FileUtils.copyInputStreamToFile( + new GZIPInputStream( + new URL("http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz").openStream() + ), + tmpFile + ); + + if (!tmpFile.renameTo(geoDb)) { + throw new RuntimeException("Unable to move geo file to ["+geoDb.getAbsolutePath()+"]!"); + } } catch (IOException e) { - throw new RuntimeException("Unable to open geo ip lookup database", e); + throw new RuntimeException("Unable to download geo ip database.", e); } }