mirror of https://github.com/apache/druid.git
Merge pull request #1130 from housejester/jde-building
Various improvements to the tutorial and building experience.
This commit is contained in:
commit
9578304d6f
|
@ -43,12 +43,18 @@ Metrics (things to aggregate over):
|
|||
Setting Up
|
||||
----------
|
||||
|
||||
There are two ways to setup Druid: download a tarball, or [Build From Source](Build-from-source.html). You only need to do one of these.
|
||||
To start, we need to get our hands on a Druid build. There are two ways to get Druid: download a tarball, or [Build From Source](Build-from-source.html). You only need to do one of these.
|
||||
|
||||
### Download a Tarball
|
||||
|
||||
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz). Download this file to a directory of your choosing.
|
||||
|
||||
### Build From Source
|
||||
|
||||
Follow the [Build From Source](Build-from-source.html) guide to build from source. Then grab the tarball from services/target/druid-0.7.0-bin.tar.gz.
|
||||
|
||||
### Unpack the Tarball
|
||||
|
||||
You can extract the content within by issuing:
|
||||
|
||||
```
|
||||
|
@ -70,13 +76,13 @@ You should see a bunch of files:
|
|||
Running Example Scripts
|
||||
-----------------------
|
||||
|
||||
Let's start doing stuff. You can start a Druid [Realtime](Realtime.html) node by issuing:
|
||||
Let's start doing stuff. You can start an example Druid [Realtime](Realtime.html) node by issuing:
|
||||
|
||||
```
|
||||
./run_example_server.sh
|
||||
```
|
||||
|
||||
Select "wikipedia".
|
||||
Select "2" for the "wikipedia" example.
|
||||
|
||||
Note that the first time you start the example, it may take some extra time because it has to fetch various dependencies. Once the node starts up, you will see a bunch of logs about setting up properties and connecting to the data source. If everything was successful, you should see messages of the form shown below.
|
||||
|
||||
|
@ -168,7 +174,7 @@ If you issue the query again, you should notice your results updating.
|
|||
|
||||
Right now all the results you are getting back are being aggregated into a single timestamp bucket. What if we wanted to see our aggregations on a per minute basis?
|
||||
|
||||
We can change granularity our the results to minute. To specify different granularities to bucket our results, we change our query like so:
|
||||
We can change granularity for the results to "minute". To specify different granularities to bucket our results, we change our query like so:
|
||||
|
||||
```json
|
||||
{
|
||||
|
@ -256,7 +262,7 @@ You should see an answer to our question. As an example, some results are shown
|
|||
]
|
||||
```
|
||||
|
||||
Feel free to tweak other query parameters to answer other questions you may have about the data. Druid also includes more complex query types such as [groupBy queries](GroupByQuery.html).
|
||||
Feel free to tweak other query parameters to answer other questions you may have about the data. Druid also includes more complex query types such as [groupBy queries](GroupByQuery.html). For more information on querying, see this [link](Querying.html).
|
||||
|
||||
Next Steps
|
||||
----------
|
||||
|
|
|
@ -13,17 +13,15 @@ In this tutorial, we will set up other types of Druid nodes and external depende
|
|||
|
||||
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
|
||||
|
||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz)
|
||||
You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-0.7.0-bin.tar.gz). You can also [Build From Source](Build-from-source.html) and grab the tarball from services/target/druid-0.7.0-bin.tar.gz.
|
||||
|
||||
and untar the contents within by issuing:
|
||||
Either way, once you have the tarball, untar the contents within by issuing:
|
||||
|
||||
```bash
|
||||
tar -zxvf druid-0.7.0-bin.tar.gz
|
||||
cd druid-0.7.0
|
||||
```
|
||||
|
||||
You can also [Build From Source](Build-from-source.html).
|
||||
|
||||
## External Dependencies
|
||||
|
||||
Druid requires 3 external dependencies: a "deep storage" that acts as a backup data repository, a "metadata storage" such as MySQL to hold configuration and metadata information, and [Apache Zookeeper](http://zookeeper.apache.org/) for coordination among different pieces of the cluster.
|
||||
|
|
|
@ -12,33 +12,10 @@ cd ${SCRIPT_DIR}
|
|||
SCRIPT_DIR=`pwd`
|
||||
cd ${CURR_DIR}
|
||||
|
||||
EXAMPLES_DIR=${SCRIPT_DIR}/examples
|
||||
source $SCRIPT_DIR/select_example.sh
|
||||
|
||||
EXAMPLE=$1
|
||||
if [ -z ${EXAMPLE} ] ; then
|
||||
echo "Please specify an example type."
|
||||
echo "Examples availables:"
|
||||
echo `ls ${EXAMPLES_DIR} | grep -v indexing`
|
||||
read -p "> " EXAMPLE
|
||||
echo " "
|
||||
fi
|
||||
select_example QUERY_FILE "${SCRIPT_DIR}/examples" "*query.body" "${1}" "query.body"
|
||||
|
||||
EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
|
||||
|
||||
while [[ ! -e ${EXAMPLE_LOC} ]] ; do
|
||||
echo "Unknown example ${EXAMPLE}, please specify a known example."
|
||||
echo "Known examples:"
|
||||
echo `ls ${EXAMPLES_DIR}`
|
||||
read -p "> " EXAMPLE
|
||||
EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
|
||||
echo " "
|
||||
done
|
||||
|
||||
QUERY_FILE=${EXAMPLE_LOC}/query.body
|
||||
|
||||
[ ! -e ${QUERY_FILE} ] && echo "expecting file ${QUERY_FILE} to be in current directory" && exit 2
|
||||
|
||||
echo "Running ${EXAMPLE} query:"
|
||||
cat ${QUERY_FILE}
|
||||
for delay in 5 30 30 30 30 30 30 30 30 30 30
|
||||
do
|
||||
|
|
|
@ -6,42 +6,27 @@ shopt -s expand_aliases
|
|||
trap "exit 1" 1 2 3 15
|
||||
|
||||
SCRIPT_DIR=`dirname $0`
|
||||
|
||||
if [[ ! -d "${SCRIPT_DIR}/lib" || ! -d "${SCRIPT_DIR}/config" ]]; then
|
||||
echo "This script appears to be running from the source location. It must be run from its deployed location."
|
||||
echo "After building, unpack services/target/druid-services-*-SNAPSHOT-bin.tar.gz, and run the script unpacked there."
|
||||
exit 2
|
||||
fi
|
||||
|
||||
CURR_DIR=`pwd`
|
||||
cd ${SCRIPT_DIR}
|
||||
SCRIPT_DIR=`pwd`
|
||||
cd ${CURR_DIR}
|
||||
|
||||
EXAMPLES_DIR=${SCRIPT_DIR}/examples
|
||||
|
||||
[ -d /tmp/example ] && echo "Cleaning up from previous run.." && /bin/rm -fr /tmp/example
|
||||
|
||||
EXAMPLE=$1
|
||||
if [ -z ${EXAMPLE} ] ; then
|
||||
echo "Please specify an example type."
|
||||
echo "Examples availables:"
|
||||
echo `ls ${EXAMPLES_DIR} | grep -v indexing`
|
||||
read -p "> " EXAMPLE
|
||||
echo " "
|
||||
fi
|
||||
source $SCRIPT_DIR/select_example.sh
|
||||
|
||||
EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
|
||||
|
||||
while [[ ! -e ${EXAMPLE_LOC} ]] ; do
|
||||
echo "Unknown example ${EXAMPLE}, please specify a known example."
|
||||
echo "Known examples:"
|
||||
echo `ls ${EXAMPLES_DIR}`
|
||||
read -p "> " EXAMPLE
|
||||
EXAMPLE_LOC=${EXAMPLES_DIR}/${EXAMPLE}
|
||||
echo " "
|
||||
done
|
||||
|
||||
SPEC_FILE=${EXAMPLE_LOC}/${EXAMPLE}_realtime.spec
|
||||
|
||||
# check spec file exists
|
||||
[ ! -e ${SPEC_FILE} ] && echo "Expecting file ${SPEC_FILE} to exist, it didn't" && exit 3
|
||||
select_example SPEC_FILE "${SCRIPT_DIR}/examples" "*_realtime.spec" "${1}" "${1}_realtime.spec"
|
||||
|
||||
EXAMPLE_LOC=$(dirname $SPEC_FILE)
|
||||
# run before script if it exists
|
||||
if [ -e ${EXAMPLE_LOC}/before.sh ]; then
|
||||
if [ -x ${EXAMPLE_LOC}/before.sh ]; then
|
||||
trap "set +x; cd ${EXAMPLE_LOC} && ./after.sh && cd ${CURR_DIR}; exit 1" EXIT
|
||||
cd ${EXAMPLE_LOC}
|
||||
./before.sh
|
||||
|
@ -52,7 +37,6 @@ fi
|
|||
JAVA_ARGS="-Xmx512m -Duser.timezone=UTC -Dfile.encoding=UTF-8"
|
||||
JAVA_ARGS="${JAVA_ARGS} -Ddruid.realtime.specFile=${SPEC_FILE}"
|
||||
|
||||
|
||||
DRUID_CP=${EXAMPLE_LOC}
|
||||
#For a pull
|
||||
DRUID_CP=${SCRIPT_DIR}/../config/realtime:${DRUID_CP}
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Resolve which example file to use and store its path in a caller-named variable.
#
# Arguments:
#   $1 - name of the variable that receives the selected file path
#   $2 - directory that is searched for example files
#   $3 - find(1) -name pattern that identifies example files
#   $4 - optional: either a path to an example file, or the name of an
#        example sub-directory of $2
#   $5 - file name expected inside the example sub-directory named by $4
#
# When $4 does not resolve to an existing file, the examples matching $3 are
# listed and the user is asked to pick one by its number (default: 1).
#
# Returns 0 with the path stored in the variable named by $1. Returns 1, with
# that variable set to the empty string, when no example matches $3 or when
# standard input is exhausted before a usable selection was made.
function select_example() {
  # Prefixed locals keep state from leaking between calls and make a clash
  # with the caller's output variable name unlikely.
  local _se_outvar=$1
  local _se_dir=$2
  local _se_pattern=$3
  local _se_arg=$4
  local _se_default=$5
  local _se_file=""
  local _se_eof=0
  local _se_found _se_line _se_rel _se_desc _se_choice
  local -a _se_all=()

  if [[ -n ${_se_arg} ]]; then
    if [[ -f ${_se_arg} ]]; then
      _se_file=${_se_arg}
    else
      _se_file="${_se_dir}/${_se_arg}/${_se_default}"
    fi
  fi

  # NUL-delimited read keeps paths that contain whitespace in one piece.
  while IFS= read -r -d '' _se_found; do
    _se_all+=("${_se_found}")
  done < <(find "${_se_dir}" -name "${_se_pattern}" -print0)

  while [[ -z ${_se_file} || ! -f ${_se_file} ]]; do
    if [[ -n ${_se_file} ]]; then
      echo "No example found at ${_se_file}."
    fi

    # Nothing to choose from: prompting again could never succeed.
    if (( ${#_se_all[@]} == 0 )); then
      echo "No examples matching '${_se_pattern}' found in ${_se_dir}." >&2
      printf -v "${_se_outvar}" '%s' ""
      return 1
    fi

    # Input ran out on the previous pass; do not spin forever.
    if (( _se_eof )); then
      echo "No more input available to read a selection from." >&2
      printf -v "${_se_outvar}" '%s' ""
      return 1
    fi

    echo "Please specify an example by its number."
    echo "Examples available:"
    _se_line=0
    for _se_found in "${_se_all[@]}"; do
      _se_line=$((_se_line + 1))
      _se_rel=${_se_found#"${_se_dir}"/}
      _se_desc=$(grep 'description' "${_se_found}" | tail -1 | sed 's/"description"[^"]*"\([^"]*\)".*/\1/')
      echo "${_se_line} - ${_se_rel} - ${_se_desc:-No Description}"
    done

    read -r -p "[1] > " _se_choice || _se_eof=1
    echo " "
    _se_choice=${_se_choice:-1}

    # Only accept a plain decimal number inside the listed range; 10# stops a
    # leading zero from being read as octal.
    if [[ ${_se_choice} =~ ^[0-9]+$ ]] \
        && (( 10#${_se_choice} >= 1 && 10#${_se_choice} <= ${#_se_all[@]} )); then
      _se_file=${_se_all[$((10#${_se_choice} - 1))]}
    else
      echo "Invalid selection '${_se_choice}'."
      _se_file=""
    fi
  done

  # printf -v assigns without eval, so quotes in the path cannot break out.
  printf -v "${_se_outvar}" '%s' "${_se_file}"
}
|
|
@ -91,17 +91,51 @@ class WikipediaIrcDecoder implements IrcDecoder
|
|||
this.namespaces = namespaces;
|
||||
this.geoIpDatabase = geoIpDatabase;
|
||||
|
||||
File geoDb;
|
||||
if (geoIpDatabase != null) {
|
||||
geoDb = new File(geoIpDatabase);
|
||||
this.geoLookup = openGeoIpDb(new File(geoIpDatabase));
|
||||
} else {
|
||||
this.geoLookup = openDefaultGeoIpDb();
|
||||
}
|
||||
}
|
||||
|
||||
private DatabaseReader openDefaultGeoIpDb() {
|
||||
File geoDb = new File(System.getProperty("java.io.tmpdir"),
|
||||
this.getClass().getCanonicalName() + ".GeoLite2-City.mmdb");
|
||||
try {
|
||||
String tmpDir = System.getProperty("java.io.tmpdir");
|
||||
return openDefaultGeoIpDb(geoDb);
|
||||
}
|
||||
catch (RuntimeException e) {
|
||||
log.warn(e.getMessage()+" Attempting to re-download.", e);
|
||||
if (geoDb.exists() && !geoDb.delete()) {
|
||||
throw new RuntimeException("Could not delete geo db file ["+ geoDb.getAbsolutePath() +"].");
|
||||
}
|
||||
// local download may be corrupt, will retry once.
|
||||
return openDefaultGeoIpDb(geoDb);
|
||||
}
|
||||
}
|
||||
|
||||
geoDb = new File(tmpDir, this.getClass().getCanonicalName() + ".GeoLite2-City.mmdb");
|
||||
private DatabaseReader openDefaultGeoIpDb(File geoDb) {
|
||||
downloadGeoLiteDbToFile(geoDb);
|
||||
return openGeoIpDb(geoDb);
|
||||
}
|
||||
|
||||
if (!geoDb.exists()) {
|
||||
log.info("Downloading geo ip database to [%s]. This may take a few minutes.", geoDb);
|
||||
private DatabaseReader openGeoIpDb(File geoDb) {
|
||||
try {
|
||||
DatabaseReader reader = new DatabaseReader(geoDb);
|
||||
log.info("Using geo ip database at [%s].", geoDb);
|
||||
return reader;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Could not open geo db at ["+ geoDb.getAbsolutePath() +"].", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void downloadGeoLiteDbToFile(File geoDb) {
|
||||
if (geoDb.exists()) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
log.info("Downloading geo ip database to [%s]. This may take a few minutes.", geoDb.getAbsolutePath());
|
||||
|
||||
File tmpFile = File.createTempFile("druid", "geo");
|
||||
|
||||
|
@ -111,22 +145,13 @@ class WikipediaIrcDecoder implements IrcDecoder
|
|||
),
|
||||
tmpFile
|
||||
);
|
||||
|
||||
if (!tmpFile.renameTo(geoDb)) {
|
||||
throw new RuntimeException("Unable to move geo file!");
|
||||
}
|
||||
} else {
|
||||
log.info("Using geo ip database at [%s].", geoDb);
|
||||
throw new RuntimeException("Unable to move geo file to ["+geoDb.getAbsolutePath()+"]!");
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("Unable to download geo ip database [%s]", e);
|
||||
}
|
||||
}
|
||||
try {
|
||||
geoLookup = new DatabaseReader(geoDb);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("Unable to open geo ip lookup database", e);
|
||||
throw new RuntimeException("Unable to download geo ip database.", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue