#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)

THIS_SCRIPT="$0"
# Resolve symlinks to this script: follow each link until THIS_SCRIPT names a
# path that is not itself a symbolic link.
while [[ -h "$THIS_SCRIPT" ]]; do
  ls_out="$(ls -ld "$THIS_SCRIPT")"
  # Keep only the link target, i.e. everything after the last "-> ".
  link="${ls_out##*-> }"
  if [[ "$link" == /* ]]; then
    # Absolute target: use it as-is.
    THIS_SCRIPT="$link"
  else
    # Relative target: it is relative to the directory holding the link.
    THIS_SCRIPT="$(dirname "$THIS_SCRIPT")/$link"
  fi
done
# SOLR_TIP is the absolute path of the parent of the directory containing this script.
SOLR_TIP="$(dirname "$THIS_SCRIPT")/.."
# CDPATH is cleared for this cd so it cannot redirect a relative path or echo
# the directory into the captured output; a failed cd aborts instead of
# silently yielding the current working directory.
SOLR_TIP="$(CDPATH= cd -- "$SOLR_TIP" && pwd)" || {
  echo >&2 "Unable to resolve the directory above $THIS_SCRIPT"
  exit 1
}
# Pick the java binary: SOLR_JAVA_HOME takes precedence, then JAVA_HOME,
# otherwise whatever "java" resolves to on the PATH.
if [[ -n "$SOLR_JAVA_HOME" ]]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [[ -n "$JAVA_HOME" ]]; then
  # Reset first so a JAVA value inherited from the environment is not used
  # when neither candidate below is executable.
  JAVA=""
  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
    if [[ -x "$java" ]]; then
      JAVA="$java"
      break
    fi
  done
else
  JAVA=java
fi

# test that Java exists and is executable on this server
"$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; }
# ===== post specific code

# Expand the solr-core jar glob under $SOLR_TIP/dist into an array; element 0
# is what gets passed as the classpath when SimplePostTool is launched. If
# nothing matches, the array holds the unexpanded pattern.
TOOL_JAR=("$SOLR_TIP/dist"/solr-core-*.jar)

#######################################
# Print the command-line help text for this script.
# Globals:   THIS_SCRIPT (read) - shown as the command name in the examples
# Arguments: none
# Outputs:   usage text on stdout
#######################################
function print_usage() {
  echo ""
  echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
  echo " or post -help"
  echo ""
  echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
  echo ""
  echo "OPTIONS"
  echo "======="
  echo " Solr options:"
  echo " -url <base Solr update URL> (overrides collection, host, and port)"
  echo " -host <host> (default: localhost)"
  echo " -p or -port <port> (default: 8983)"
  echo " -commit yes|no (default: yes)"
  # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
  echo ""
  echo " Web crawl options:"
  echo " -recursive <depth> (default: 1)"
  echo " -delay <seconds> (default: 10)"
  echo ""
  echo " Directory crawl options:"
  echo " -delay <seconds> (default: 0)"
  echo ""
  echo " stdin/args options:"
  echo " -type <content/type> (default: application/xml)"
  echo ""
  echo " Other options:"
  echo " -filetypes <type>[,<type>,...] (default: xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
  echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
  echo " -out yes|no (default: no; yes outputs Solr response to console)"
  echo " -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs)"
  echo ""
  echo ""
  echo "Examples:"
  echo ""
  echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
  echo "* XML files: $THIS_SCRIPT -c records article*.xml"
  echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
  echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
  echo "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucene.apache.org/solr -recursive 1 -delay 1"
  # The inner double quotes are escaped so the JSON example is printed intact.
  echo "* Standard input (stdin): echo '{\"commit\": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
  # Inside double quotes the \$'...' text is printed literally, which is what
  # the user is meant to type.
  echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d \$'id,value\n1,0.47'"
  echo ""
} # end print_usage
# A lone -help / -h / -usage argument prints the usage text and stops.
if [[ $# -eq 1 ]]; then
  case "$1" in
    -help | -h | -usage)
      print_usage
      exit
      ;;
  esac
fi
# Defaults for the state filled in while the command-line arguments are parsed.
COLLECTION="$DEFAULT_SOLR_COLLECTION"
PROPS=('-Dauto=yes')
RECURSIVE=""
FILES=()
URLS=()
ARGS=()
# MODE and SOLR_URL are only ever assigned during argument parsing; reset them
# here so values inherited from the caller's environment cannot leak into the
# validation that follows.
MODE=""
SOLR_URL=""
#######################################
# Classify every command-line argument.
# Globals (written): RECURSIVE, FILES, URLS, COLLECTION, PROPS, MODE, ARGS,
#                    SOLR_URL
# Arguments: the script's command-line arguments
# Outputs:   an error message on stdout for an unrecognized argument
# Returns:   exits the script with status 1 on an unrecognized argument
#######################################
parse_args() {
  local key
  while [[ $# -gt 0 ]]; do
    # TODO: natively handle the optional parameters to SPT
    # but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com
    if [[ -d "$1" ]]; then
      # Directory: posted as files, with recursion switched on
      RECURSIVE=yes
      FILES+=("$1")
    elif [[ -f "$1" ]]; then
      # File
      FILES+=("$1")
    elif [[ "$1" == http* ]]; then
      # URL
      URLS+=("$1")
    elif [[ "$1" == -* ]]; then
      if [[ "$1" == "-c" ]]; then
        # Special case, pull out collection name
        shift
        COLLECTION="$1"
      elif [[ "$1" == "-p" ]]; then
        # -p alias for -port for convenience and compatibility with `bin/solr start`
        shift
        PROPS+=("-Dport=$1")
      elif [[ "$1" == "-d" || "$1" == "--data" || "$1" == "-" ]]; then
        # NOTE(review): -s tests the size reported for stdin; whether a pipe
        # reports a non-zero size looks platform-dependent - confirm piped
        # input is detected on every supported platform.
        if [[ -s /dev/stdin ]]; then
          MODE="stdin"
        else
          # when no stdin exists and -d specified, the rest of the arguments
          # are assumed to be strings to post as-is
          MODE="args"
          shift
          if [[ $# -gt 0 ]]; then
            ARGS=("$@")
            shift $#
          fi
        fi
      else
        # Any other -name value pair is passed through as -Dname=value
        key="${1:1}"
        shift
        PROPS+=("-D$key=$1")
        if [[ "$key" == "url" ]]; then
          SOLR_URL=$1
        fi
      fi
    else
      echo -e "\nUnrecognized argument: $1\n"
      echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
      exit 1
    fi
    # Drop the argument just handled; nothing is left to drop when an option
    # was the last word or -d consumed the remainder.
    if [[ $# -gt 0 ]]; then
      shift
    fi
  done
}

parse_args "$@"

# Check for errors
# A target is mandatory: either a collection name or an explicit update URL.
if [[ $COLLECTION == "" && $SOLR_URL == "" ]]; then
  echo -e "\nCollection or URL must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment, or use -url instead.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# Unsupported: bin/post -c foo
# There must be something to post: files/directories, URLs, stdin or -d strings.
if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
  echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
# The following are unsupported constructs:
# bin/post -c foo existing_file.csv http://example.com
# echo '<xml.../>' | bin/post -c foo existing_file.csv
# bin/post -c foo existing_file.csv -d 'anything'
# First alternative: files and URLs given together. Second alternative: files
# or URLs given together with stdin/-d data.
if [[ (${#FILES[@]} != 0 && ${#URLS[@]} != 0 && $MODE != "stdin" && $MODE != "args")
      || ((${#FILES[@]} != 0 || ${#URLS[@]} != 0) && ($MODE == "stdin" || $MODE == "args")) ]]; then
  echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
  exit 1
fi
# Settle the data mode and the positional parameters handed to SimplePostTool.
# PARAMS starts as an empty array: a scalar "" would expand "${PARAMS[@]}" to
# one empty argument.
PARAMS=()
# TODO: let's simplify this
if [[ $MODE != "stdin" && $MODE != "args" ]]; then
  if [[ ${#FILES[@]} -gt 0 ]]; then
    MODE="files"
    PARAMS=("${FILES[@]}")
  fi
  if [[ ${#URLS[@]} -gt 0 ]]; then
    MODE="web"
    PARAMS=("${URLS[@]}")
  fi
else
  if [[ ${#ARGS[@]} == 0 ]]; then
    # SPT needs a valid (to post to Solr) args string, useful for 'bin/post -c foo -d' to force a commit
    ARGS+=("<add/>")
  fi
  PARAMS=("${ARGS[@]}")
fi
# Pass the collection and data mode through as system properties, plus the
# recursion flag when a directory was given.
PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ -n "$RECURSIVE" ]]; then
  PROPS+=('-Drecursive=yes')
fi

# Show the command line, then run it. The java invocation stays the last
# command so that its exit status becomes the exit status of this script.
echo "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
"$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"

# post smoker:
# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'