mirror of https://github.com/apache/lucene.git
230 lines
7.7 KiB
Bash
Executable File
230 lines
7.7 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)
|
|
|
|
# Locate this script on disk, following symlinks, so the installation root
# (SOLR_TIP, the parent of the script's directory) can be derived from it.
THIS_SCRIPT="$0"

# Resolve symlinks to this script
while [[ -h "$THIS_SCRIPT" ]]; do
  # `ls -ld` prints "... name -> target" for a symlink; it is used here in
  # place of readlink, which the original code also avoided.
  ls_line=$(ls -ld "$THIS_SCRIPT")
  # Drop everything prior to ->
  link=$(expr "$ls_line" : '.*-> \(.*\)$')
  if [[ "$link" == /* ]]; then
    # Absolute target: use it as-is.
    THIS_SCRIPT="$link"
  else
    # Relative target: interpret it relative to the link's own directory.
    THIS_SCRIPT="$(dirname "$THIS_SCRIPT")/$link"
  fi
done

SOLR_TIP="$(dirname "$THIS_SCRIPT")/.."
# Canonicalize to an absolute path. CDPATH is cleared so `cd` cannot print a
# directory name into the captured output, and a failed `cd` aborts instead of
# silently turning SOLR_TIP into the caller's current directory.
SOLR_TIP=$(CDPATH= cd -- "$SOLR_TIP" && pwd) || {
  echo >&2 "Unable to resolve the installation directory from $THIS_SCRIPT"
  exit 1
}
|
|
|
|
# Choose the Java launcher. Precedence: SOLR_JAVA_HOME, then JAVA_HOME,
# then whatever `java` resolves to on the PATH.
if [[ -z "$SOLR_JAVA_HOME" && -z "$JAVA_HOME" ]]; then
  JAVA=java
elif [[ -n "$SOLR_JAVA_HOME" ]]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
else
  # Under JAVA_HOME, take the first executable among the known locations.
  for java in "$JAVA_HOME/bin/amd64/java" "$JAVA_HOME/bin/java"; do
    [[ -x "$java" ]] || continue
    JAVA="$java"
    break
  done
fi

# test that Java exists and is executable on this server
if ! "$JAVA" -version >/dev/null 2>&1; then
  echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."
  exit 1
fi
|
|
|
|
|
|
# ===== post specific code
|
|
|
|
# Candidate jars for the post tool: every solr-core-*.jar under $SOLR_TIP/dist.
# Only element 0 is used (as the Java classpath) when the tool is launched.
# NOTE(review): nullglob is not enabled in this script, so if nothing matches
# the array holds the literal, unexpanded pattern — confirm that is acceptable.
TOOL_JAR=("$SOLR_TIP/dist"/solr-core-*.jar)
|
|
|
|
#######################################
# Print the command-line help for this script.
# Globals:   THIS_SCRIPT (read) - shown as the command name in the examples
# Arguments: none
# Outputs:   usage text on stdout
#######################################
function print_usage() {
  echo ""
  echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
  echo " or post -help"
  echo ""
  echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
  echo ""
  echo "OPTIONS"
  echo "======="
  echo " Solr options:"
  echo " -url <base Solr update URL> (overrides collection, host, and port)"
  echo " -host <host> (default: localhost)"
  echo " -p or -port <port> (default: 8983)"
  echo " -commit yes|no (default: yes)"
  # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
  echo ""
  echo " Web crawl options:"
  echo " -recursive <depth> (default: 1)"
  echo " -delay <seconds> (default: 10)"
  echo ""
  echo " Directory crawl options:"
  echo " -delay <seconds> (default: 0)"
  echo ""
  echo " stdin/args options:"
  echo " -type <content/type> (default: application/xml)"
  echo ""
  echo " Other options:"
  echo " -filetypes <type>[,<type>,...] (default: xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
  echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
  echo " -out yes|no (default: no; yes outputs Solr response to console)"
  echo " -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs)"
  echo ""
  echo ""
  echo "Examples:"
  echo ""
  echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
  echo "* XML files: $THIS_SCRIPT -c records article*.xml"
  echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
  echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
  echo "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucene.apache.org/solr -recursive 1 -delay 1"
  # The inner double quotes must be escaped; unescaped they terminate the
  # surrounding string and the JSON example is printed without its quotes.
  echo "* Standard input (stdin): echo '{\"commit\": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
  # $'...' is not expanded inside double quotes, so it is printed literally.
  echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d $'id,value\n1,0.47'"
  echo ""
} # end print_usage
|
|
|
|
# A lone help flag prints the usage text and stops before any parsing.
if (( $# == 1 )); then
  case "$1" in
    -help | -h | -usage)
      print_usage
      exit
      ;;
  esac
fi
|
|
|
|
|
|
#######################################
# Parse the command line into the globals consumed by the rest of the script.
# Globals (all reset on every call):
#   COLLECTION - value of -c, else $DEFAULT_SOLR_COLLECTION
#   PROPS      - -D properties for the Java tool (always starts with -Dauto=yes)
#   RECURSIVE  - "yes" once any directory argument has been seen
#   FILES      - arguments that are existing files or directories
#   URLS       - arguments starting with "http" that are not existing paths
#   MODE       - "stdin" or "args" when -d/--data/- is given, otherwise empty
#   ARGS       - the strings following -d when MODE is "args"
#   SOLR_URL   - value of -url, if given
# Arguments: the script's command-line arguments
# Exits:     1 on an unrecognized argument or an option missing its value
#######################################
parse_post_args() {
  COLLECTION="$DEFAULT_SOLR_COLLECTION"
  PROPS=('-Dauto=yes')
  RECURSIVE=""
  FILES=()
  URLS=()
  ARGS=()
  # Start these empty so same-named variables exported in the caller's
  # environment cannot influence the validation or the -Ddata property.
  MODE=""
  SOLR_URL=""

  local key
  while (( $# > 0 )); do
    # TODO: natively handle the optional parameters to SPT
    # but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com

    if [[ -d "$1" ]]; then
      # Directory
      RECURSIVE=yes
      FILES+=("$1")
    elif [[ -f "$1" ]]; then
      # File
      FILES+=("$1")
    elif [[ "$1" == http* ]]; then
      # URL
      URLS+=("$1")
    else
      case "$1" in
        -d | --data | -)
          if [[ -s /dev/stdin ]]; then
            MODE="stdin"
          else
            # when no stdin exists and -d specified, the rest of the arguments
            # are assumed to be strings to post as-is
            MODE="args"
            shift
            if (( $# > 0 )); then
              ARGS=("$@")
              shift $#
            fi
          fi
          ;;
        -*)
          # Every other option takes exactly one value; without this check a
          # trailing option would silently become an empty -Dkey= property.
          if (( $# < 2 )); then
            echo -e "\nOption $1 requires a value.\n" >&2
            exit 1
          fi
          key="${1:1}"
          shift
          case "$key" in
            c)
              # Special case, pull out collection name
              COLLECTION="$1"
              ;;
            p)
              # -p alias for -port for convenience and compatibility with `bin/solr start`
              PROPS+=("-Dport=$1")
              ;;
            *)
              # Any other -key value pair is handed through as -Dkey=value.
              PROPS+=("-D$key=$1")
              if [[ "$key" == "url" ]]; then
                SOLR_URL="$1"
              fi
              ;;
          esac
          ;;
        *)
          echo -e "\nUnrecognized argument: $1\n"
          echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
          exit 1
          ;;
      esac
    fi

    # Drop the argument just handled; nothing is left after -d in args mode.
    if (( $# > 0 )); then
      shift
    fi
  done
}

parse_post_args "$@"
|
|
|
|
# Check for errors: a target (collection name or explicit update URL) is required.
if [[ -z "$COLLECTION" && -z "$SOLR_URL" ]]; then
  echo -e "\nCollection or URL must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment, or use -url instead.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# Summarize what kinds of input were given on the command line.
file_count=${#FILES[@]}
url_count=${#URLS[@]}
case "$MODE" in
  stdin | args) inline_data=1 ;;
  *) inline_data=0 ;;
esac

# Unsupported: bin/post -c foo
if (( file_count == 0 && url_count == 0 && inline_data == 0 )); then
  echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
# The following are unsupported constructs:
# bin/post -c foo existing_file.csv http://example.com
# echo '<xml.../>' | bin/post -c foo existing_file.csv
# bin/post -c foo existing_file.csv -d 'anything'
if (( inline_data )); then
  # stdin/-d data may not be combined with any file or URL.
  mixed_sources=$(( file_count != 0 || url_count != 0 ))
else
  # Without stdin/-d data, files and URLs may not be combined with each other.
  mixed_sources=$(( file_count != 0 && url_count != 0 ))
fi
if (( mixed_sources )); then
  echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
  exit 1
fi
|
|
|
|
PARAMS=""

# Settle the final data mode and the positional arguments handed to the tool.
case "$MODE" in
  stdin | args)
    if (( ${#ARGS[@]} == 0 )); then
      # SPT needs a valid (to post to Solr) args string, useful for 'bin/post -c foo -d' to force a commit
      ARGS+=("<add/>")
    fi
    PARAMS=("${ARGS[@]}")
    ;;
  *)
    # Files are considered first; URLs, when present, take precedence.
    if [[ -n "${FILES[0]}" ]]; then
      MODE="files"
      PARAMS=("${FILES[@]}")
    fi
    if [[ -n "${URLS[0]}" ]]; then
      MODE="web"
      PARAMS=("${URLS[@]}")
    fi
    ;;
esac

PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
[[ -z "$RECURSIVE" ]] || PROPS+=('-Drecursive=yes')

# Assemble the command once, print it, then run it; the tool's exit status
# becomes the status of the last command executed here.
post_cmd=("$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}")
echo "${post_cmd[@]}"
"${post_cmd[@]}"
|
|
|
|
# post smoker:
|
|
# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
|
|
# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'
|