lucene/solr/bin/post

177 lines
5.5 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO wishlist:
# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
# - bin/post collection "file with spaces.csv" does not work, breaks arguments at whitespace apparently.
# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "<delete><id>SP2514N</id></delete>"
# - convert OPTIONS (key=val pass-through to SPT) to standard 'nix switches
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)
# Path this script was invoked as; rewritten below until it no longer names a
# symlink, so that SOLR_TIP is derived from the real location of the script.
THIS_SCRIPT="$0"

# Resolve symlinks to this script
while [ -h "$THIS_SCRIPT" ]; do
  ls=$(ls -ld "$THIS_SCRIPT")
  # Drop everything prior to the last "-> " (the link target follows it)
  link=${ls##*-> }
  if [[ "$link" == /* ]]; then
    # Absolute target: use it as-is
    THIS_SCRIPT="$link"
  else
    # Relative target: it is relative to the directory holding the link
    THIS_SCRIPT=$(dirname "$THIS_SCRIPT")/"$link"
  fi
done

# The install root is the parent of the directory containing this script.
SOLR_TIP=$(dirname "$THIS_SCRIPT")/..
# Canonicalise to an absolute path. CDPATH is cleared so a relative path cannot
# be resolved against it (which would also make cd print the directory into the
# captured output), and pwd only runs if cd succeeded so a failure is not
# silently replaced by the caller's working directory.
SOLR_TIP=$(CDPATH= cd -- "$SOLR_TIP" && pwd) || {
  echo >&2 "Unable to resolve the Solr install directory from $THIS_SCRIPT"
  exit 1
}
# Pick the java executable: SOLR_JAVA_HOME takes precedence, then JAVA_HOME,
# otherwise whatever "java" resolves to on the PATH.
if [ -n "$SOLR_JAVA_HOME" ]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [ -n "$JAVA_HOME" ]; then
  # Use the first candidate that is executable. If none is, JAVA is not
  # assigned here and the check below reports the problem.
  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
    if [ -x "$java" ]; then
      JAVA="$java"
      break
    fi
  done
else
  JAVA=java
fi

# test that Java exists and is executable on this server
# ($JAVA is quoted so a Java home containing whitespace is run as one path,
# matching how the tool itself is launched later in this script.)
"$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; }
# ===== post specific code
# Glob pattern for the Solr core jar under the install root. The wildcard is
# stored literally here; it is only expanded where the variable is later used.
TOOL_JAR="${SOLR_TIP}/dist/solr-core-*.jar"
# Print the command-line help text to stdout.
# Globals: THIS_SCRIPT (read) - shown as the command name in the examples.
print_usage() {
  # One argument per output line; printf repeats the format for each of them.
  printf '%s\n' \
    '' \
    'Usage: post -c <collection/core> <files|directories|urls> [OPTIONS]' \
    ' or post -help' \
    '' \
    ' collection name defaults to DEFAULT_SOLR_COLLECTION if not specified' \
    '' \
    'OPTIONS' \
    '=======' \
    ' Solr options:' \
    ' url=<base Solr update URL> (overrides collection, host, and port)' \
    ' host=<host> (default: localhost)' \
    ' port=<port> (default: 8983)' \
    ' commit=yes|no (default: yes)' \
    '' \
    ' Web crawl options:' \
    ' recursive=<depth> (default: 1)' \
    ' delay=<seconds> (default=10)' \
    '' \
    ' Directory crawl options:' \
    ' delay=<seconds> (default=0)' \
    '' \
    ' Other options:' \
    ' filetypes=<type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)' \
    ' params="<key>=<value>[&<key>=<value>...]" (values must be URL-encoded)' \
    ' out=yes|no (default=no; yes outputs Solr response to console)' \
    '' \
    '' \
    'Examples:' \
    '' \
    "JSON file: $THIS_SCRIPT -c wizbang events.json" \
    "XML files: $THIS_SCRIPT -c records article*.xml" \
    "CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv" \
    "Directory of files: $THIS_SCRIPT -c myfiles ~/Documents" \
    "Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com recursive=2 delay=1" \
    ''
} # end print_usage
# Show the usage text and stop when the only argument is a help switch.
if (( $# == 1 )); then
  case "$1" in
    -help|-h|-usage)
      print_usage
      exit
      ;;
  esac
fi
# ---- Parse the command line, validate it, and launch SimplePostTool (SPT).
#
# Globals read:  DEFAULT_SOLR_COLLECTION, THIS_SCRIPT, JAVA, TOOL_JAR
# Exit status:   1 on a usage error, otherwise the exit status of the JVM.

# Collection defaults to the environment; -c on the command line overrides it.
COLLECTION=$DEFAULT_SOLR_COLLECTION
# PROPS is an array so that every -D option reaches the JVM as exactly one
# argument, even when its value (or the collection name) contains whitespace.
PROPS=("-Dauto=yes")
RECURSIVE=""
FILES=()
URLS=()

while [ $# -gt 0 ]; do
  # TODO: natively handle the optional parameters to SPT
  #       but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com
  if [[ "$1" == "-c" ]]; then
    # Pull out collection name; fail loudly rather than silently blanking the
    # collection when -c is the last argument.
    if [[ $# -lt 2 ]]; then
      echo -e "\nOption -c requires a collection name. See '$THIS_SCRIPT -h' for usage instructions.\n"
      exit 1
    fi
    shift
    COLLECTION=$1
  elif [[ -d "$1" ]]; then
    # Directory: posted recursively
    RECURSIVE="-Drecursive=yes"
    FILES+=("$1")
  elif [[ -f "$1" ]]; then
    # File
    FILES+=("$1")
  elif [[ "$1" == http* ]]; then
    # URL
    URLS+=("$1")
  else
    # Not a file, directory or URL. Consider it a property to SPT
    PROPS+=("-D$1")
  fi
  shift
done

# Check for errors
if [[ ${#FILES[@]} != 0 && ${#URLS[@]} != 0 ]]; then
  echo -e "\nCombining files (or directories) and URLs is not supported. Post them separately.\n"
  exit 1
fi

if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 ]]; then
  echo -e "\nNo files, directories, or URLs were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

if [[ -z "$COLLECTION" ]]; then
  echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
  exit 1
fi

# The checks above guarantee exactly one of FILES / URLS is non-empty, so the
# array length (not just element 0) decides the posting mode.
if [[ ${#FILES[@]} != 0 ]]; then
  MODE="files"
  PARAMS=("${FILES[@]}")
else
  MODE="web"
  PARAMS=("${URLS[@]}")
fi

# Same option order as before: user props, then collection, mode, recursion.
PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ -n "$RECURSIVE" ]]; then
  PROPS+=("$RECURSIVE")
fi

# Expand the glob pattern held in TOOL_JAR into the TOOL_JARS array.
# IFS is emptied locally so only pathname expansion happens: an install path
# containing spaces is no longer split into several words.
expand_tool_jar() {
  local IFS=
  TOOL_JARS=( $TOOL_JAR )
}
expand_tool_jar

#echo "$JAVA" -classpath "${TOOL_JARS[@]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
"$JAVA" -classpath "${TOOL_JARS[@]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"