mirror of https://github.com/apache/lucene.git
177 lines
5.5 KiB
Bash
Executable File
177 lines
5.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO wishlist:
# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
# - bin/post collection "file with spaces.csv" does not work, breaks arguments at whitespace apparently.
# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "<delete><id>SP2514N</id></delete>"
# - convert OPTIONS (key=val pass-through to SPT) to standard 'nix switches

# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)

# Path this script was invoked as; rewritten below until it no longer names a symlink.
THIS_SCRIPT="$0"

# Resolve symlinks to this script
while [ -h "$THIS_SCRIPT" ]; do
  ls=$(ls -ld "$THIS_SCRIPT")
  # Drop everything prior to the last "-> " (what remains is the link target)
  link=${ls##*-> }
  if [[ "$link" == /* ]]; then
    # Absolute target: use it as-is.
    THIS_SCRIPT="$link"
  else
    # Relative target: interpret it relative to the directory holding the link.
    THIS_SCRIPT="$(dirname "$THIS_SCRIPT")/$link"
  fi
done

# SOLR_TIP is the absolute path of the parent of the directory containing the
# resolved script. Fail loudly if that directory cannot be entered instead of
# silently falling back to the current working directory.
SOLR_TIP="$(dirname "$THIS_SCRIPT")/.."
SOLR_TIP=$(cd "$SOLR_TIP" && pwd) || {
  echo >&2 "Unable to resolve the parent directory of $THIS_SCRIPT"
  exit 1
}

# Choose the java launcher. SOLR_JAVA_HOME takes precedence over JAVA_HOME;
# with neither set, "java" is looked up on PATH.
if [ -n "$SOLR_JAVA_HOME" ]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [ -n "$JAVA_HOME" ]; then
  # Try bin/amd64/java first, then bin/java; the first executable one wins.
  # If neither is executable JAVA is not assigned here and the check below fails.
  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
    if [ -x "$java" ]; then
      JAVA="$java"
      break
    fi
  done
else
  JAVA=java
fi

# test that Java exists and is executable on this server
# (quoted so that a launcher path containing spaces is run as one word)
"$JAVA" -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."; exit 1; }


# ===== post specific code

# Classpath entry for SimplePostTool. The wildcard is stored literally here
# (assignments do not perform pathname expansion); it is expanded only where
# the variable is later used unquoted.
TOOL_JAR="${SOLR_TIP}/dist/solr-core-*.jar"

# Prints the command-line help (synopsis, OPTIONS, examples) to stdout.
# Globals: THIS_SCRIPT (read) - shown as the command name in the examples.
function print_usage() {
  # Unquoted delimiter so that $THIS_SCRIPT is expanded; the text contains no
  # other '$', backtick or backslash, so everything else is emitted literally.
  cat <<EOF

Usage: post -c <collection/core> <files|directories|urls> [OPTIONS]
 or post -help

 collection name defaults to DEFAULT_SOLR_COLLECTION if not specified

OPTIONS
=======
 Solr options:
 url=<base Solr update URL> (overrides collection, host, and port)
 host=<host> (default: localhost)
 port=<port> (default: 8983)
 commit=yes|no (default: yes)

 Web crawl options:
 recursive=<depth> (default: 1)
 delay=<seconds> (default=10)

 Directory crawl options:
 delay=<seconds> (default=0)

 Other options:
 filetypes=<type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)
 params="<key>=<value>[&<key>=<value>...]" (values must be URL-encoded)
 out=yes|no (default=no; yes outputs Solr response to console)


Examples:

JSON file: $THIS_SCRIPT -c wizbang events.json
XML files: $THIS_SCRIPT -c records article*.xml
CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv
Directory of files: $THIS_SCRIPT -c myfiles ~/Documents
Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com recursive=2 delay=1

EOF
} # end print_usage

# Show the help text and stop only when the sole argument is a help switch.
if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
  print_usage
  exit
fi


# ----- Argument handling and SimplePostTool (SPT) invocation -----

# Target collection; -c on the command line overrides DEFAULT_SOLR_COLLECTION.
COLLECTION=$DEFAULT_SOLR_COLLECTION
# JVM system properties passed to SPT. Kept as an array (not a flat string) so
# each property stays one argument even when its value contains whitespace or
# glob characters, e.g. params="a=1&b=x y".
PROPS=("-Dauto=yes")
RECURSIVE=""
FILES=()
URLS=()

while [ $# -gt 0 ]; do
  # TODO: natively handle the optional parameters to SPT
  # but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com
  if [[ "$1" == "-c" ]]; then
    # Pull out collection name; a trailing -c without a value is an error
    # rather than silently clearing the collection.
    if [ $# -lt 2 ]; then
      echo -e "\nOption -c requires a collection name. See '$THIS_SCRIPT -h' for usage instructions.\n"
      exit 1
    fi
    shift
    COLLECTION=$1
  elif [[ -d "$1" ]]; then
    # Directory: posted in "files" mode with -Drecursive=yes
    RECURSIVE="-Drecursive=yes"
    FILES+=("$1")
  elif [[ -f "$1" ]]; then
    # File
    FILES+=("$1")
  elif [[ "$1" == http* ]]; then
    # URL
    URLS+=("$1")
  else
    # Not a file, directory or URL. Consider it a property to SPT
    PROPS+=("-D$1")
  fi
  shift
done

# Check for errors
if [[ ${#FILES[@]} -gt 0 && ${#URLS[@]} -gt 0 ]]; then
  echo -e "\nCombining files (or directories) and URLs is not supported. Post them separately.\n"
  exit 1
fi

if [[ ${#FILES[@]} -eq 0 && ${#URLS[@]} -eq 0 ]]; then
  echo -e "\nNo files, directories, or URLs were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

if [[ -z "$COLLECTION" ]]; then
  echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
  exit 1
fi

# Exactly one of FILES / URLS is non-empty at this point (checked above); it
# determines the SPT data mode and the positional arguments.
if [[ ${#FILES[@]} -gt 0 ]]; then
  MODE="files"
  PARAMS=("${FILES[@]}")
else
  MODE="web"
  PARAMS=("${URLS[@]}")
fi

PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ -n "$RECURSIVE" ]]; then
  PROPS+=("$RECURSIVE")
fi

# Expands the glob pattern held in TOOL_JAR into the array TOOL_CLASSPATH.
# IFS is emptied (function-local) so pathname expansion still happens but a
# space in the path does not split the result into several words.
resolve_tool_classpath() {
  local IFS=
  # shellcheck disable=SC2206  # unquoted on purpose: pathname expansion is wanted
  TOOL_CLASSPATH=( $TOOL_JAR )
}
resolve_tool_classpath

#echo "$JAVA" -classpath "${TOOL_CLASSPATH[@]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
# The script's exit status is that of the java process.
"$JAVA" -classpath "${TOOL_CLASSPATH[@]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"