mirror of https://github.com/apache/lucene.git
SOLR-6900: bin/post improvements including glob handling, spaces in file names, and improved help output
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1651895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a6b2647c92
commit
d23496f4da
|
@ -282,7 +282,7 @@ New Features
|
||||||
* SOLR-6879: Have an option to disable autoAddReplicas temporarily for all collections.
|
* SOLR-6879: Have an option to disable autoAddReplicas temporarily for all collections.
|
||||||
(Varun Thacker via Steve Rowe)
|
(Varun Thacker via Steve Rowe)
|
||||||
|
|
||||||
* SOLR-6435: Add bin/post script to simplify posting content to Solr (ehatcher)
|
* SOLR-6435: Add bin/post script to simplify posting content to Solr (Erik Hatcher)
|
||||||
|
|
||||||
* SOLR-6761: Ability to ignore commit and/or optimize requests from clients when running in
|
* SOLR-6761: Ability to ignore commit and/or optimize requests from clients when running in
|
||||||
SolrCloud mode using the IgnoreCommitOptimizeUpdateProcessorFactory. (Timothy Potter)
|
SolrCloud mode using the IgnoreCommitOptimizeUpdateProcessorFactory. (Timothy Potter)
|
||||||
|
|
183
solr/bin/post
183
solr/bin/post
|
@ -14,17 +14,31 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# Usage:
|
# TODO wishlist:
|
||||||
# bin/post <collection> <file(s)|url> [<params to SimplePostTool>]
|
# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
|
||||||
# bin/post gettingstarted http://lucidworks.com [recursive=1] [delay=1]
|
# - bin/post collection "file with spaces.csv" does not work, breaks arguments at whitespace apparently.
|
||||||
# bin/post tehfiles ~/Documents
|
# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "<delete><id>SP2514N</id></delete>"
|
||||||
# bin/post signals LATEST-signals.csv
|
# - convert OPTIONS (key=val pass-through to SPT) to standard 'nix switches
|
||||||
# bin/post records article*.xml
|
|
||||||
# bin/post wizbang events.json
|
|
||||||
|
|
||||||
# TODO: handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
|
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)
|
||||||
|
|
||||||
|
THIS_SCRIPT="$0"
|
||||||
|
|
||||||
|
# Resolve symlinks to this script
|
||||||
|
while [ -h "$THIS_SCRIPT" ] ; do
|
||||||
|
ls=`ls -ld "$THIS_SCRIPT"`
|
||||||
|
# Drop everything prior to ->
|
||||||
|
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||||
|
if expr "$link" : '/.*' > /dev/null; then
|
||||||
|
THIS_SCRIPT="$link"
|
||||||
|
else
|
||||||
|
THIS_SCRIPT=`dirname "$THIS_SCRIPT"`/"$link"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
SOLR_TIP=`dirname "$THIS_SCRIPT"`/..
|
||||||
|
SOLR_TIP=`cd "$SOLR_TIP"; pwd`
|
||||||
|
|
||||||
# ====== Common code copied from bin/solr (TODO: centralize/share this kind of thing)
|
|
||||||
if [ -n "$SOLR_JAVA_HOME" ]; then
|
if [ -n "$SOLR_JAVA_HOME" ]; then
|
||||||
JAVA=$SOLR_JAVA_HOME/bin/java
|
JAVA=$SOLR_JAVA_HOME/bin/java
|
||||||
elif [ -n "$JAVA_HOME" ]; then
|
elif [ -n "$JAVA_HOME" ]; then
|
||||||
|
@ -44,11 +58,44 @@ $JAVA -version >/dev/null 2>&1 || { echo >&2 "Java is required to run this tool!
|
||||||
|
|
||||||
# ===== post specific code
|
# ===== post specific code
|
||||||
|
|
||||||
|
TOOL_JAR=$SOLR_TIP/dist/solr-core-*.jar
|
||||||
|
|
||||||
function print_usage() {
|
function print_usage() {
|
||||||
echo ""
|
echo ""
|
||||||
echo "Usage: post <collection/core> <file|directory|url> [OPTIONS]"
|
echo "Usage: post -c <collection/core> <files|directories|urls> [OPTIONS]"
|
||||||
echo " or post -help"
|
echo " or post -help"
|
||||||
echo ""
|
echo ""
|
||||||
|
echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
|
||||||
|
echo ""
|
||||||
|
echo "OPTIONS"
|
||||||
|
echo "======="
|
||||||
|
echo " Solr options:"
|
||||||
|
echo " url=<base Solr update URL> (overrides collection, host, and port)"
|
||||||
|
echo " host=<host> (default: localhost)"
|
||||||
|
echo " port=<port> (default: 8983)"
|
||||||
|
echo " commit=yes|no (default: yes)"
|
||||||
|
echo ""
|
||||||
|
echo " Web crawl options:"
|
||||||
|
echo " recursive=<depth> (default: 1)"
|
||||||
|
echo " delay=<seconds> (default=10)"
|
||||||
|
echo ""
|
||||||
|
echo " Directory crawl options:"
|
||||||
|
echo " delay=<seconds> (default=0)"
|
||||||
|
echo ""
|
||||||
|
echo " Other options:"
|
||||||
|
echo " filetypes=<type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
|
||||||
|
echo " params=\"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded)"
|
||||||
|
echo " out=yes|no (default=no; yes outputs Solr response to console)"
|
||||||
|
echo ""
|
||||||
|
echo ""
|
||||||
|
echo "Examples:"
|
||||||
|
echo ""
|
||||||
|
echo "JSON file: $THIS_SCRIPT -c wizbang events.json"
|
||||||
|
echo "XML files: $THIS_SCRIPT -c records article*.xml"
|
||||||
|
echo "CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
|
||||||
|
echo "Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
|
||||||
|
echo "Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com recursive=2 delay=1"
|
||||||
|
echo ""
|
||||||
} # end print_usage
|
} # end print_usage
|
||||||
|
|
||||||
if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
|
if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
|
||||||
|
@ -57,62 +104,74 @@ if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
TOOL_JAR=dist/solr-core-*.jar
|
COLLECTION=$DEFAULT_SOLR_COLLECTION
|
||||||
|
PROPS="-Dauto=yes"
|
||||||
|
RECURSIVE=""
|
||||||
|
FILES=()
|
||||||
|
URLS=()
|
||||||
|
|
||||||
COLLECTION=$1; shift
|
|
||||||
|
|
||||||
# TODO: fix globbing issues... bin/post collection *.xml doesn't work as expected (only first file indexed?)
|
|
||||||
# TODO: fix bin/post *.xml issues, where collection isn't specified, so it assumes first passed file name is collection name
|
|
||||||
|
|
||||||
# TODO: Check that $COLLECTION actually exists? How to determine if user omitted collection name as first param?
|
|
||||||
# "$JAVA" -classpath "$TOOL_JAR" org.apache.solr.util.SolrCLI $* # except can't easily check for core existence with SolrCLI?
|
|
||||||
# TODO: also need a more general way to set the URL (or just server or port) rather than passing url=... at the end.
|
|
||||||
|
|
||||||
echo "Collection:" $COLLECTION
|
|
||||||
|
|
||||||
PROPS="-Dc=$COLLECTION"
|
|
||||||
PARAMS=""
|
|
||||||
|
|
||||||
echo -n "Data mode: "
|
|
||||||
if [[ $1 == http* ]]; then
|
|
||||||
echo "WEB"
|
|
||||||
PROPS="$PROPS -Ddata=web"
|
|
||||||
PARAMS=$1; shift
|
|
||||||
else
|
|
||||||
if [[ -d $1 ]]; then
|
|
||||||
# Directory
|
|
||||||
echo "DIRECTORY"
|
|
||||||
PROPS="$PROPS -Ddata=files -Dauto -Drecursive"
|
|
||||||
PARAMS=$1; shift
|
|
||||||
else
|
|
||||||
# Not a URL or existing directory, assume file(s)
|
|
||||||
echo "FILE"
|
|
||||||
FILE=$1; shift
|
|
||||||
EXTENSION="${FILE##*.}"
|
|
||||||
|
|
||||||
PARAMS=$FILE
|
|
||||||
|
|
||||||
if [[ $EXTENSION == xml || $EXTENSION == csv || $EXTENSION == json ]]; then
|
|
||||||
# Solr /update supported type (default being application/xml).
|
|
||||||
if [[ $EXTENSION == csv ]]; then
|
|
||||||
PROPS="$PROPS -Dtype=text/csv"
|
|
||||||
fi
|
|
||||||
if [[ $EXTENSION == json ]]; then
|
|
||||||
PROPS="$PROPS -Dtype=application/json"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
PROPS="$PROPS -Dauto=yes"
|
|
||||||
fi
|
|
||||||
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Add all additional trailing script parameters as system properties to SPT (eg. bin/post core_name ~/Documents depth=1)
|
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
|
# TODO: natively handle the optional parameters to SPT
|
||||||
|
# but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com
|
||||||
|
if [[ "$1" == "-c" ]]; then
|
||||||
|
# Pull out collection name
|
||||||
|
shift
|
||||||
|
COLLECTION=$1
|
||||||
|
else
|
||||||
|
# General argument, either a file, directory, URL, or param[=val]
|
||||||
|
if [[ -d "$1" ]]; then
|
||||||
|
# Directory
|
||||||
|
# echo "$1: DIRECTORY"
|
||||||
|
MODE="files"
|
||||||
|
RECURSIVE="-Drecursive=yes"
|
||||||
|
FILES+=("$1")
|
||||||
|
elif [[ -f "$1" ]]; then
|
||||||
|
# File
|
||||||
|
# echo "$1: FILE"
|
||||||
|
MODE="files"
|
||||||
|
FILES+=("$1")
|
||||||
|
elif [[ "$1" == http* ]]; then
|
||||||
|
# URL
|
||||||
|
# echo "$1: URL"
|
||||||
|
MODE="web"
|
||||||
|
URLS+=("$1")
|
||||||
|
else
|
||||||
|
# Not a file, directory or URL. Consider it a property to SPT
|
||||||
|
# echo "$1: PROP"
|
||||||
PROPS="$PROPS -D$1"
|
PROPS="$PROPS -D$1"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "$JAVA" -classpath "$TOOL_JAR" $PROPS org.apache.solr.util.SimplePostTool $PARAMS
|
# Check for errors
|
||||||
$JAVA -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool $PARAMS
|
if [[ ${#FILES[@]} != 0 && ${#URLS[@]} != 0 ]]; then
|
||||||
|
echo -e "\nCombining files (or directories) and URLs is not supported. Post them separately.\n"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 ]]; then
|
||||||
|
echo -e "\nNo files, directories, or URLs were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $COLLECTION == "" ]]; then
|
||||||
|
echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
PARAMS=""
|
||||||
|
if [[ $FILES != "" ]]; then
|
||||||
|
MODE="files"
|
||||||
|
PARAMS=("${FILES[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $URLS != "" ]]; then
|
||||||
|
MODE="web"
|
||||||
|
PARAMS=("${URLS[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
PROPS="$PROPS -Dc=$COLLECTION -Ddata=$MODE $RECURSIVE"
|
||||||
|
|
||||||
|
#echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
|
||||||
|
"$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
|
Loading…
Reference in New Issue