SOLR-6900: add support for stdin and string args (merged from r1652722)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1652724 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2015-01-18 10:15:46 +00:00
parent 22d0422663
commit 7f38f31d2d
1 changed file with 70 additions and 36 deletions

View File

@ -14,11 +14,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# TODO wishlist: # ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)
# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "<delete><id>SP2514N</id></delete>"
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)
THIS_SCRIPT="$0" THIS_SCRIPT="$0"
@ -60,7 +56,7 @@ TOOL_JAR=$SOLR_TIP/dist/solr-core-*.jar
function print_usage() { function print_usage() {
echo "" echo ""
echo "Usage: post -c <collection/core> <files|directories|urls> [OPTIONS]" echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
echo " or post -help" echo " or post -help"
echo "" echo ""
echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified" echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
@ -72,27 +68,33 @@ function print_usage() {
echo " -host <host> (default: localhost)" echo " -host <host> (default: localhost)"
echo " -port <port> (default: 8983)" echo " -port <port> (default: 8983)"
echo " -commit yes|no (default: yes)" echo " -commit yes|no (default: yes)"
# optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
echo "" echo ""
echo " Web crawl options:" echo " Web crawl options:"
echo " -recursive <depth> (default: 1)" echo " -recursive <depth> (default: 1)"
echo " -delay <seconds> (default=10)" echo " -delay <seconds> (default: 10)"
echo "" echo ""
echo " Directory crawl options:" echo " Directory crawl options:"
echo " -delay <seconds> (default=0)" echo " -delay <seconds> (default: 0)"
echo ""
echo " stdin/args options:"
echo " -type <content/type> (default: application/xml)"
echo "" echo ""
echo " Other options:" echo " Other options:"
echo " -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)" echo " -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)" echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
echo " -out yes|no (default=no; yes outputs Solr response to console)" echo " -out yes|no (default: no; yes outputs Solr response to console)"
echo "" echo ""
echo "" echo ""
echo "Examples:" echo "Examples:"
echo "" echo ""
echo "JSON file: $THIS_SCRIPT -c wizbang events.json" echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
echo "XML files: $THIS_SCRIPT -c records article*.xml" echo "* XML files: $THIS_SCRIPT -c records article*.xml"
echo "CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv" echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
echo "Directory of files: $THIS_SCRIPT -c myfiles ~/Documents" echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
echo "Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com -recursive 2 -delay 1" echo "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com -recursive 2 -delay 1"
echo "* Standard input (stdin): echo '{"commit": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d $'id,value\n1,0.47'"
echo "" echo ""
} # end print_usage } # end print_usage
@ -107,6 +109,7 @@ PROPS="-Dauto=yes"
RECURSIVE="" RECURSIVE=""
FILES=() FILES=()
URLS=() URLS=()
ARGS=()
while [ $# -gt 0 ]; do while [ $# -gt 0 ]; do
# TODO: natively handle the optional parameters to SPT # TODO: natively handle the optional parameters to SPT
@ -115,18 +118,15 @@ while [ $# -gt 0 ]; do
if [[ -d "$1" ]]; then if [[ -d "$1" ]]; then
# Directory # Directory
# echo "$1: DIRECTORY" # echo "$1: DIRECTORY"
MODE="files"
RECURSIVE="-Drecursive=yes" RECURSIVE="-Drecursive=yes"
FILES+=("$1") FILES+=("$1")
elif [[ -f "$1" ]]; then elif [[ -f "$1" ]]; then
# File # File
# echo "$1: FILE" # echo "$1: FILE"
MODE="files"
FILES+=("$1") FILES+=("$1")
elif [[ "$1" == http* ]]; then elif [[ "$1" == http* ]]; then
# URL # URL
# echo "$1: URL" # echo "$1: URL"
MODE="web"
URLS+=("$1") URLS+=("$1")
else else
if [[ $1 == -* ]]; then if [[ $1 == -* ]]; then
@ -134,6 +134,19 @@ while [ $# -gt 0 ]; do
# Special case, pull out collection name # Special case, pull out collection name
shift shift
COLLECTION=$1 COLLECTION=$1
elif [[ ($1 == "-d" || $1 == "--data" || $1 == "-") ]]; then
if [[ -s /dev/stdin ]]; then
MODE="stdin"
else
# When -d is specified but stdin has no data, all remaining arguments
# are treated as literal strings to post as-is
MODE="args"
shift
if [[ $# -gt 0 ]]; then
ARGS=("$@")
shift $#
fi
fi
else else
key=${1:1} key=${1:1}
shift shift
@ -149,33 +162,54 @@ while [ $# -gt 0 ]; do
done done
# Check for errors # Check for errors
if [[ ${#FILES[@]} != 0 && ${#URLS[@]} != 0 ]]; then
echo -e "\nCombining files (or directories) and URLs is not supported. Post them separately.\n"
exit 1
fi
if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 ]]; then
echo -e "\nNo files, directories, or URLs were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
exit 1
fi
if [[ $COLLECTION == "" ]]; then if [[ $COLLECTION == "" ]]; then
echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n" echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
exit 1 exit 1
fi fi
PARAMS="" # Unsupported: bin/post -c foo
if [[ $FILES != "" ]]; then if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
MODE="files" echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
PARAMS=("${FILES[@]}") exit 1
fi fi
if [[ $URLS != "" ]]; then # SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
MODE="web" # The following are unsupported constructs:
PARAMS=("${URLS[@]}") # bin/post -c foo existing_file.csv http://example.com
# echo '<xml.../>' | bin/post -c foo existing_file.csv
# bin/post -c foo existing_file.csv -d 'anything'
if [[ (${#FILES[@]} != 0 && ${#URLS[@]} != 0 && $MODE != "stdin" && $MODE != "args")
|| ((${#FILES[@]} != 0 || ${#URLS[@]} != 0) && ($MODE == "stdin" || $MODE == "args")) ]]; then
echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
exit 1
fi
PARAMS=""
# TODO: let's simplify this
if [[ $MODE != "stdin" && $MODE != "args" ]]; then
if [[ $FILES != "" ]]; then
MODE="files"
PARAMS=("${FILES[@]}")
fi
if [[ $URLS != "" ]]; then
MODE="web"
PARAMS=("${URLS[@]}")
fi
else
if [[ ${#ARGS[@]} == 0 ]]; then
# SPT needs an args string that is valid to post to Solr; defaulting to an empty <add/> lets 'bin/post -c foo -d' be used just to force a commit
ARGS+=("<add/>")
fi
PARAMS=("${ARGS[@]}")
fi fi
PROPS="$PROPS -Dc=$COLLECTION -Ddata=$MODE $RECURSIVE" PROPS="$PROPS -Dc=$COLLECTION -Ddata=$MODE $RECURSIVE"
#echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}" echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
"$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}" "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
# post smoker:
# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'