mirror of https://github.com/apache/lucene.git
SOLR-6900: add support for stdin and string args (merged from r1652722)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1652724 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
22d0422663
commit
7f38f31d2d
106
solr/bin/post
106
solr/bin/post
|
@ -14,11 +14,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# TODO wishlist:
|
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)
|
||||||
# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
|
|
||||||
# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "<delete><id>SP2514N</id></delete>"
|
|
||||||
|
|
||||||
# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)
|
|
||||||
|
|
||||||
THIS_SCRIPT="$0"
|
THIS_SCRIPT="$0"
|
||||||
|
|
||||||
|
@ -60,7 +56,7 @@ TOOL_JAR=$SOLR_TIP/dist/solr-core-*.jar
|
||||||
|
|
||||||
function print_usage() {
|
function print_usage() {
|
||||||
echo ""
|
echo ""
|
||||||
echo "Usage: post -c <collection/core> <files|directories|urls> [OPTIONS]"
|
echo 'Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>'
|
||||||
echo " or post -help"
|
echo " or post -help"
|
||||||
echo ""
|
echo ""
|
||||||
echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
|
echo " collection name defaults to DEFAULT_SOLR_COLLECTION if not specified"
|
||||||
|
@ -72,27 +68,33 @@ function print_usage() {
|
||||||
echo " -host <host> (default: localhost)"
|
echo " -host <host> (default: localhost)"
|
||||||
echo " -port <port> (default: 8983)"
|
echo " -port <port> (default: 8983)"
|
||||||
echo " -commit yes|no (default: yes)"
|
echo " -commit yes|no (default: yes)"
|
||||||
|
# optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
|
||||||
echo ""
|
echo ""
|
||||||
echo " Web crawl options:"
|
echo " Web crawl options:"
|
||||||
echo " -recursive <depth> (default: 1)"
|
echo " -recursive <depth> (default: 1)"
|
||||||
echo " -delay <seconds> (default=10)"
|
echo " -delay <seconds> (default: 10)"
|
||||||
echo ""
|
echo ""
|
||||||
echo " Directory crawl options:"
|
echo " Directory crawl options:"
|
||||||
echo " -delay <seconds> (default=0)"
|
echo " -delay <seconds> (default: 0)"
|
||||||
|
echo ""
|
||||||
|
echo " stdin/args options:"
|
||||||
|
echo " -type <content/type> (default: application/xml)"
|
||||||
echo ""
|
echo ""
|
||||||
echo " Other options:"
|
echo " Other options:"
|
||||||
echo " -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
|
echo " -filetypes <type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)"
|
||||||
echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
|
echo " -params \"<key>=<value>[&<key>=<value>...]\" (values must be URL-encoded; these pass through to Solr update request)"
|
||||||
echo " -out yes|no (default=no; yes outputs Solr response to console)"
|
echo " -out yes|no (default: no; yes outputs Solr response to console)"
|
||||||
echo ""
|
echo ""
|
||||||
echo ""
|
echo ""
|
||||||
echo "Examples:"
|
echo "Examples:"
|
||||||
echo ""
|
echo ""
|
||||||
echo "JSON file: $THIS_SCRIPT -c wizbang events.json"
|
echo "* JSON file: $THIS_SCRIPT -c wizbang events.json"
|
||||||
echo "XML files: $THIS_SCRIPT -c records article*.xml"
|
echo "* XML files: $THIS_SCRIPT -c records article*.xml"
|
||||||
echo "CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
|
echo "* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv"
|
||||||
echo "Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
|
echo "* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents"
|
||||||
echo "Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com -recursive 2 -delay 1"
|
echo "* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com -recursive 2 -delay 1"
|
||||||
|
# Inner double quotes are escaped so the JSON in the example is printed verbatim ({"commit": {}});
# unescaped, they end and restart the outer string and the quotes vanish from the usage text.
echo "* Standard input (stdin): echo '{\"commit\": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d"
|
||||||
|
echo "* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d $'id,value\n1,0.47'"
|
||||||
echo ""
|
echo ""
|
||||||
} # end print_usage
|
} # end print_usage
|
||||||
|
|
||||||
|
@ -107,6 +109,7 @@ PROPS="-Dauto=yes"
|
||||||
RECURSIVE=""
|
RECURSIVE=""
|
||||||
FILES=()
|
FILES=()
|
||||||
URLS=()
|
URLS=()
|
||||||
|
ARGS=()
|
||||||
|
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
# TODO: natively handle the optional parameters to SPT
|
# TODO: natively handle the optional parameters to SPT
|
||||||
|
@ -115,18 +118,15 @@ while [ $# -gt 0 ]; do
|
||||||
if [[ -d "$1" ]]; then
|
if [[ -d "$1" ]]; then
|
||||||
# Directory
|
# Directory
|
||||||
# echo "$1: DIRECTORY"
|
# echo "$1: DIRECTORY"
|
||||||
MODE="files"
|
|
||||||
RECURSIVE="-Drecursive=yes"
|
RECURSIVE="-Drecursive=yes"
|
||||||
FILES+=("$1")
|
FILES+=("$1")
|
||||||
elif [[ -f "$1" ]]; then
|
elif [[ -f "$1" ]]; then
|
||||||
# File
|
# File
|
||||||
# echo "$1: FILE"
|
# echo "$1: FILE"
|
||||||
MODE="files"
|
|
||||||
FILES+=("$1")
|
FILES+=("$1")
|
||||||
elif [[ "$1" == http* ]]; then
|
elif [[ "$1" == http* ]]; then
|
||||||
# URL
|
# URL
|
||||||
# echo "$1: URL"
|
# echo "$1: URL"
|
||||||
MODE="web"
|
|
||||||
URLS+=("$1")
|
URLS+=("$1")
|
||||||
else
|
else
|
||||||
if [[ $1 == -* ]]; then
|
if [[ $1 == -* ]]; then
|
||||||
|
@ -134,6 +134,19 @@ while [ $# -gt 0 ]; do
|
||||||
# Special case, pull out collection name
|
# Special case, pull out collection name
|
||||||
shift
|
shift
|
||||||
COLLECTION=$1
|
COLLECTION=$1
|
||||||
|
elif [[ ($1 == "-d" || $1 == "--data" || $1 == "-") ]]; then
|
||||||
|
# Decide where the data to post comes from once -d / --data / - has been seen.
# A pipe on stdin typically reports a size of 0, so -s alone misses
# `echo ... | bin/post -c foo -d`; -p recognizes the pipe, while -s still
# recognizes a non-empty file redirected onto stdin.
if [[ -p /dev/stdin || -s /dev/stdin ]]; then
  MODE="stdin"
else
  # when no stdin exists and -d specified, the rest of the arguments
  # are assumed to be strings to post as-is
  MODE="args"
  shift
  if [[ $# -gt 0 ]]; then
    # Take every remaining argument as data and consume them all.
    ARGS=("$@")
    shift $#
  fi
fi
|
||||||
else
|
else
|
||||||
key=${1:1}
|
key=${1:1}
|
||||||
shift
|
shift
|
||||||
|
@ -149,33 +162,54 @@ while [ $# -gt 0 ]; do
|
||||||
done
|
done
|
||||||
|
|
||||||
# Check for errors
|
# Check for errors
|
||||||
if [[ ${#FILES[@]} != 0 && ${#URLS[@]} != 0 ]]; then
|
|
||||||
echo -e "\nCombining files (or directories) and URLs is not supported. Post them separately.\n"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 ]]; then
|
|
||||||
echo -e "\nNo files, directories, or URLs were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ $COLLECTION == "" ]]; then
|
if [[ $COLLECTION == "" ]]; then
|
||||||
echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
|
echo -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PARAMS=""
|
# Unsupported: bin/post -c foo
|
||||||
if [[ $FILES != "" ]]; then
|
if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
|
||||||
MODE="files"
|
echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
|
||||||
PARAMS=("${FILES[@]}")
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $URLS != "" ]]; then
|
# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
|
||||||
MODE="web"
|
# The following are unsupported constructs:
|
||||||
PARAMS=("${URLS[@]}")
|
# bin/post -c foo existing_file.csv http://example.com
|
||||||
|
# echo '<xml.../>' | bin/post -c foo existing_file.csv
|
||||||
|
# bin/post -c foo existing_file.csv -d 'anything'
|
||||||
|
# Only one kind of input may be posted per run: files/directories, URLs,
# stdin, or -d argument strings. Abort when more than one kind was given.
case "$MODE" in
  stdin|args)
    # stdin / -d strings cannot be combined with any file or URL
    if (( ${#FILES[@]} != 0 || ${#URLS[@]} != 0 )); then
      echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
      exit 1
    fi
    ;;
  *)
    # files and URLs cannot be combined with each other
    if (( ${#FILES[@]} != 0 && ${#URLS[@]} != 0 )); then
      echo -e "\nCombining files/directories, URLs, stdin, or args is not supported. Post them separately.\n"
      exit 1
    fi
    ;;
esac
|
||||||
|
|
||||||
|
PARAMS=""
|
||||||
|
|
||||||
|
# TODO: let's simplify this
|
||||||
|
if [[ $MODE != "stdin" && $MODE != "args" ]]; then
|
||||||
|
if [[ $FILES != "" ]]; then
|
||||||
|
MODE="files"
|
||||||
|
PARAMS=("${FILES[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $URLS != "" ]]; then
|
||||||
|
MODE="web"
|
||||||
|
PARAMS=("${URLS[@]}")
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if [[ ${#ARGS[@]} == 0 ]]; then
|
||||||
|
# SPT needs a valid (to post to Solr) args string, useful for 'bin/post -c foo -d' to force a commit
|
||||||
|
ARGS+=("<add/>")
|
||||||
|
fi
|
||||||
|
PARAMS=("${ARGS[@]}")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
PROPS="$PROPS -Dc=$COLLECTION -Ddata=$MODE $RECURSIVE"
|
PROPS="$PROPS -Dc=$COLLECTION -Ddata=$MODE $RECURSIVE"
|
||||||
|
|
||||||
#echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
|
echo "$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
|
||||||
"$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
|
"$JAVA" -classpath $TOOL_JAR $PROPS org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
|
||||||
|
|
||||||
|
# post smoker:
|
||||||
|
# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
|
||||||
|
# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'
|
||||||
|
|
Loading…
Reference in New Issue