#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Command-line front end for org.apache.solr.util.SimplePostTool.
#
# Positional arguments are classified as directories, files, URLs, or options;
# options are translated into -D<key>=<value> Java system properties and the
# tool is launched from the solr-core jar found under <install>/dist.
#
# Environment read by this script:
#   SOLR_JAVA_HOME          - preferred JVM location (uses $SOLR_JAVA_HOME/bin/java)
#   JAVA_HOME               - fallback JVM location
#   DEFAULT_SOLR_COLLECTION - collection used when -c is not given

# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing across bin/solr, etc)

THIS_SCRIPT="$0"

# Resolve symlinks to this script. The link target is taken from `ls -ld`
# output, as in the original, so no `readlink` binary is required.
while [[ -h "$THIS_SCRIPT" ]]; do
  ls_out=$(ls -ld "$THIS_SCRIPT")
  # Drop everything up to and including the last "-> "
  link=${ls_out##*-> }
  if [[ "$link" == /* ]]; then
    THIS_SCRIPT="$link"
  else
    THIS_SCRIPT="$(dirname "$THIS_SCRIPT")/$link"
  fi
done

# Installation root: the parent of the directory holding this script.
SOLR_TIP="$(dirname "$THIS_SCRIPT")/.."
# '&&' so that a failed cd does not silently yield the caller's working directory
SOLR_TIP=$(cd "$SOLR_TIP" && pwd)

if [[ -n "$SOLR_JAVA_HOME" ]]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [[ -n "$JAVA_HOME" ]]; then
  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
    if [[ -x "$java" ]]; then
      JAVA="$java"
      break
    fi
  done
else
  JAVA=java
fi

# test that Java exists and is executable on this server
"$JAVA" -version >/dev/null 2>&1 || {
  echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."
  exit 1
}

# ===== post specific code

# Glob for the jar that provides SimplePostTool; only the first match is used.
TOOL_JAR=("$SOLR_TIP/dist"/solr-core-*.jar)

# Print usage instructions and examples to stdout.
print_usage() {
  # optimize intentionally omitted, but can be used as '-optimize yes' (default: no)
  cat <<EOF

Usage: post -c <collection> [OPTIONS] <files|directories|urls|-d ["...",...]>
    or post -help

   collection name defaults to DEFAULT_SOLR_COLLECTION if not specified

OPTIONS
=======
  Solr options:
    -url <base Solr update URL> (overrides collection, host, and port)
    -host <host> (default: localhost)
    -p or -port <port> (default: 8983)
    -commit yes|no (default: yes)

  Web crawl options:
    -recursive <depth> (default: 1)
    -delay <seconds> (default: 10)

  Directory crawl options:
    -delay <seconds> (default: 0)

  stdin/args options:
    -type <content/type> (default: application/xml)

  Other options:
    -filetypes <type>[,<type>,...] (default: xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)
    -params "<key>=<value>[&<key>=<value>...]" (values must be URL-encoded; these pass through to Solr update request)
    -out yes|no (default: no; yes outputs Solr response to console)
    -format solr (sends application/json content as Solr commands to /update instead of /update/json/docs)


Examples:

* JSON file: $THIS_SCRIPT -c wizbang events.json
* XML files: $THIS_SCRIPT -c records article*.xml
* CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv
* Directory of files: $THIS_SCRIPT -c myfiles ~/Documents
* Web crawl: $THIS_SCRIPT -c gettingstarted http://lucene.apache.org/solr -recursive 1 -delay 1
* Standard input (stdin): echo '{"commit": {}}' | $THIS_SCRIPT -c my_collection -type application/json -out yes -d
* Data as string: $THIS_SCRIPT -c signals -type text/csv -out yes -d \$'id,value\n1,0.47'

EOF
} # end print_usage

if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
  print_usage
  exit
fi

COLLECTION="$DEFAULT_SOLR_COLLECTION"
PROPS=('-Dauto=yes')
RECURSIVE=""
FILES=()
URLS=()
ARGS=()
# Start from a known state so that MODE / SOLR_URL values inherited from the
# caller's environment cannot influence the validation below.
MODE=""
SOLR_URL=""

while [[ $# -gt 0 ]]; do
  # TODO: natively handle the optional parameters to SPT
  #       but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com

  if [[ -d "$1" ]]; then
    # Directory: posted recursively
    RECURSIVE=yes
    FILES+=("$1")
  elif [[ -f "$1" ]]; then
    # File
    FILES+=("$1")
  elif [[ "$1" == http* ]]; then
    # URL
    URLS+=("$1")
  else
    case "$1" in
      -c)
        # Special case, pull out collection name
        shift
        COLLECTION="$1"
        ;;
      -p)
        # -p alias for -port for convenience and compatibility with `bin/solr start`
        shift
        PROPS+=("-Dport=$1")
        ;;
      -d | --data | -)
        if [[ ! -t 0 ]]; then
          # stdin is not a terminal: the data is read from stdin
          MODE="stdin"
        else
          # when no stdin exists and -d specified, the rest of the arguments
          # are assumed to be strings to post as-is
          MODE="args"
          shift
          if [[ $# -gt 0 ]]; then
            ARGS=("$@")
            shift $#
          else
            # SPT needs a valid args string, useful for 'bin/post -c foo -d' to force a commit
            ARGS+=("")
          fi
        fi
        ;;
      -*)
        # Generic "-key value" pair, passed through as -Dkey=value
        key="${1:1}"
        shift
        PROPS+=("-D$key=$1")
        if [[ "$key" == "url" ]]; then
          SOLR_URL=$1
        fi
        ;;
      *)
        echo -e "\nUnrecognized argument: $1\n"
        echo -e "If this was intended to be a data file, it does not exist relative to $PWD\n"
        exit 1
        ;;
    esac
  fi
  # Consume the current argument (or the value of the option just handled).
  # When nothing is left this is a silent no-op.
  shift
done

# Check for errors
if [[ $COLLECTION == "" && $SOLR_URL == "" ]]; then
  echo -e "\nCollection or URL must be specified.  Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment, or use -url instead.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# Unsupported: bin/post -c foo
if [[ ${#FILES[@]} == 0 && ${#URLS[@]} == 0 && $MODE != "stdin" && $MODE != "args" ]]; then
  echo -e "\nNo files, directories, URLs, -d strings, or stdin were specified.\n"
  echo -e "See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

# SPT does not support mixing different data mode types, just files, just URLs, just stdin, or just argument strings.
# The following are unsupported constructs:
#    bin/post -c foo existing_file.csv http://example.com
#    echo '<xml.../>' | bin/post -c foo existing_file.csv
#    bin/post -c foo existing_file.csv -d 'anything'
if [[ (${#FILES[@]} != 0 && ${#URLS[@]} != 0 && $MODE != "stdin" && $MODE != "args") || ((${#FILES[@]} != 0 || ${#URLS[@]} != 0) && ($MODE == "stdin" || $MODE == "args")) ]]; then
  echo -e "\nCombining files/directories, URLs, stdin, or args is not supported.  Post them separately.\n"
  exit 1
fi

# Select the data mode and the positional parameters handed to SimplePostTool.
PARAMS=()
if [[ $MODE == "stdin" || $MODE == "args" ]]; then
  PARAMS=("${ARGS[@]}")
else
  if [[ ${#FILES[@]} -gt 0 ]]; then
    MODE="files"
    PARAMS=("${FILES[@]}")
  fi
  if [[ ${#URLS[@]} -gt 0 ]]; then
    MODE="web"
    PARAMS=("${URLS[@]}")
  fi
fi

PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ -n "$RECURSIVE" ]]; then
  PROPS+=('-Drecursive=yes')
fi

# If the glob above matched nothing, TOOL_JAR[0] is still the literal pattern;
# fail with a clear message instead of letting the JVM report a missing class.
if [[ ! -f "${TOOL_JAR[0]}" ]]; then
  echo >&2 "Could not find solr-core-*.jar in $SOLR_TIP/dist"
  exit 1
fi

# Show the command being run, then run it; the script's exit status is Java's.
echo "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
"$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"

# post smoker:
# bin/post -c signals -out yes -type application/json -d '[{"id": 2, "val": 0.47}]'
# bin/post -c signals -out yes -params "wt=json" -d '<add><doc><field name="id">1</field></doc></add>'