#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Command-line front end for org.apache.solr.util.SimplePostTool: classifies
# each argument as a file, directory, URL, or key=val option, then launches the
# tool with the matching -D system properties.
#
# Environment read by this script:
#   SOLR_JAVA_HOME / JAVA_HOME  - where to look for the java executable
#   DEFAULT_SOLR_COLLECTION     - collection used when -c is not given

# TODO wishlist:
# - handle stdin as well, such that `cat foo.csv | bin/post my_collection` works
# - support arbitrary posting like - java -Ddata=args org.apache.solr.util.SimplePostTool "SP2514N"
# - convert OPTIONS (key=val pass-through to SPT) to standard 'nix switches

# ====== Common code copied/adapted from bin/solr (TODO: centralize/share this kind of thing)

THIS_SCRIPT="$0"

# Resolve symlinks to this script
while [ -h "$THIS_SCRIPT" ]; do
  ls=$(ls -ld "$THIS_SCRIPT")
  # Drop everything prior to ->
  link=$(expr "$ls" : '.*-> \(.*\)$')
  if [[ "$link" == /* ]]; then
    # Absolute link target: use it directly
    THIS_SCRIPT="$link"
  else
    # Relative link target: interpret it relative to the link's directory
    THIS_SCRIPT="$(dirname "$THIS_SCRIPT")/$link"
  fi
done

# The install root is the parent of the directory holding this script.
SOLR_TIP="$(dirname "$THIS_SCRIPT")/.."
# Use && so a failed cd cannot silently resolve to the current directory.
SOLR_TIP=$(CDPATH='' cd -- "$SOLR_TIP" && pwd) || {
  echo >&2 "Unable to resolve the Solr installation directory from $THIS_SCRIPT"
  exit 1
}

# Pick the java executable: SOLR_JAVA_HOME wins, then JAVA_HOME, then the PATH.
if [ -n "$SOLR_JAVA_HOME" ]; then
  JAVA="$SOLR_JAVA_HOME/bin/java"
elif [ -n "$JAVA_HOME" ]; then
  for java in "$JAVA_HOME"/bin/amd64/java "$JAVA_HOME"/bin/java; do
    if [ -x "$java" ]; then
      JAVA="$java"
      break
    fi
  done
else
  JAVA=java
fi

# test that Java exists and is executable on this server
# (quoted so that a Java home containing spaces still works)
"$JAVA" -version >/dev/null 2>&1 || {
  echo >&2 "Java is required to run this tool! Please install Java 8 or greater before running this script."
  exit 1
}

# ===== post specific code

# Expand the jar glob exactly once, into an array, so that an install path with
# whitespace is preserved and extra matches are not passed to java as arguments.
TOOL_JAR=("$SOLR_TIP/dist"/solr-core-*.jar)
if [[ ! -f "${TOOL_JAR[0]}" ]]; then
  echo >&2 "Unable to find a solr-core jar in $SOLR_TIP/dist"
  exit 1
fi

# Prints the usage summary and examples to stdout.
print_usage() {
  cat <<EOF

Usage: post -c <collection> [OPTIONS] <files|directories|urls>
 or post -help

 collection name defaults to DEFAULT_SOLR_COLLECTION if not specified

OPTIONS
=======
 Solr options:
 url=<base Solr update URL> (overrides collection, host, and port)
 host=<host> (default: localhost)
 port=<port> (default: 8983)
 commit=yes|no (default: yes)

 Web crawl options:
 recursive=<depth> (default: 1)
 delay=<seconds> (default=10)

 Directory crawl options:
 delay=<seconds> (default=0)

 Other options:
 filetypes=<type>[,<type>,...] (default: xml,json,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log)
 params="<key>=<value>[&<key>=<value>...]" (values must be URL-encoded)
 out=yes|no (default=no; yes outputs Solr response to console)


Examples:

JSON file: $THIS_SCRIPT -c wizbang events.json
XML files: $THIS_SCRIPT -c records article*.xml
CSV file: $THIS_SCRIPT -c signals LATEST-signals.csv
Directory of files: $THIS_SCRIPT -c myfiles ~/Documents
Web crawl: $THIS_SCRIPT -c gettingstarted http://lucidworks.com recursive=2 delay=1

EOF
} # end print_usage

# Help is only recognised when it is the sole argument.
if [[ $# -eq 1 && ("$1" == "-help" || "$1" == "-h" || "$1" == "-usage") ]]; then
  print_usage
  exit
fi

COLLECTION="$DEFAULT_SOLR_COLLECTION"
# JVM system properties handed to SimplePostTool. Kept as an array so that
# values containing whitespace or glob characters reach java unmodified.
PROPS=('-Dauto=yes')
RECURSIVE=""
FILES=()
URLS=()

while [ $# -gt 0 ]; do
  # TODO: natively handle the optional parameters to SPT
  #       but for now they can be specified as bin/post -c collection-name delay=5 http://lucidworks.com

  if [[ "$1" == "-c" ]]; then
    # Pull out collection name
    shift
    COLLECTION="$1"
  else
    # General argument, either a file, directory, URL, or param[=val]
    if [[ -d "$1" ]]; then
      # Directory: posting a directory turns on recursion
      MODE="files"
      RECURSIVE="-Drecursive=yes"
      FILES+=("$1")
    elif [[ -f "$1" ]]; then
      # File
      MODE="files"
      FILES+=("$1")
    elif [[ "$1" == http* ]]; then
      # URL
      MODE="web"
      URLS+=("$1")
    else
      # Not a file, directory or URL. Consider it a property to SPT
      PROPS+=("-D$1")
    fi
  fi
  shift
done

# Check for errors (reported on stderr, like the Java check above)
if (( ${#FILES[@]} != 0 && ${#URLS[@]} != 0 )); then
  echo >&2 -e "\nCombining files (or directories) and URLs is not supported. Post them separately.\n"
  exit 1
fi

if (( ${#FILES[@]} == 0 && ${#URLS[@]} == 0 )); then
  echo >&2 -e "\nNo files, directories, or URLs were specified. See '$THIS_SCRIPT -h' for usage instructions.\n"
  exit 1
fi

if [[ -z "$COLLECTION" ]]; then
  echo >&2 -e "\nCollection must be specified. Use -c <collection name> or set DEFAULT_SOLR_COLLECTION in your environment.\n"
  exit 1
fi

# Exactly one of FILES / URLS is non-empty here; it becomes the positional
# argument list for SimplePostTool and determines the data mode.
PARAMS=()
if (( ${#FILES[@]} != 0 )); then
  MODE="files"
  PARAMS=("${FILES[@]}")
fi

if (( ${#URLS[@]} != 0 )); then
  MODE="web"
  PARAMS=("${URLS[@]}")
fi

PROPS+=("-Dc=$COLLECTION" "-Ddata=$MODE")
if [[ -n "$RECURSIVE" ]]; then
  PROPS+=("$RECURSIVE")
fi

#echo "$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"
"$JAVA" -classpath "${TOOL_JAR[0]}" "${PROPS[@]}" org.apache.solr.util.SimplePostTool "${PARAMS[@]}"