mirror of https://github.com/apache/lucene.git
contribution from Dror Matalon
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
485c79366d
commit
ff6a1404c4
|
@ -0,0 +1 @@
|
|||
Main-Class: LuceneLine
|
|
@ -0,0 +1,29 @@
|
|||
lucli (pronounced Luckily) is the Lucene Command Line Interface.
|
||||
|
||||
INSTALLATION
|
||||
|
||||
Edit the run.sh shell script
|
||||
Edit JAVA_HOME to point to your java directory.
|
||||
Edit LUCLI to point to where you installed lucli.
|
||||
Edit LUCLI_MEMORY and set it to the maximum amount of memory you want to allocate to lucli
|
||||
You can also replace the Lucene jar file that came with lucli with your own.
|
||||
|
||||
|
||||
ENABLING READLINE
|
||||
|
||||
If you add the -r command line parameter lucli will try to use a shared library
|
||||
to enable things like tab completion and history. Unfortunately Java doesn't support
|
||||
this capability natively so you'll need a shared library for this. I'm including one
|
||||
that worked for me with Debian Linux.
|
||||
For more details about GNU readline and java see http://java-readline.sourceforge.net/
|
||||
which is the library that lucli uses.
|
||||
|
||||
|
||||
Documentation
|
||||
|
||||
There is none :-). Type help at the command line or read the code.
|
||||
|
||||
Enjoy
|
||||
|
||||
Dror Matalon
|
||||
dror@zapatec.com.
|
|
@ -0,0 +1,36 @@
|
|||
<project name="lucli" default="build" basedir=".">
|
||||
|
||||
<!-- set global properties for this build -->
|
||||
<property name="src" value="./src"/>
|
||||
<property name="build" value="./classes"/>
|
||||
<property name="lucliLib" value="${build}/lucli.jar"/>
|
||||
|
||||
<!-- Include all elements that Tomcat exposes to applications -->
|
||||
<path id="compile.classpath">
|
||||
<fileset dir="lib">
|
||||
<include name="*.jar"/>
|
||||
</fileset>
|
||||
</path>
|
||||
|
||||
<target name="init">
|
||||
<!-- Create the time stamp -->
|
||||
<tstamp/>
|
||||
<!-- Create the dist directory structure used by compile -->
|
||||
<mkdir dir="${build}"/>
|
||||
<mkdir dir="${build}/docs"/>
|
||||
<mkdir dir="${build}/docs/javadocs"/>
|
||||
</target>
|
||||
|
||||
<target name="compile" depends="init">
|
||||
<!-- Compile the java code from ${src} into ${build} -->
|
||||
<javac debug="on" deprecation="on" srcdir="${src}" destdir="${build}">
|
||||
<classpath refid="compile.classpath"/>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="build" depends="compile">
|
||||
<jar basedir="${build}" includes="**/*.class" jarfile="${lucliLib}"/>
|
||||
</target>
|
||||
|
||||
|
||||
</project>
|
|
@ -0,0 +1 @@
|
|||
Place libreadline-java.jar and lucene-1.3-rc3-dev.jar here.
|
|
@ -0,0 +1,11 @@
|
|||
LUCLI=.
|
||||
LUCLI_MEMORY=128M
|
||||
#JAVA_HOME=/home/dror/j2sdk1.4.1_03/
|
||||
CLASSPATH=${CLASSPATH}:$LUCLI/lib/libreadline-java.jar:$LUCLI/lib/lucene-1.3-rc3-dev.jar:$LUCLI/classes/lucli.jar
|
||||
PATH=${PATH}:$JAVA_HOME/bin
|
||||
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$LUCLI
|
||||
export LD_LIBRARY_PATH
|
||||
$JAVA_HOME/bin/java -Xmx${LUCLI_MEMORY} lucli.Lucli
|
||||
#Use this line to enable tab completion. Depends on the Readline shares library
|
||||
#$JAVA_HOME/bin/java lucli.Lucli -r
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
package lucli;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||
* "Apache Lucene" must not be used to endorse or promote products
|
||||
* derived from this software without prior written permission. For
|
||||
* written permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache",
|
||||
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||
* prior written permission of the Apache Software Foundation.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
import org.gnu.readline.*;
|
||||
import java.util.Iterator;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Provide for tab completion
|
||||
* When the user types a tab do the standard thing: complete the command
|
||||
* Two tabs show all possible completions.
|
||||
*/
|
||||
|
||||
|
||||
public class Completer implements ReadlineCompleter {
|
||||
|
||||
String[] words; //list of words
|
||||
int currentPosition = 0; //current position in the array
|
||||
|
||||
/**
|
||||
Default constructor.
|
||||
*/
|
||||
|
||||
public Completer (TreeMap wordMap) {
|
||||
int size = wordMap.size();
|
||||
words = new String[size];
|
||||
Iterator wordIterator = wordMap.keySet().iterator();
|
||||
for (int ii=0; wordIterator.hasNext(); ii++) {
|
||||
words[ii] = (String) wordIterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Return possible completion. Implements org.gnu.readline.ReadlineCompleter.
|
||||
*/
|
||||
|
||||
public String completer (String text, int state) {
|
||||
|
||||
String ret = null; //what we're returning
|
||||
for (int ii = currentPosition; ii < words.length; ii++) {
|
||||
if (words[ii].startsWith(text)) {
|
||||
int next = ii + 1;
|
||||
if ((next < words.length) && words[next].startsWith(text)) {
|
||||
//more than one word match
|
||||
currentPosition = ii + 1; //next time start with next one
|
||||
ret = words[ii];
|
||||
break;
|
||||
} else { //found the last one
|
||||
if (state == 0) { //if it's the only one
|
||||
ret = words[ii];
|
||||
break;
|
||||
} else {
|
||||
ret = null; //there were previous ones
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ret == null)
|
||||
currentPosition = 0; //for next search
|
||||
//System.out.println("returned:" + ret);
|
||||
return (ret); //no more matches
|
||||
}
|
||||
}
|
|
@ -0,0 +1,372 @@
|
|||
package lucli;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||
* "Apache Lucene" must not be used to endorse or promote products
|
||||
* derived from this software without prior written permission. For
|
||||
* written permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache",
|
||||
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||
* prior written permission of the Apache Software Foundation.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import java.util.Hashtable;
|
||||
import java.util.Vector;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Enumeration;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.queryParser.MultiFieldQueryParser;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.Hits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
|
||||
/*
|
||||
* Parts addapted from Lucene demo. Various methods that interact with
|
||||
* Lucene and provide info about the index, search, etc.
|
||||
*/
|
||||
class LuceneMethods {
|
||||
|
||||
private int numDocs;
|
||||
private String indexName; //directory of this index
|
||||
private long version; //version number of this index
|
||||
java.util.Iterator fieldIterator;
|
||||
Vector fields; //Fields as a vector
|
||||
Vector indexedFields; //Fields as a vector
|
||||
String fieldsArray[]; //Fields as an array
|
||||
Searcher searcher;
|
||||
Query query; //current query string
|
||||
|
||||
public LuceneMethods(String index) {
|
||||
indexName = index;
|
||||
message("Lucene CLI. Using directory:" + indexName);
|
||||
}
|
||||
|
||||
|
||||
public void info() throws java.io.IOException {
|
||||
IndexReader indexReader = IndexReader.open(indexName);
|
||||
|
||||
|
||||
getFieldInfo();
|
||||
numDocs= indexReader.numDocs();
|
||||
message("Index has " + numDocs + " documents ");
|
||||
message ("All Fields:" + fields.toString());
|
||||
message ("Indexed Fields:" + indexedFields.toString());
|
||||
|
||||
if (IndexReader.isLocked(indexName)) {
|
||||
message("Index is locked");
|
||||
}
|
||||
//IndexReader.getCurrentVersion(indexName);
|
||||
//System.out.println("Version:" + version);
|
||||
|
||||
indexReader.close();
|
||||
}
|
||||
|
||||
|
||||
public void search(String queryString, boolean explain, boolean showTokens) throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
|
||||
Hits hits = initSearch(queryString);
|
||||
System.out.println(hits.length() + " total matching documents");
|
||||
Query explainQuery;
|
||||
if (explain) {
|
||||
query = explainQuery(queryString);
|
||||
}
|
||||
|
||||
|
||||
final int HITS_PER_PAGE = 10;
|
||||
message ("--------------------------------------");
|
||||
for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
|
||||
int end = Math.min(hits.length(), start + HITS_PER_PAGE);
|
||||
for (int ii = start; ii < end; ii++) {
|
||||
Document doc = hits.doc(ii);
|
||||
message ("---------------- " + ii + " score:" + hits.score(ii) + "---------------------");
|
||||
printHit(doc);
|
||||
if (showTokens) {
|
||||
invertDocument(doc);
|
||||
}
|
||||
if (explain) {
|
||||
Explanation exp = searcher.explain(query, hits.id(ii));
|
||||
message("Explanation:" + exp.toString());
|
||||
}
|
||||
}
|
||||
message ("#################################################");
|
||||
|
||||
if (hits.length() > end) {
|
||||
System.out.print("more (y/n) ? ");
|
||||
queryString = in.readLine();
|
||||
if (queryString.length() == 0 || queryString.charAt(0) == 'n')
|
||||
break;
|
||||
}
|
||||
}
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
private void printHit(Document doc) {
|
||||
for (int ii= 0; ii < fieldsArray.length; ii++) {
|
||||
String currField = fieldsArray[ii];
|
||||
String result = doc.get(currField);
|
||||
message(currField + ":" + result);
|
||||
}
|
||||
//another option is to just do message(doc);
|
||||
}
|
||||
|
||||
public void optimize () throws IOException{
|
||||
//open the index writer. False: don't create a new one
|
||||
IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
|
||||
message("Starting to optimize index.");
|
||||
long start = System.currentTimeMillis();
|
||||
indexWriter.optimize();
|
||||
message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
|
||||
indexWriter.close();
|
||||
}
|
||||
|
||||
|
||||
private Query explainQuery(String queryString) throws IOException, ParseException {
|
||||
|
||||
searcher = new IndexSearcher(indexName);
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
getFieldInfo();
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
|
||||
|
||||
MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
|
||||
|
||||
int arraySize = indexedFields.size();
|
||||
String indexedArray[] = new String[arraySize];
|
||||
for (int ii = 0; ii < arraySize; ii++) {
|
||||
indexedArray[ii] = (String) indexedFields.get(ii);
|
||||
}
|
||||
query = parser.parse(queryString, indexedArray, analyzer);
|
||||
System.out.println("Searching for: " + query.toString());
|
||||
return (query);
|
||||
|
||||
}
|
||||
private Hits initSearch(String queryString) throws IOException, ParseException {
|
||||
|
||||
searcher = new IndexSearcher(indexName);
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
getFieldInfo();
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
|
||||
|
||||
MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
|
||||
|
||||
int arraySize = fields.size();
|
||||
fieldsArray = new String[arraySize];
|
||||
for (int ii = 0; ii < arraySize; ii++) {
|
||||
fieldsArray[ii] = (String) fields.get(ii);
|
||||
}
|
||||
query = parser.parse(queryString, fieldsArray, analyzer);
|
||||
System.out.println("Searching for: " + query.toString());
|
||||
Hits hits = searcher.search(query);
|
||||
return (hits);
|
||||
|
||||
}
|
||||
|
||||
public void count(String queryString) throws java.io.IOException, ParseException {
|
||||
Hits hits = initSearch(queryString);
|
||||
System.out.println(hits.length() + " total documents");
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
static public void message (String s) {
|
||||
System.out.println(s);
|
||||
}
|
||||
|
||||
private void getFieldInfo() throws IOException {
|
||||
IndexReader indexReader = IndexReader.open(indexName);
|
||||
fields = new Vector();
|
||||
indexedFields = new Vector();
|
||||
|
||||
//get the list of all field names
|
||||
fieldIterator = indexReader.getFieldNames().iterator();
|
||||
while (fieldIterator.hasNext()) {
|
||||
Object field = fieldIterator.next();
|
||||
if (field != null && !field.equals(""))
|
||||
fields.add(field.toString());
|
||||
}
|
||||
//
|
||||
//get the list of indexed field names
|
||||
fieldIterator = indexReader.getFieldNames(true).iterator();
|
||||
while (fieldIterator.hasNext()) {
|
||||
Object field = fieldIterator.next();
|
||||
if (field != null && !field.equals(""))
|
||||
indexedFields.add(field.toString());
|
||||
}
|
||||
indexReader.close();
|
||||
}
|
||||
|
||||
|
||||
// Copied from DocumentWriter
|
||||
// Tokenizes the fields of a document into Postings.
|
||||
private void invertDocument(Document doc)
|
||||
throws IOException {
|
||||
|
||||
Hashtable tokenHash = new Hashtable();
|
||||
final int maxFieldLength = 10000;
|
||||
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
Enumeration fields = doc.fields();
|
||||
while (fields.hasMoreElements()) {
|
||||
Field field = (Field) fields.nextElement();
|
||||
String fieldName = field.name();
|
||||
|
||||
|
||||
if (field.isIndexed()) {
|
||||
if (field.isTokenized()) { // un-tokenized field
|
||||
Reader reader; // find or make Reader
|
||||
if (field.readerValue() != null)
|
||||
reader = field.readerValue();
|
||||
else if (field.stringValue() != null)
|
||||
reader = new StringReader(field.stringValue());
|
||||
else
|
||||
throw new IllegalArgumentException
|
||||
("field must have either String or Reader value");
|
||||
|
||||
int position = 0;
|
||||
// Tokenize field and add to postingTable
|
||||
TokenStream stream = analyzer.tokenStream(fieldName, reader);
|
||||
try {
|
||||
for (Token t = stream.next(); t != null; t = stream.next()) {
|
||||
position += (t.getPositionIncrement() - 1);
|
||||
position++;
|
||||
String name = t.termText();
|
||||
Integer Count = (Integer)tokenHash.get(name);
|
||||
if (Count == null) { // not in there yet
|
||||
tokenHash.put(name, new Integer(1)); //first one
|
||||
} else {
|
||||
int count = Count.intValue();
|
||||
tokenHash.put(name, new Integer (count+1));
|
||||
}
|
||||
if (position > maxFieldLength) break;
|
||||
}
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
|
||||
for (int ii = 0; ii < sortedHash.length && ii < 10; ii ++) {
|
||||
Entry currentEntry = sortedHash[ii];
|
||||
message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Provides a list of the top terms of the index.
|
||||
*
|
||||
* @param field - the name of the command or null for all of them.
|
||||
*/
|
||||
public void terms(String field) throws IOException {
|
||||
TreeMap termMap = new TreeMap();
|
||||
IndexReader indexReader = IndexReader.open(indexName);
|
||||
TermEnum terms = indexReader.terms();
|
||||
while (terms.next()) {
|
||||
Term term = terms.term();
|
||||
//message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
|
||||
//if we're either not looking by field or we're matching the specific field
|
||||
if ((field == null) || field.equals(term.field()))
|
||||
termMap.put(new Integer((0 - terms.docFreq())), term.field() + ":" + term.text());
|
||||
}
|
||||
|
||||
Iterator termIterator = termMap.keySet().iterator();
|
||||
for (int ii=0; termIterator.hasNext() && ii < 100; ii++) {
|
||||
Integer termFreq = (Integer) termIterator.next();
|
||||
String termDetails = (String) termMap.get(termFreq);
|
||||
message(termDetails + ": " + termFreq);
|
||||
}
|
||||
indexReader.close();
|
||||
}
|
||||
|
||||
/** Sort Hashtable values
|
||||
* @param h the hashtable we're sorting
|
||||
* from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
|
||||
*/
|
||||
|
||||
public static Entry[]
|
||||
getSortedHashtableEntries(Hashtable h) {
|
||||
Set set = h.entrySet();
|
||||
Entry [] entries =
|
||||
(Entry[])set.toArray(
|
||||
new Entry[set.size()]);
|
||||
Arrays.sort(entries, new Comparator() {
|
||||
public int compare(Object o1, Object o2) {
|
||||
Object v1 = ((Entry)o1).getValue();
|
||||
Object v2 = ((Entry)o2).getValue();
|
||||
return ((Comparable)v2).compareTo(v1); //descending order
|
||||
}
|
||||
});
|
||||
return entries;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,382 @@
|
|||
package lucli;
|
||||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||
* "Apache Lucene" must not be used to endorse or promote products
|
||||
* derived from this software without prior written permission. For
|
||||
* written permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache",
|
||||
* "Apache Lucene", nor may "Apache" appear in their name, without
|
||||
* prior written permission of the Apache Software Foundation.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
import org.gnu.readline.*;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* lucli Main class for lucli: the Lucene Command Line Interface
|
||||
* This class handles mostly the actual CLI part, command names, help, etc.
|
||||
*/
|
||||
|
||||
public class Lucli {
|
||||
|
||||
final static String DEFAULT_INDEX = "index"; //directory "index" under the current directory
|
||||
final static String HISTORYFILE = ".lucli"; //directory "index" under the current directory
|
||||
public final static int MAX_TERMS = 100; //Maximum number of terms we're going to show
|
||||
|
||||
// List of commands
|
||||
// To add another command, add it in here, in the list of addcomand(), and in the switch statement
|
||||
final static int NOCOMMAND = -2;
|
||||
final static int UNKOWN = -1;
|
||||
final static int INFO = 0;
|
||||
final static int SEARCH = 1;
|
||||
final static int OPTIMIZE = 2;
|
||||
final static int QUIT = 3;
|
||||
final static int HELP = 4;
|
||||
final static int COUNT = 5;
|
||||
final static int TERMS = 6;
|
||||
final static int INDEX = 7;
|
||||
final static int TOKENS = 8;
|
||||
final static int EXPLAIN = 9;
|
||||
|
||||
String fullPath;
|
||||
TreeMap commandMap = new TreeMap();
|
||||
LuceneMethods luceneMethods; //current cli class we're using
|
||||
boolean enableReadline; //false: use plain java. True: shared library readline
|
||||
|
||||
/**
|
||||
Main entry point. The first argument can be a filename with an
|
||||
application initialization file.
|
||||
*/
|
||||
|
||||
public Lucli(String[] args) throws ParseException, IOException {
|
||||
String line;
|
||||
|
||||
fullPath = System.getProperty("user.home") + System.getProperty("file.separator")
|
||||
+ HISTORYFILE;
|
||||
|
||||
/*
|
||||
* Initialize the list of commands
|
||||
*/
|
||||
|
||||
addCommand("info", INFO, "Display info about the current Lucene Index. Example:info");
|
||||
addCommand("search", SEARCH, "Search the current index. Example: search foo", 1);
|
||||
addCommand("count", COUNT, "Return the number of hits for a search. Example: count foo", 1);
|
||||
addCommand("optimize", OPTIMIZE, "Optimize the current index");
|
||||
addCommand("quit", QUIT, "Quit/exit the program");
|
||||
addCommand("help", HELP, "Display help about commands.");
|
||||
addCommand("terms", TERMS, "Show the first " + MAX_TERMS + " terms in this index. Supply a field name to only show terms in a specific field. Example: terms");
|
||||
addCommand("index", INDEX, "Choose a different lucene index. Example index my_index", 1);
|
||||
addCommand("tokens", TOKENS, "Does a search and shows the top 10 tokens for each document. Verbose! Example: tokens foo", 1);
|
||||
addCommand("explain", EXPLAIN, "Explanation that describes how the document scored against query. Example: explain foo", 1);
|
||||
|
||||
|
||||
|
||||
//parse command line arguments
|
||||
parseArgs(args);
|
||||
|
||||
if (enableReadline)
|
||||
org.gnu.readline.Readline.load(ReadlineLibrary.GnuReadline );
|
||||
else
|
||||
org.gnu.readline.Readline.load(ReadlineLibrary.PureJava );
|
||||
|
||||
Readline.initReadline("lucli"); // init, set app name, read inputrc
|
||||
|
||||
|
||||
|
||||
Readline.readHistoryFile(fullPath);
|
||||
|
||||
// read history file, if available
|
||||
|
||||
File history = new File(".rltest_history");
|
||||
try {
|
||||
if (history.exists())
|
||||
Readline.readHistoryFile(history.getName());
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error reading history file!");
|
||||
}
|
||||
|
||||
// Set word break characters
|
||||
try {
|
||||
Readline.setWordBreakCharacters(" \t;");
|
||||
}
|
||||
catch (UnsupportedEncodingException enc) {
|
||||
System.err.println("Could not set word break characters");
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// set completer with list of words
|
||||
|
||||
Readline.setCompleter(new Completer(commandMap));
|
||||
|
||||
// main input loop
|
||||
|
||||
luceneMethods = new LuceneMethods(DEFAULT_INDEX);
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
line = Readline.readline("lucli> ");
|
||||
if (line != null) {
|
||||
handleCommand(line);
|
||||
}
|
||||
} catch (UnsupportedEncodingException enc) {
|
||||
System.err.println("caught UnsupportedEncodingException");
|
||||
break;
|
||||
} catch (java.io.EOFException eof) {
|
||||
System.out.println("");//new line
|
||||
exit();
|
||||
} catch (IOException ioe) {
|
||||
throw (ioe);
|
||||
}
|
||||
}
|
||||
|
||||
exit();
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws ParseException, IOException {
|
||||
new Lucli(args);
|
||||
}
|
||||
|
||||
|
||||
private void handleCommand(String line) throws IOException, ParseException {
|
||||
String [] words = tokenizeCommand(line);
|
||||
if (words.length == 0)
|
||||
return; //white space
|
||||
String query = "";
|
||||
//Command name and number of arguments
|
||||
switch (getCommandId(words[0], words.length - 1)) {
|
||||
case INFO:
|
||||
luceneMethods.info();
|
||||
break;
|
||||
case SEARCH:
|
||||
for (int ii = 1; ii < words.length; ii++) {
|
||||
query += words[ii] + " ";
|
||||
}
|
||||
luceneMethods.search(query, false, false);
|
||||
break;
|
||||
case COUNT:
|
||||
for (int ii = 1; ii < words.length; ii++) {
|
||||
query += words[ii] + " ";
|
||||
}
|
||||
luceneMethods.count(query);
|
||||
break;
|
||||
case QUIT:
|
||||
exit();
|
||||
break;
|
||||
case TERMS:
|
||||
if(words.length > 1)
|
||||
luceneMethods.terms(words[1]);
|
||||
else
|
||||
luceneMethods.terms(null);
|
||||
break;
|
||||
case INDEX:
|
||||
LuceneMethods newLm = new LuceneMethods(words[1]);
|
||||
try {
|
||||
newLm.info(); //will fail if can't open the index
|
||||
luceneMethods = newLm; //OK, so we'll use the new one
|
||||
} catch (IOException ioe) {
|
||||
//problem we'll keep using the old one
|
||||
error(ioe.toString());
|
||||
}
|
||||
break;
|
||||
case OPTIMIZE:
|
||||
luceneMethods.optimize();
|
||||
break;
|
||||
case TOKENS:
|
||||
for (int ii = 1; ii < words.length; ii++) {
|
||||
query += words[ii] + " ";
|
||||
}
|
||||
luceneMethods.search(query, false, true);
|
||||
break;
|
||||
case EXPLAIN:
|
||||
for (int ii = 1; ii < words.length; ii++) {
|
||||
query += words[ii] + " ";
|
||||
}
|
||||
luceneMethods.search(query, true, false);
|
||||
break;
|
||||
case HELP:
|
||||
help();
|
||||
break;
|
||||
case NOCOMMAND: //do nothing
|
||||
break;
|
||||
case UNKOWN:
|
||||
System.out.println("Unknown command:" + words[0] + ". Type help to get a list of commands.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private String [] tokenizeCommand(String line) {
|
||||
StringTokenizer tokenizer = new StringTokenizer(line, " \t");
|
||||
int size = tokenizer.countTokens();
|
||||
String [] tokens = new String[size];
|
||||
for (int ii = 0; tokenizer.hasMoreTokens(); ii++) {
|
||||
tokens[ii] = tokenizer.nextToken();
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
private void exit() {
|
||||
|
||||
try {
|
||||
Readline.writeHistoryFile(fullPath);
|
||||
} catch (IOException ioe) {
|
||||
error("while saving history:" + ioe);
|
||||
}
|
||||
Readline.cleanup();
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a command to the list of commands for the interpreter for a
|
||||
* command that doesn't take any parameters.
|
||||
* @param name - the name of the command
|
||||
* @param id - the unique id of the command
|
||||
* @param help - the help message for this command
|
||||
*/
|
||||
private void addCommand(String name, int id, String help) {
|
||||
addCommand(name, id, help, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a command to the list of commands for the interpreter.
|
||||
* @param name - the name of the command
|
||||
* @param id - the unique id of the command
|
||||
* @param help - the help message for this command
|
||||
* @param params - the minimum number of required params if any
|
||||
*/
|
||||
private void addCommand(String name, int id, String help, int params) {
|
||||
Command command = new Command(name, id, help, params);
|
||||
commandMap.put(name, command);
|
||||
}
|
||||
|
||||
private int getCommandId(String name, int params) {
|
||||
name.toLowerCase(); //treat uppercase and lower case commands the same
|
||||
Command command = (Command) commandMap.get(name);
|
||||
if (command == null) {
|
||||
return(UNKOWN);
|
||||
}
|
||||
else {
|
||||
if(command.params > params) {
|
||||
error(command.name + " needs at least " + command.params + " arguments.");
|
||||
return (NOCOMMAND);
|
||||
}
|
||||
return (command.id);
|
||||
}
|
||||
}
|
||||
|
||||
private void help() {
|
||||
Iterator commands = commandMap.keySet().iterator();
|
||||
while (commands.hasNext()) {
|
||||
Command command = (Command) commandMap.get(commands.next());
|
||||
System.out.println("\t" + command.name + ": " + command.help);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void error(String message) {
|
||||
System.err.println("Error:" + message);
|
||||
}
|
||||
|
||||
private void message(String text) {
|
||||
System.out.println(text);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Parse command line arguments
|
||||
* Code inspired by http://www.ecs.umass.edu/ece/wireless/people/emmanuel/java/java/cmdLineArgs/parsing.html
|
||||
*/
|
||||
private void parseArgs(String[] args) {
|
||||
for (int ii = 0; ii < args.length; ii++) {
|
||||
// a little overkill for now, but foundation
|
||||
// for other args
|
||||
if (args[ii].startsWith("-")) {
|
||||
String arg = args[ii];
|
||||
if (arg.equals("-r")) {
|
||||
enableReadline = true;
|
||||
}
|
||||
else {
|
||||
usage();
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void usage() {
|
||||
message("Usage: lucli [-j]");
|
||||
message("Arguments:");
|
||||
message("\t-r: Provide tab completion and history using the GNU readline shared library ");
|
||||
}
|
||||
|
||||
private class Command {
|
||||
String name;
|
||||
int id;
|
||||
int numberArgs;
|
||||
String help;
|
||||
int params;
|
||||
|
||||
Command(String name, int id, String help, int params) {
|
||||
this.name = name;
|
||||
this.id = id;
|
||||
this.help = help;
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints out a usage message for this command.
|
||||
*/
|
||||
public String commandUsage() {
|
||||
return (name + ":" + help + ". Command takes " + params + " params");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue