javadoc additions and cleanup

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@448815 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2006-09-22 05:04:06 +00:00
parent 8c79297a75
commit 98f8010f65
11 changed files with 313 additions and 50 deletions

View File

@ -20,10 +20,13 @@ package org.apache.solr.analysis;
import java.util.Map;
/**
* Simple abstract implementation that handles init arg processing.
*
* @author yonik
* @version $Id$
*/
public abstract class BaseTokenFilterFactory implements TokenFilterFactory {
/** The init args */
protected Map<String,String> args;
public void init(Map<String,String> args) {
this.args=args;

View File

@ -19,15 +19,18 @@ package org.apache.solr.analysis;
import java.util.Map;
/**
* Simple abstract implementation that handles init arg processing.
*
* @author yonik
* @version $Id$
*/
public abstract class BaseTokenizerFactory implements TokenizerFactory {
/** The init args */
protected Map<String,String> args;
public void init(Map<String,String> args) {
this.args=args;
}
public Map<String,String> getArgs() {
return args;
}

View File

@ -21,14 +21,48 @@ import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
/**
* A <code>TokenFilterFactory</code> creates a
* <code>TokenFilter</code> to transform one <code>TokenStream</code>
* into another.
*
* <p>
* TokenFilterFactories are registered for <code>FieldType</code>s with the
* <code>IndexSchema</code> through the <code>schema.xml</code> file.
* </p>
* <p>
* Example <code>schema.xml</code> entry to register a TokenFilterFactory
* implementation to transform tokens in a field of type "cool"
* </p>
* <pre>
* &lt;fieldtype name="cool" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* ...
* &lt;filter class="foo.MyTokenFilterFactory"/&gt;
* ...
* </pre>
* <p>
* A single instance of any registered TokenFilterFactory is created
* via the default constructor and is reused for each FieldType.
* </p>
* @author yonik
* @version $Id$
*/
public interface TokenFilterFactory {
/** <code>init</code> will be called just once, immediately after creation.
* <p>The args are user-level initialization parameters that
* may be specified when declaring the factory in the
* schema.xml
*/
public void init(Map<String,String> args);
/**
* Accessor method for reporting the args used to initialize this factory.
* <p>
* Implementations are <strong>strongly</strong> encouraged to return
* the contents of the Map passed to the init method
* </p>
*/
public Map<String,String> getArgs();
/** Transform the specified input TokenStream */
public TokenStream create(TokenStream input);
}
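For illustration, a minimal sketch of an implementation matching the schema.xml example above. The class name foo.MyTokenFilterFactory is hypothetical, and the filter simply delegates to Lucene's stock LowerCaseFilter; in practice an implementation would more likely extend BaseTokenFilterFactory (shown earlier in this commit) to inherit the init-arg handling.

package foo;

import java.util.Map;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.analysis.TokenFilterFactory;

public class MyTokenFilterFactory implements TokenFilterFactory {
  private Map<String,String> args;

  // called once by the IndexSchema, immediately after construction
  public void init(Map<String,String> args) {
    this.args = args;
  }

  // report the same Map that was passed to init, as recommended above
  public Map<String,String> getArgs() {
    return args;
  }

  // wrap the incoming TokenStream; here we just lowercase each token
  public TokenStream create(TokenStream input) {
    return new LowerCaseFilter(input);
  }
}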

View File

@ -23,15 +23,46 @@ import org.apache.lucene.analysis.*;
/**
* A <code>TokenizerFactory</code> breaks up a stream of characters
* into tokens.
*
* <p>
* TokenizerFactories are registered for <code>FieldType</code>s with the
* <code>IndexSchema</code> through the <code>schema.xml</code> file.
* </p>
* <p>
* Example <code>schema.xml</code> entry to register a TokenizerFactory
* implementation to tokenize fields of type "cool"
* </p>
* <pre>
* &lt;fieldtype name="cool" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* ...
* </pre>
* <p>
* A single instance of any registered TokenizerFactory is created
* via the default constructor and is reused for each FieldType.
* </p>
* @author yonik
* @version $Id$
*/
public interface TokenizerFactory {
/** <code>init</code> will be called just once, immediately after creation.
* <p>The args are user-level initialization parameters that
* may be specified when declaring the factory in the
* schema.xml
*/
public void init(Map<String,String> args);
/**
* Accessor method for reporting the args used to initialize this factory.
* <p>
* Implementations are <strong>strongly</strong> encouraged to return
* the contents of the Map passed to the init method
* </p>
*/
public Map<String,String> getArgs();
/** Creates a TokenStream of the specified input */
public TokenStream create(Reader input);
}
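A parallel sketch for the tokenizer side, again with a hypothetical class name, delegating to Lucene's WhitespaceTokenizer:

package foo;

import java.io.Reader;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.solr.analysis.TokenizerFactory;

public class MyTokenizerFactory implements TokenizerFactory {
  private Map<String,String> args;

  public void init(Map<String,String> args) { this.args = args; }
  public Map<String,String> getArgs() { return args; }

  // break the character stream on whitespace
  public TokenStream create(Reader input) {
    return new WhitespaceTokenizer(input);
  }
}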

View File

@ -20,6 +20,21 @@ import java.io.Writer;
import java.io.IOException;
/**
* Implementations of <code>QueryResponseWriter</code> are used to format responses to query requests.
*
* Different <code>QueryResponseWriter</code>s are registered with the <code>SolrCore</code>.
* One way to register a QueryResponseWriter with the core is through the <code>solrconfig.xml</code> file.
* <p>
* Example <code>solrconfig.xml</code> entry to register a <code>QueryResponseWriter</code> implementation to
* handle all queries with a writer type of "simple":
* <p>
* <code>
* &lt;queryResponseWriter name="simple" class="foo.SimpleResponseWriter" /&gt;
* </code>
* <p>
* A single instance of any registered QueryResponseWriter is created
* via the default constructor and is reused for all relevant queries.
*
* @author yonik
* @version $Id$
*/
@ -28,7 +43,32 @@ public interface QueryResponseWriter {
public static String CONTENT_TYPE_TEXT_UTF8="text/plain; charset=UTF-8";
public static String CONTENT_TYPE_TEXT_ASCII="text/plain; charset=US-ASCII";
/**
* Write a SolrQueryResponse. This method must be thread safe.
*
* <p>
* Information about the request (in particular: formatting options) may be
* obtained from <code>req</code> but the dominant source of information
* should be <code>rsp</code>.
* <p>
* There are no mandatory actions that write must perform.
* An empty write implementation would fulfill
* all interface obligations.
* </p>
*/
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException;
/**
* Return the applicable Content Type for a request. This method
* must be thread safe.
*
* <p>
* QueryResponseWriters must implement this method to return a valid
* HTTP Content-Type header for the request, one that logically
* corresponds with the output produced by the write method.
* </p>
* @return a Content-Type string, which may not be null.
*/
public String getContentType(SolrQueryRequest request, SolrQueryResponse response);
}
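A minimal sketch matching the solrconfig.xml example above, assuming the org.apache.solr.request package names of this era. The class foo.SimpleResponseWriter is hypothetical, and a real writer would walk the response data rather than emit a fixed string.

package foo;

import java.io.IOException;
import java.io.Writer;

import org.apache.solr.request.QueryResponseWriter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;

public class SimpleResponseWriter implements QueryResponseWriter {

  // no mandatory actions: even an empty body would satisfy the contract
  public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response)
      throws IOException {
    writer.write("status=OK\n");
  }

  // must agree with whatever write() produces
  public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
    return CONTENT_TYPE_TEXT_UTF8; // constant inherited from the interface
  }
}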

View File

@ -49,7 +49,7 @@ public interface SolrRequestHandler extends SolrInfoMBean {
/**
* Handles a query request. This method must be thread safe.
* <p>
* Information about the request may be obtained from <code>req</code> and
* response information may be set using <code>rsp</code>.

View File

@ -39,12 +39,24 @@ import java.io.IOException;
* @version $Id$
*/
public class QueryParsing {
/** the SolrParam used to override the QueryParser "default operator" */
public static final String OP = "q.op";
/**
* Helper utility for parsing a query using the Lucene QueryParser syntax.
* @param qs query expression in standard Lucene syntax
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
*/
public static Query parseQuery(String qs, IndexSchema schema) {
return parseQuery(qs, null, schema);
}
/**
* Helper utility for parsing a query using the Lucene QueryParser syntax.
* @param qs query expression in standard Lucene syntax
* @param defaultField default field used for unqualified search terms in the query expression
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
*/
public static Query parseQuery(String qs, String defaultField, IndexSchema schema) {
try {
Query query = new SolrQueryParser(schema, defaultField).parse(qs);
@ -62,11 +74,10 @@ public class QueryParsing {
}
/**
* @param qs query expression in standard Lucene syntax
* @param defaultField default field used for unqualified search terms in the query expression
* @param params used to determine the default operator, overriding the schema specified operator
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
*/
public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) {
try {
@ -123,12 +134,16 @@ public class QueryParsing {
* or if the sort specification couldn't be converted into a Lucene Sort
* (because of a field not being indexed or undefined, etc).
*
* <p>
* The form of the sort specification string currently parsed is:
* </p>
* <pre>
* SortSpec ::= SingleSort [, SingleSort]* <number>?
* SingleSort ::= <fieldname> SortDirection
* SortDirection ::= top | desc | bottom | asc
*
* </pre>
* Examples:
* <pre>
* top 10 #take the top 10 by score
* desc 10 #take the top 10 by score
* score desc 10 #take the top 10 by score
@ -136,6 +151,7 @@ public class QueryParsing {
* weight desc #sort by weight descending
* height desc,weight desc #sort by height descending, and use weight descending to break any ties
* height desc,weight asc top 20 #sort by height descending, using weight ascending as a tiebreaker
* </pre>
*
*/
public static SortSpec parseSort(String sortSpec, IndexSchema schema) {
@ -240,7 +256,7 @@ public class QueryParsing {
out.append(val);
}
}
/** @see #toString(Query,IndexSchema) */
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
boolean writeBoost=true;
@ -379,7 +395,20 @@ public class QueryParsing {
}
}
/**
* Formats a Query for debugging, using the IndexSchema to make
* complex field types readable.
*
* <p>
* The benefit of using this method instead of calling
* <code>Query.toString</code> directly is that it knows about the data
* types of each field, so any field which is encoded in a particularly
* complex way is still readable. The downside is that it only knows
* about built-in Query types, and will not be able to format custom
* Query classes.
* </p>
*/
public static String toString(Query query, IndexSchema schema) {
try {
StringBuilder sb = new StringBuilder();
@ -507,7 +536,10 @@ public class QueryParsing {
return f.getType().getValueSource(f);
}
/**
* Parse a function, returning a FunctionQuery
*
* :TODO: need examples
*/
public static FunctionQuery parseFunction(String func, IndexSchema schema) throws ParseException {
return new FunctionQuery(parseValSource(new StrParser(func), schema));

View File

@ -24,7 +24,8 @@ import java.io.IOException;
/**
*
* Primary API for dealing with Solr's internal caches.
*
* @author yonik
* @version $Id$
*/
@ -46,7 +47,7 @@ public interface SolrCache extends SolrInfoMBean {
* not reference any particular cache instance and prevent it from being
* garbage collected (do not use inner classes unless they are static).
* <p>
* The persistence object is designed to be used as a way for statistics
* to accumulate across all instances of the same type of cache; however, the
* object may be of any type desired by the cache implementation.
* <p>
@ -59,21 +60,49 @@ public interface SolrCache extends SolrInfoMBean {
// will be associated with slow-to-create SolrIndexSearchers.
// change to NamedList when other plugins do?
// symbolic name for this cache
/**
* Name by which SolrRequestHandlers can reference this Cache.
*
* This method must return the identifier that the Cache instance
* expects SolrRequestHandlers to use when requesting access to it
* from the SolrIndexSearcher. It is <strong>strongly</strong>
* recommended that this method return the value of the "name"
* parameter from the init args.
*
* :TODO: verify this.
*/
public String name();
// Should SolrCache just extend the java.util.Map interface?
// Following the conventions of the java.util.Map interface in any case.
/** :TODO: copy from Map */
public int size();
/** :TODO: copy from Map */
public Object put(Object key, Object value);
/** :TODO: copy from Map */
public Object get(Object key);
/** :TODO: copy from Map */
public void clear();
/**
* Enumeration of possible States for cache instances.
* :TODO: only state that seems to ever be set is LIVE ?
*/
public enum State {
/** :TODO */
CREATED,
/** :TODO */
STATICWARMING,
/** :TODO */
AUTOWARMING,
/** :TODO */
LIVE
}
/**
* Set different cache states.
@ -81,8 +110,13 @@ public interface SolrCache extends SolrInfoMBean {
* The cache user (SolrIndexSearcher) will take care of switching
* cache states.
*/
public void setState(State state);
/**
* Returns the last State set on this instance
*
* @see #setState
*/
public State getState();
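A hedged sketch of how a custom request handler might consult a user-defined cache. The cache name "myPluginCache" and the SolrIndexSearcher.getCache(String) accessor are assumptions here, not something this commit documents.

import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;

public class UserCacheExample {
  // "myPluginCache" must match the name() / "name" init arg of a cache
  // declared in solrconfig.xml
  public static Object memoize(SolrIndexSearcher searcher, Object key) {
    SolrCache cache = searcher.getCache("myPluginCache");
    if (cache == null) {
      return compute(key);          // cache not configured; just compute
    }
    Object val = cache.get(key);
    if (val == null) {
      val = compute(key);
      cache.put(key, val);          // populate for later requests
    }
    return val;
  }

  private static Object compute(Object key) {
    return key.toString().length(); // stand-in for an expensive computation
  }
}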

View File

@ -41,7 +41,7 @@ import java.util.logging.Logger;
/**
* SolrIndexSearcher adds schema awareness and caching functionality
* over the lucene IndexSearcher.
*
* @author yonik
* @version $Id$
* @since solr 0.9
@ -78,8 +78,7 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
private final SolrCache[] cacheList;
private static final SolrCache[] noCaches = new SolrCache[0];
/** Creates a searcher searching the index in the named directory. */
public SolrIndexSearcher(IndexSchema schema, String name, String path, boolean enableCache) throws IOException {
this(schema,name,IndexReader.open(path), true, enableCache);
}
@ -148,7 +147,7 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
}
/** Register sub-objects such as caches
*/
public void register() {
for (SolrCache cache : cacheList) {
@ -158,7 +157,11 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
registerTime=System.currentTimeMillis();
}
/**
* Frees resources associated with this searcher.
*
* In particular, the underlying reader and any caches in use are closed.
*/
public void close() throws IOException {
// unregister first, so no management actions are tried on a closing searcher.
SolrInfoRegistry.getRegistry().remove(name);
@ -185,7 +188,9 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
}
}
/** Direct access to the IndexReader used by this searcher */
public IndexReader getReader() { return reader; }
/** Direct access to the IndexSchema for use with this searcher */
public IndexSchema getSchema() { return schema; }
@ -265,30 +270,10 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
Filter[] newFilter = new Filter[1];
optimizer.optimize((BooleanQuery)query, searcher, 0, newQuery, newFilter);
// TODO REMOVE
if (newFilter[0]!=null) {
// System.out.println("OPTIMIZED QUERY: FILTER=" + newFilter[0]);
}
return searcher.search(newQuery[0], newFilter[0], sort);
}
}
/******
* Shouldn't be needed since IndexReader has its own finalize method
* and there is nothing else to clean up here (for now at least)
*
protected void finalize() {
try {
close();
super.finalize();
} catch (Throwable e) {
SolrException.log(log,e);
}
}
******/
public Hits search(Query query, Filter filter) throws IOException {
return searcher.search(query, filter);
}
@ -297,12 +282,6 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
return searcher.search(query, sort);
}
/*** Replaced this one with one that does filter optimization
public Hits search(Query query, Filter filter, Sort sort) throws IOException {
return searcher.search(query, filter, sort);
}
***/
public void search(Query query, HitCollector results) throws IOException {
searcher.search(query, results);
}
@ -581,6 +560,23 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
}
/**
* Returns documents matching both <code>query</code> and the
* intersection of the <code>filterList</code>, sorted by <code>sort</code>.
* <p>
* This method is cache aware and may retrieve <code>filter</code> from
* the cache or make an insertion into the cache as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
*
* @param query
* @param filterList may be null
* @param lsort criteria by which to sort (if null, query relevance is used)
* @param offset offset into the list of documents to return
* @param len maximum number of documents to return
* @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
*/
public DocList getDocList(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
DocListAndSet answer = new DocListAndSet();
getDocListC(answer,query,filterList,null,lsort,offset,len,flags);
@ -1023,6 +1019,7 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
return getDocListAndSet(query, filterList, lsort, offset, len);
}
/**
* Returns documents matching both <code>query</code> and <code>filter</code>
* and sorted by <code>sort</code>. Also returns the complete set of documents
@ -1062,12 +1059,57 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
return filterList;
}
/**
* Returns documents matching both <code>query</code> and the intersection
* of <code>filterList</code>, sorted by <code>sort</code>.
* Also returns the complete set of documents
* matching <code>query</code> and <code>filter</code>
* (regardless of <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may retrieve <code>filter</code> from
* the cache or make an insertion into the cache as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param query
* @param filterList may be null
* @param lsort criteria by which to sort (if null, query relevance is used)
* @param offset offset into the list of documents to return
* @param len maximum number of documents to return
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
*/
public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len) throws IOException {
DocListAndSet ret = new DocListAndSet();
getDocListC(ret,query,filterList,null,lsort,offset,len,GET_DOCSET);
return ret;
}
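A hedged usage sketch of the method above. The query strings, the "inStock" field, and the public docList field of DocListAndSet are illustrative assumptions about this revision.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;

public class DocListAndSetExample {
  public static Document[] firstPage(SolrIndexSearcher searcher, IndexSchema schema)
      throws IOException {
    Query query = QueryParsing.parseQuery("video", schema);
    List<Query> filters = new ArrayList<Query>();
    filters.add(QueryParsing.parseQuery("inStock:true", schema));
    // null sort means relevance ordering; return hits 0..9 plus the full DocSet
    DocListAndSet res = searcher.getDocListAndSet(query, filters, null, 0, 10);
    // materialize the stored fields for the returned page of ids
    return searcher.readDocs(res.docList);
  }
}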
/**
* Returns documents matching both <code>query</code> and the intersection
* of <code>filterList</code>, sorted by <code>sort</code>.
* Also returns the complete set of documents
* matching <code>query</code> and <code>filter</code>
* (regardless of <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may retrieve <code>filter</code> from
* the cache or make an insertion into the cache as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param query
* @param filterList may be null
* @param lsort criteria by which to sort (if null, query relevance is used)
* @param offset offset into the list of documents to return
* @param len maximum number of documents to return
* @param flags user supplied flags for the result set
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
*/
public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
DocListAndSet ret = new DocListAndSet();
getDocListC(ret,query,filterList,null,lsort,offset,len, flags |= GET_DOCSET);
@ -1095,6 +1137,27 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
return ret;
}
/**
* Returns documents matching both <code>query</code> and <code>filter</code>
* and sorted by <code>sort</code>. Also returns the complete set of documents
* matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
* <p>
* This method is cache aware and may make an insertion into the cache
* as a result of this call.
* <p>
* FUTURE: The returned DocList may be retrieved from a cache.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified.
*
* @param query
* @param filter may be null
* @param lsort criteria by which to sort (if null, query relevance is used)
* @param offset offset into the list of documents to return
* @param len maximum number of documents to return
* @param flags user supplied flags for the result set
* @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
* @throws IOException
*/
public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
DocListAndSet ret = new DocListAndSet();
getDocListC(ret,query,null,filter,lsort,offset,len, flags |= GET_DOCSET);
@ -1160,14 +1223,20 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
}
/**
* Takes a list of docs (the doc ids actually), and returns an array
* of Documents containing all of the stored fields.
*/
public Document[] readDocs(DocList ids) throws IOException {
Document[] docs = new Document[ids.size()];
readDocs(docs,ids);
return docs;
}
/**
* Takes a list of docs (the doc ids actually), and reads them into an array
* of Documents.
*/
public void readDocs(Document[] docs, DocList ids) throws IOException {
DocIterator iter = ids.iterator();
for (int i=0; i<docs.length; i++) {

View File

@ -28,6 +28,23 @@ import org.apache.solr.schema.FieldType;
// analyzer. Should lead to faster query parsing.
/**
* A variation on the Lucene QueryParser which knows about the field
* types and query time analyzers configured in Solr's schema.xml.
*
* <p>
* This class also deviates from the Lucene QueryParser by using
* ConstantScore versions of RangeQuery and PrefixQuery to prevent
* TooManyClauses exceptions.
* </p>
*
* <p>
* If the magic field name "<code>_val_</code>" is used in a term or
* phrase query, the value is parsed as a function.
* </p>
*
* @see QueryParsing#parseFunction
* @see ConstantScoreRangeQuery
* @see ConstantScorePrefixQuery
* @author yonik
*/
public class SolrQueryParser extends QueryParser {
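A hedged sketch of the behaviour described above; the default field "text" and the function field "popularity" are assumptions used only for illustration.

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrQueryParser;

public class SolrQueryParserExample {
  public static Query parse(IndexSchema schema) throws ParseException {
    SolrQueryParser parser = new SolrQueryParser(schema, "text");
    // the _val_ clause is parsed as a function query rather than a term query
    return parser.parse("ipod AND _val_:popularity");
  }
}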

View File

@ -98,7 +98,7 @@ public class OpenBitSet implements Cloneable, Serializable {
/**
* Returns the current capacity of this set. Included for
* compatibility. This is *not* equal to {@link #cardinality}
*/
public long size() {
return capacity();
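A small sketch distinguishing the two methods; the org.apache.solr.util package location is assumed for this revision.

import org.apache.solr.util.OpenBitSet;

public class OpenBitSetExample {
  public static void main(String[] args) {
    OpenBitSet bits = new OpenBitSet(1000); // capacity hint, in bits
    bits.set(3);
    bits.set(64);
    System.out.println(bits.size());        // capacity of the set, not a count
    System.out.println(bits.cardinality()); // 2 -- number of bits actually set
  }
}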