LUCENE-1872: improve javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@809202 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-08-29 20:52:03 +00:00
parent 886e887b3d
commit 8107d186c7
9 changed files with 219 additions and 146 deletions

View File

@ -29,59 +29,57 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* <b>Expert:</b> This class provides a {@link TokenStream} for indexing numeric values
* that can be used by {@link NumericRangeQuery}/{@link NumericRangeFilter}.
* For more information, how to use this class and its configuration properties
* (<a href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>)
* read the docs of {@link NumericRangeQuery}.
* <b>Expert:</b> This class provides a {@link TokenStream}
* for indexing numeric values that can be used by {@link
* NumericRangeQuery} or {@link NumericRangeFilter}.
*
* <p><b>For easy usage during indexing, there is a {@link NumericField}, that uses the optimal
* indexing settings (no norms, no term freqs). {@link NumericField} is a wrapper around this
* expert token stream.</b>
* <p>Note that for simple usage, {@link NumericField} is
* recommended. {@link NumericField} disables norms and
* term freqs, as they are not usually needed during
* searching. If you need to change these settings, you
* should use this class.
*
* <p>This stream is not intended to be used in analyzers, its more for iterating the
* different precisions during indexing a specific numeric value.
* A numeric value is indexed as multiple string encoded terms, each reduced
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
* <code>shift</code> value (number of bits removed) used during encoding.
* The number of bits removed from the right for each trie entry is called
* <code>precisionStep</code> in this API.
* <p>See {@link NumericField} for capabilities of fields
* indexed numerically.</p>
*
* <p>Here's an example usage, for an int field:
*
* <p>The usage pattern is (it is recommened to switch off norms and term frequencies
* for numeric fields; it does not make sense to have them):
* <pre>
* Field field = new Field(name, new NumericTokenStream(precisionStep).set<em>???</em>Value(value));
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* document.add(field);
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* document.add(field);
* </pre>
*
* <p>For optimal performance, re-use the TokenStream and Field instance
* for more than one document:
*
* <pre>
* <em>// init</em>
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
* Field field = new Field(name, stream);
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* Document document = new Document();
* document.add(field);
* <em>// use this code to index many documents:</em>
* stream.set<em>???</em>Value(value1)
* writer.addDocument(document);
* stream.set<em>???</em>Value(value2)
* writer.addDocument(document);
* ...
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
* Field field = new Field(name, stream);
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* Document document = new Document();
* document.add(field);
* for(all documents) {
* stream.setIntValue(value)
* writer.addDocument(document);
* }
* </pre>
*
* <p><em>Please note:</em> Token streams are read, when the document is added to index.
* If you index more than one numeric field, use a separate instance for each.
* <p>This stream is not intended to be used in analyzers;
* it's more for iterating the different precisions during
* indexing a specific numeric value.</p>
* <p><b>NOTE</b>: as TokenStreams are only consumed once
* the Document is added to the index, if you index more
* than one numeric field, use a separate NumericTokenStream
* instance for each.</p>
*
* <p>Values indexed by this stream can be loaded into the {@link FieldCache}
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
* type; {@link SortField#AUTO} does not work with this type of field).
* Values solely used for sorting can be indexed using a <code>precisionStep</code>
* of {@link Integer#MAX_VALUE} (at least &ge;64), because this step only produces
* one value token with highest precision.
* <p>See {@link NumericRangeQuery} for more details on the
* <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* parameter as well as how numeric fields work under the hood.</p>
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>

View File

@ -28,58 +28,108 @@ import org.apache.lucene.search.SortField; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs
/**
* This class provides a {@link Field} for indexing numeric values
* that can be used by {@link NumericRangeQuery}/{@link NumericRangeFilter}.
* For more information, how to use this class and its configuration properties
* (<a href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>)
* read the docs of {@link NumericRangeQuery}.
*
* <p>A numeric value is indexed as multiple string encoded terms, each reduced
* by zeroing bits from the right. Each value is also prefixed (in the first char) by the
* <code>shift</code> value (number of bits removed) used during encoding.
* The number of bits removed from the right for each trie entry is called
* <code>precisionStep</code> in this API.
*
* <p>The usage pattern is:
* <p>This class provides a {@link Field} that enables indexing
* of numeric values for efficient range filtering and
* sorting. Here's an example usage, adding an int value:
* <pre>
* document.add(
* new NumericField(name, precisionStep, Field.Store.XXX, true).set<em>???</em>Value(value)
* );
* document.add(new NumericField(name).setIntValue(value));
* </pre>
* <p>For optimal performance, re-use the NumericField and {@link Document} instance
* for more than one document:
*
* For optimal performance, re-use the
* NumericField and {@link Document} instance for more than
* one document:
*
* <pre>
* <em>// init</em>
* NumericField field = new NumericField(name, precisionStep, Field.Store.XXX, true);
* NumericField field = new NumericField(name);
* Document document = new Document();
* document.add(field);
* <em>// use this code to index many documents:</em>
* field.set<em>???</em>Value(value1)
* writer.addDocument(document);
* field.set<em>???</em>Value(value2)
* writer.addDocument(document);
* ...
*
* for(all documents) {
* ...
* field.setIntValue(value)
* writer.addDocument(document);
* ...
* }
* </pre>
*
* <p>More advanced users can instead use {@link NumericTokenStream} directly, when
* indexing numbers. This class is a wrapper around this token stream type for easier,
* more intuitive usage.
* <p>The java native types int, long, float and double are
* directly supported. However, any value that can be
* converted into these native types can also be indexed.
* For example, date/time values represented by a
* <code>java.util.Date</code> can be translated into a long
* value using the <code>getTime</code> method. If you
* don't need millisecond precision, you can quantize the
* value, either by dividing the result of
* <code>getTime</code> or using the separate getters (for
* year, month, etc.) to construct an int or long value.</p>
*
* <p><b>Please note:</b> This class is only used during indexing. You can also create
* numeric stored fields with it, but when retrieving the stored field value
* from a {@link Document} instance after search, you will get a conventional
* {@link Fieldable} instance where the numeric values are returned as {@link String}s
* (according to <code>toString(value)</code> of the used data type).
* <p>To perform range querying or filtering against a
* NumericField, use {@link NumericRangeQuery} or {@link
* NumericRangeFilter}. To sort according to a
* NumericField, use the normal numeric sort types, eg
* {@link SortField#INT} (note that {@link SortField#AUTO}
* will not work with these fields). NumericField values
* can also be loaded directly from {@link FieldCache}.</p>
*
* <p>Values indexed by this field can be loaded into the {@link FieldCache}
* and can be sorted (use {@link SortField}{@code .TYPE} to specify the correct
* type; {@link SortField#AUTO} does not work with this type of field).
* Values solely used for sorting can be indexed using a <code>precisionStep</code>
* of {@link Integer#MAX_VALUE} (at least &ge;64), because this step only produces
* one value token with highest precision.
* <p>By default, a NumericField's value is not stored but
* is indexed for range filtering and sorting. You can use
* the {@link #NumericField(String,Field.Store,boolean)}
* constructor if you need to change these defaults.</p>
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>
* <p>You may add the same field name as a NumericField to
* the same document more than once. Range querying and
* filtering will be the logical OR of all values, however
* sort behavior is not defined. If you need to sort, you
* should separately index a single-valued NumericField.</p>
*
* <p>A NumericField will consume somewhat more disk space
* in the index than an ordindary single-valued field.
* However, for a typical index that includes substantial
* textual content per document, this increase will likely
* be in the noise. </p>
*
* <p>Within lucene, each numeric value is indexed as a
* <em>trie</em> structure, where each term is logically
* assigned to larger and larger pre-defined brackets. The
* step size between each successive bracket is called the
* <code>precisionStep</code>, measured in bits. Smaller
* <code>precisionStep</code> values result in larger number
* of brackets, which consumes more disk space in the index
* but may result in faster range search performance. The
* default value, 4, was selected for a reasonable tradeoff
* of disk space consumption versus performance. You can
* use the expert constructor {@link
* #NumericField(String,int,Field.Store,boolean)} if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* NumericRangeQuery} or {@link NumericRangeFilter}.
*
* <p>If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
* <code>precisionStep</code> of {@link Integer#MAX_VALUE}.
* This will minimize disk space consumed. </p>
*
* <p>More advanced users can instead use {@link
* NumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
* <p>For more information on the internals of numeric trie
* indexing, including the <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* configuration, see {@link NumericRangeQuery}.
*
* <p><b>NOTE:</b> This class is only used during
* indexing. When retrieving the stored field value from a
* {@link Document} instance after search, you will get a
* conventional {@link Fieldable} instance where the numeric
* values are returned as {@link String}s (according to
* <code>toString(value)</code> of the used data type).
*
* <p><font color="red"><b>NOTE:</b> This API is
* experimental and might change in incompatible ways in the
* next release.</font>
*
* @since 2.9
*/

View File

@ -31,11 +31,12 @@ import org.apache.lucene.util.ReaderUtil;
/** Implements search over a single IndexReader.
*
* <p>Applications usually need only call the inherited {@link #search(Query)}
* or {@link #search(Query,Filter)} methods. For performance reasons it is
* <p>Applications usually need only call the inherited
* {@link #search(Query,int)}
* or {@link #search(Query,Filter,int)} methods. For performance reasons it is
* recommended to open only one IndexSearcher and use it for all of your searches.
*
* <p>Note that you can only access Hits from an IndexSearcher as long as it is
* <p>Note that you can only access the deprecated {@link Hits} from an IndexSearcher as long as it is
* not yet closed, otherwise an IOException will be thrown.
*
* <a name="thread-safety"></a><p><b>NOTE</b>: {@link

View File

@ -22,25 +22,29 @@ import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.util.NumericUtils; // for javadocs
/**
* Implementation of a {@link Filter} that implements <em>trie-based</em> range filtering
* for numeric values. For more information about the algorithm look into the docs of
* {@link NumericRangeQuery}.
* A {@link Filter} that only accepts numeric values within
* a specified range. To use this, you must first index the
* numeric values using {@link NumericField} (expert: {@link
* NumericTokenStream}).
*
* <p>This filter depends on a specific structure of terms in the index that can only be created
* by indexing using {@link NumericField} (expert: {@link NumericTokenStream}).
* <p>You create a new NumericRangeFilter with the static
* factory methods, eg:
*
* <p><b>Please note:</b> This class has no constructor, you can create filters depending on the data type
* by using the static factories {@linkplain #newLongRange NumericRangeFilter.newLongRange()},
* {@linkplain #newIntRange NumericRangeFilter.newIntRange()}, {@linkplain #newDoubleRange NumericRangeFilter.newDoubleRange()},
* and {@linkplain #newFloatRange NumericRangeFilter.newFloatRange()}, e.g.:
* <pre>
* Filter f = NumericRangeFilter.newFloatRange(field, <a href="NumericRangeQuery.html#precisionStepDesc">precisionStep</a>,
* Filter f = NumericRangeFilter.newFloatRange("weight",
* new Float(0.3f), new Float(0.10f),
* true, true);
* </pre>
*
* accepts all documents whose float valued "weight" field
* ranges from 0.3 to 0.10, inclusive.
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>
* might change in incompatible ways in the next
* release.</font>
*
* See {@link NumericRangeQuery} for details on how Lucene
* indexes and searches numeric valued fields.
*
* @since 2.9
**/

View File

@ -29,34 +29,58 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
* Implementation of a {@link Query} that implements <em>trie-based</em> range querying
* for numeric values.
* <p>A {@link Query} that matches numeric values within a
* specified range. To use this, you must first index the
* numeric values using {@link NumericField} (expert: {@link
* NumericTokenStream}). If your terms are instead textual,
* you should use {@link TermRangeQuery}. {@link
* NumericRangeFilter} is the filter equivalent of this
* query.</p>
*
* <h3>Usage</h3>
* <h4>Indexing</h4>
* Before numeric values can be queried, they must be indexed in a special way. You can do this
* by adding numeric fields to the index by specifying a {@link NumericField} (expert: {@link NumericTokenStream}).
* An important setting is the <a href="#precisionStepDesc"><code>precisionStep</code></a>, which specifies,
* how many different precisions per numeric value are indexed to speed up range queries.
* Lower values create more terms but speed up search, higher values create less terms, but
* slow down search. Suitable values are between <b>1</b> and <b>8</b>. A good starting point to test is <b>4</b>,
* which is the default value for all <code>Numeric*</code> classes. For a discussion about ideal
* values, see below. Indexing code examples can be found in {@link NumericField}.
* <p>You create a new NumericRangeQuery with the static
* factory methods, eg:
*
* <h4>Searching</h4>
* <p>This class has no constructor, you can create queries depending on the data type
* by using the static factories {@linkplain #newLongRange NumericRangeQuery.newLongRange()},
* {@linkplain #newIntRange NumericRangeQuery.newIntRange()}, {@linkplain #newDoubleRange NumericRangeQuery.newDoubleRange()},
* and {@linkplain #newFloatRange NumericRangeQuery.newFloatRange()}, e.g.:
* <pre>
* Query q = NumericRangeQuery.newFloatRange(field, <a href="#precisionStepDesc">precisionStep</a>,
* Query q = NumericRangeQuery.newFloatRange("weight",
* new Float(0.3f), new Float(0.10f),
* true, true);
* </pre>
* The used <a href="#precisionStepDesc"><code>precisionStep</code></a> must be compatible
* to the one used during indexing (see below). The default is also <b>4</b>.
*
* <h3>How it works</h3>
* matches all documents whose float valued "weight" field
* ranges from 0.3 to 0.10, inclusive.
*
* <p>The performance of NumericRangeQuery is much better
* than the corresponding {@link TermRangeQuery} because the
* number of terms that must be searched is usually far
* fewer, thanks to trie indexing, described below.</p>
*
* <p>You can optionally specify a <a
* href="#precisionStepDesc"><code>precisionStep</code></a>
* when creating this query. This is necessary if you've
* changed this configuration from its default (4) during
* indexing. Lower values consume more disk space but speed
* up searching. Suitable values are between <b>1</b> and
* <b>8</b>. A good starting point to test is <b>4</b>,
* which is the default value for all <code>Numeric*</code>
* classes. See <a href="#precisionStepDesc">below</a> for
* details.
*
* <p>This query defaults to {@linkplain
* MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for
* 32 bit (int/float) ranges with precisionStep &le;8 and 64
* bit (long/double) ranges with precisionStep &le;6.
* Otherwise it uses {@linkplain
* MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the
* number of terms is likely to be high. With precision
* steps of &le;4, this query can be run with one of the
* BooleanQuery rewrite methods without changing
* BooleanQuery's default max clause count.
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>
*
*
* <br><br><h3>How it works</h3>
*
* <p>See the publication about <a target="_blank" href="http://www.panfmp.org">panFMP</a>,
* where this algorithm was described (referred to as <code>TrieRangeQuery</code>):
@ -118,10 +142,6 @@ import org.apache.lucene.index.Term;
* Sorting is also possible with range query optimized fields using one of the above <code>precisionSteps</code>.
* </ul>
*
* <p>This dramatically improves the performance of Apache Lucene with range queries, which
* are no longer dependent on the index size and the number of distinct values because there is
* an upper limit unrelated to either of these properties.</p>
*
* <p>Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed
* that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count)
* took about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs
@ -129,19 +149,6 @@ import org.apache.lucene.index.Term;
* precision step). This query type was developed for a geographic portal, where the performance for
* e.g. bounding boxes or exact date/time stamps is important.</p>
*
* <p>The query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* for 32 bit (int/float) ranges with precisionStep &le;8 and
* 64 bit (long/double) ranges with precisionStep &le;6.
* Otherwise it uses {@linkplain
* MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the
* number of terms is likely to be high.
* With precision steps of &le;4, this query can be run with
* one of the BooleanQuery rewrite methods without changing
* BooleanQuery's default max clause count.
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>
*
* @since 2.9
**/
public final class NumericRangeQuery extends MultiTermQuery {

View File

@ -20,12 +20,13 @@ package org.apache.lucene.search;
import java.text.Collator;
/**
* A Filter that restricts search results to a range of values in a given
* field.
* A Filter that restricts search results to a range of term
* values in a given field.
*
* <p>This filter matches the documents looking for terms that fall into the
* supplied range according to {@link String#compareTo(String)}. It is not intended
* for numerical ranges, use {@link NumericRangeFilter} instead.
* supplied range according to {@link
* String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
* for numerical ranges; use {@link NumericRangeFilter} instead.
*
* <p>If you construct a large number of range filters with different ranges but on the
* same field, {@link FieldCacheRangeFilter} may have significantly better performance.

View File

@ -24,11 +24,12 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
/**
* A Query that matches documents within an exclusive range of terms.
* A Query that matches documents within an range of terms.
*
* <p>This query matches the documents looking for terms that fall into the
* supplied range according to {@link String#compareTo(String)}. It is not intended
* for numerical ranges, use {@link NumericRangeQuery} instead.
* supplied range according to {@link
* String#compareTo(String)}, unless a <code>Collator</code> is provided. It is not intended
* for numerical ranges; use {@link NumericRangeQuery} instead.
*
* <p>This query uses the {@link
* MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}

View File

@ -45,7 +45,9 @@ public abstract class Directory {
* this Directory instance). */
protected LockFactory lockFactory;
/** @deprecated For some Directory implementations ({@link
/** List the files in the directory.
*
* @deprecated For some Directory implementations ({@link
* FSDirectory}, and its subclasses), this method
* silently filters its results to include only index
* files. Please use {@link #listAll} instead, which

View File

@ -23,11 +23,17 @@ import java.util.List;
import java.util.Set;
/**
* Files with the specified extensions are placed in the
* Expert: A Directory instance that switches files betweeen
* two other Directory instances.
* <p>Files with the specified extensions are placed in the
* primary directory; others are placed in the secondary
* directory. The provided Set must not change once passed
* to this class, and must allow multiple threads to call
* contains at once.
* contains at once.</p>
*
* <p><b>NOTE</b>: this API is new and experimental and is
* subject to suddenly change in the next release.
*/
public class FileSwitchDirectory extends Directory {
@ -43,11 +49,13 @@ public class FileSwitchDirectory extends Directory {
this.doClose = doClose;
this.lockFactory = primaryDir.getLockFactory();
}
/** Return the primary directory */
public Directory getPrimaryDir() {
return primaryDir;
}
/** Return the secondary directory */
public Directory getSecondaryDir() {
return secondaryDir;
}
@ -76,6 +84,7 @@ public class FileSwitchDirectory extends Directory {
return listAll();
}
/** Utility method to return a file's extension. */
public static String getExtension(String name) {
int i = name.lastIndexOf('.');
if (i == -1) {