mirror of https://github.com/apache/lucene.git
LUCENE-2008: javadoc improvements
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829568 13f79535-47bb-0310-9956-ffa450edef68
parent fbe28af914
commit ea2883c40a
@@ -150,6 +150,9 @@ Optimizations
 
 Documentation
 
+ * LUCENE-2008: Javadoc improvements for TokenStream/Tokenizer/Token
+   (Luke Nezda via Mike McCandless)
+
 Build
 
  * LUCENE-486: Remove test->demo dependencies. (Michael Busch)
@@ -32,7 +32,7 @@ import java.util.Iterator;
  * is in the set without the necessity of converting it
  * to a String first.
  * <P>
- * <em>Please note:</em> This class implements {@link Set} but
+ * <em>Please note:</em> This class implements {@link java.util.Set Set} but
  * does not behave like it should in all cases. The generic type is
  * {@code Set<Object>}, because you can add any object to it,
  * that has a string representation. The add methods will use
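A minimal usage sketch of what this javadoc describes, assuming the Lucene 2.9 CharArraySet API (the set contents and sizes are illustrative):

    // Build a set that can test char[] slices without allocating Strings.
    CharArraySet set = new CharArraySet(10 /* startSize */, true /* ignoreCase */);
    set.add("foo");                       // a String is stored as char[] internally
    set.add(new char[] {'b', 'a', 'r'}); // a char[] can be added directly

    char[] buffer = {'x', 'F', 'o', 'o', 'x'};
    // Test a slice of the buffer in place -- no String conversion needed.
    boolean found = set.contains(buffer, 1, 3); // true: ignoreCase matches "Foo"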
@@ -68,9 +68,7 @@ public final class StopAnalyzer extends Analyzer {
 
   /** Builds an analyzer with the stop words from the given set.
    * @param matchVersion See <a href="#version">above</a>
-   * @param stopWords Set of stop words
-   * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
+   * @param stopWords Set of stop words */
   public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
     this.stopWords = stopWords;
     enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
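For reference, a sketch of this two-argument constructor in use; `Version.LUCENE_29` and the stop words are illustrative:

    Set<String> stopWords = new HashSet<String>(Arrays.asList("the", "a", "an"));
    // Position increments are enabled or not according to matchVersion, via
    // StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion).
    StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_29, stopWords);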
@@ -54,7 +54,6 @@ public final class StopFilter extends TokenFilter {
    * @param input Input TokenStream
    * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
    * @param ignoreCase if true, all words are lower cased first
-   * @param ignoreCase -Ignore case when stopping.
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
   {
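A sketch of this constructor in use, assuming a whitespace tokenizer upstream (`stopWords` is any set of the kind described above):

    TokenStream ts = new WhitespaceTokenizer(new StringReader("the quick fox"));
    // enablePositionIncrements=true leaves gaps where stop words were removed;
    // ignoreCase=true lower-cases each token before matching.
    ts = new StopFilter(true, ts, stopWords, true);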
@@ -53,7 +53,7 @@ d.add(new Field("f2", final2));
 d.add(new Field("f3", final3));
 d.add(new Field("f4", final4));
  * </pre>
- * In this example, <code>sink1</code> and <code>sink2<code> will both get tokens from both
+ * In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
  * <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
  * and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
  * It is important, that tees are consumed before sinks (in the above example, the field names must be
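A sketch of the tee/sink wiring this javadoc describes, assuming Lucene 2.9's TeeSinkTokenFilter API (`reader1` and `reader2` are placeholders):

    TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
    TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
    TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();

    TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
    source2.addSinkTokenStream(sink1); // sink1 and sink2 now get tokens from both sources
    source2.addSinkTokenStream(sink2);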
@@ -37,7 +37,7 @@ import org.apache.lucene.util.AttributeImpl;
   <p>
   The start and end offsets permit applications to re-associate a token with
   its source text, e.g., to display highlighted query terms in a document
-  browser, or to show matching text fragments in a KWIC (KeyWord In Context)
+  browser, or to show matching text fragments in a <abbr title="KeyWord In Context">KWIC</abbr>
   display, etc.
   <p>
   The type is a string, assigned by a lexical analyzer
@@ -59,9 +59,9 @@ import org.apache.lucene.util.AttributeImpl;
 
   <br><br>
 
-  <p>Tokenizers and filters should try to re-use a Token
+  <p>Tokenizers and TokenFilters should try to re-use a Token
   instance when possible for best performance, by
-  implementing the {@link TokenStream#next(Token)} API.
+  implementing the {@link TokenStream#incrementToken()} API.
   Failing that, to create a new Token you should first use
   one of the constructors that starts with null text. To load
   the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
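The fallback path above, as a sketch (`buffer`, `length`, `startOffset`, and `endOffset` are placeholders):

    Token token = new Token(startOffset, endOffset); // constructor with null text
    token.setTermBuffer(buffer, 0, length);          // load the term from a char[]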
@@ -75,30 +75,30 @@ import org.apache.lucene.util.AttributeImpl;
   set the length of the term text. See <a target="_top"
   href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
   for details.</p>
-  <p>Typical reuse patterns:
+  <p>Typical Token reuse patterns:
   <ul>
-  <li> Copying text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(string, startOffset, endOffset[, type]);
   </pre>
   </li>
-  <li> Copying some text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
   </pre>
   </li>
   </li>
-  <li> Copying text from char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
   </pre>
   </li>
-  <li> Copying some text from a char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
   </pre>
   </li>
-  <li> Copying from one one Token to another (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
   </pre>
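To put one of these one-liners in context: a sketch of an old-API producer that reuses the caller's Token (`readNextWord`, `word`, and `wordStart` are hypothetical helpers of the enclosing stream):

    public Token next(Token reusableToken) throws IOException {
      assert reusableToken != null;
      if (!readNextWord()) {
        return null; // end of stream
      }
      // reinit() resets the token and returns it, so no new Token is allocated.
      return reusableToken.reinit(word, wordStart, wordStart + word.length());
    }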
@@ -108,7 +108,7 @@ import org.apache.lucene.util.AttributeImpl;
   <ul>
   <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
   <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
-  <li>The startOffset and endOffset represent the start and offset in the source text. So be careful in adjusting them.</li>
+  <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
   <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
   </ul>
   </p>
@@ -19,15 +19,10 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 
-/** A TokenFilter is a TokenStream whose input is another token stream.
+/** A TokenFilter is a TokenStream whose input is another TokenStream.
   <p>
-  This is an abstract class.
-  NOTE: subclasses must override
-  {@link #incrementToken()} if the new TokenStream API is used
-  and {@link #next(Token)} or {@link #next()} if the old
-  TokenStream API is used.
-  <p>
-  See {@link TokenStream}
+  This is an abstract class; subclasses must override {@link #incrementToken()}.
+  @see TokenStream
   */
 public abstract class TokenFilter extends TokenStream {
   /** The source of tokens for this filter. */
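A minimal sketch of such a subclass on the new API, assuming the Lucene 2.9 attribute classes (the filter itself is illustrative, not part of this commit):

    public final class LowerCaseFilterSketch extends TokenFilter {
      private final TermAttribute termAtt;

      public LowerCaseFilterSketch(TokenStream input) {
        super(input);
        termAtt = (TermAttribute) addAttribute(TermAttribute.class);
      }

      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
          return false; // no more tokens upstream
        }
        // Modify the shared term attribute in place.
        char[] buffer = termAtt.termBuffer();
        for (int i = 0; i < termAtt.termLength(); i++) {
          buffer[i] = Character.toLowerCase(buffer[i]);
        }
        return true;
      }
    }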
@@ -31,14 +31,14 @@ import org.apache.lucene.util.AttributeSource;
 * A <code>TokenStream</code> enumerates the sequence of tokens, either from
 * {@link Field}s of a {@link Document} or from query text.
 * <p>
-* This is an abstract class. Concrete subclasses are:
+* This is an abstract class; concrete subclasses are:
 * <ul>
 * <li>{@link Tokenizer}, a <code>TokenStream</code> whose input is a Reader; and
 * <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
 * <code>TokenStream</code>.
 * </ul>
 * A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
-* has moved from being {@link Token} based to {@link Attribute} based. While
+* has moved from being {@link Token}-based to {@link Attribute}-based. While
 * {@link Token} still exists in 2.9 as a convenience class, the preferred way
 * to store the information of a {@link Token} is to use {@link AttributeImpl}s.
 * <p>
@@ -54,14 +54,14 @@ import org.apache.lucene.util.AttributeSource;
 * <li>Instantiation of <code>TokenStream</code>/{@link TokenFilter}s which add/get
 * attributes to/from the {@link AttributeSource}.
 * <li>The consumer calls {@link TokenStream#reset()}.
-* <li>the consumer retrieves attributes from the stream and stores local
-* references to all attributes it wants to access
-* <li>The consumer calls {@link #incrementToken()} until it returns false and
-* consumes the attributes after each call.
+* <li>The consumer retrieves attributes from the stream and stores local
+* references to all attributes it wants to access.
+* <li>The consumer calls {@link #incrementToken()} until it returns false
+* consuming the attributes after each call.
 * <li>The consumer calls {@link #end()} so that any end-of-stream operations
 * can be performed.
 * <li>The consumer calls {@link #close()} to release any resource when finished
-* using the <code>TokenStream</code>
+* using the <code>TokenStream</code>.
 * </ol>
 * To make sure that filters and consumers know which attributes are available,
 * the attributes must be added during instantiation. Filters and consumers are
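The numbered workflow above, as a sketch (`analyzer` and `text` are placeholders):

    TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
    // Store a local reference to each attribute of interest.
    TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(termAtt.term()); // consume the attributes after each call
    }
    stream.end();   // end-of-stream operations
    stream.close(); // release resources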
@@ -72,7 +72,7 @@ import org.apache.lucene.util.AttributeSource;
 * Javadoc.
 * <p>
 * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
-* e.g. for buffering purposes (see {@link CachingTokenFilter},
+* e.g., for buffering purposes (see {@link CachingTokenFilter},
 * {@link TeeSinkTokenFilter}). For this usecase
 * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
 * can be used.
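A sketch of the capture/restore use case (`stream` is a placeholder for any TokenStream):

    AttributeSource.State state = stream.captureState();
    // ... advance the stream or do other work ...
    stream.restoreState(state); // the attributes again hold the captured values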
@@ -101,7 +101,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
   }
 
   /**
-   * Consumers (ie {@link IndexWriter}) use this method to advance the stream to
+   * Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to
    * the next token. Implementing classes must implement this method and update
    * the appropriate {@link AttributeImpl}s with the attributes of the next
    * token.
@@ -24,20 +24,14 @@ import java.io.IOException;
 
 /** A Tokenizer is a TokenStream whose input is a Reader.
   <p>
-  This is an abstract class.
-  <p>
-  NOTE: subclasses must override
-  {@link #incrementToken()} if the new TokenStream API is used
-  and {@link #next(Token)} or {@link #next()} if the old
-  TokenStream API is used.
+  This is an abstract class; subclasses must override {@link #incrementToken()}
   <p>
   NOTE: Subclasses overriding {@link #incrementToken()} must
   call {@link AttributeSource#clearAttributes()} before
   setting attributes.
-  Subclasses overriding {@link #next(Token)} must call
+  Subclasses overriding {@link #incrementToken()} must call
   {@link Token#clear()} before setting Token attributes.
  */
 
 public abstract class Tokenizer extends TokenStream {
   /** The text source for this Tokenizer. */
   protected Reader input;
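A minimal sketch of a conforming subclass, assuming the Lucene 2.9 attribute API (the tokenizer itself is illustrative, not part of this commit):

    public final class WhitespaceTokenizerSketch extends Tokenizer {
      private final TermAttribute termAtt;
      private final OffsetAttribute offsetAtt;
      private int offset = 0;

      public WhitespaceTokenizerSketch(Reader in) {
        super(in);
        termAtt = (TermAttribute) addAttribute(TermAttribute.class);
        offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
      }

      public boolean incrementToken() throws IOException {
        clearAttributes(); // required before setting attributes
        StringBuilder sb = new StringBuilder();
        int start = -1, c;
        while ((c = input.read()) != -1) {
          offset++;
          if (!Character.isWhitespace((char) c)) {
            if (start == -1) start = offset - 1;
            sb.append((char) c);
          } else if (start != -1) {
            break; // end of the current token
          }
        }
        if (start == -1) return false; // stream exhausted
        termAtt.setTermBuffer(sb.toString());
        offsetAtt.setOffset(correctOffset(start), correctOffset(start + sb.length()));
        return true;
      }
    }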