mirror of https://github.com/apache/lucene.git

commit ea2883c40a (parent fbe28af914)

LUCENE-2008: javadoc improvements

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829568 13f79535-47bb-0310-9956-ffa450edef68
@@ -150,6 +150,9 @@ Optimizations
 
 Documentation
 
+ * LUCENE-2008: Javadoc improvements for TokenStream/Tokenizer/Token
+   (Luke Nezda via Mike McCandless)
+
 Build
 
  * LUCENE-486: Remove test->demo dependencies. (Michael Busch)
@@ -32,7 +32,7 @@ import java.util.Iterator;
  * is in the set without the necessity of converting it
  * to a String first.
  * <P>
- * <em>Please note:</em> This class implements {@link Set} but
+ * <em>Please note:</em> This class implements {@link java.util.Set Set} but
  * does not behave like it should in all cases. The generic type is
  * {@code Set<Object>}, because you can add any object to it,
  * that has a string representation. The add methods will use
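
This hunk appears to be from CharArraySet's class javadoc. As a rough sketch of the behavior it describes: a lookup can test a char[] slice in place, while add() stores an object's string representation. The CharArraySet(int, boolean) constructor and contains(char[], int, int) method are assumed from the 2.9-era API:

    import org.apache.lucene.analysis.CharArraySet;

    // Membership test on a char[] slice, with no String allocated per lookup.
    CharArraySet stopWords = new CharArraySet(16, /* ignoreCase= */ true);
    stopWords.add("the");  // stored via its string representation
    stopWords.add("and");

    char[] buffer = "The quick fox".toCharArray();
    boolean isStop = stopWords.contains(buffer, 0, 3);  // checks "The" in place
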
@@ -68,9 +68,7 @@ public final class StopAnalyzer extends Analyzer {
 
   /** Builds an analyzer with the stop words from the given set.
    * @param matchVersion See <a href="#version">above</a>
-   * @param stopWords Set of stop words
-   * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
+   * @param stopWords Set of stop words */
   public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
     this.stopWords = stopWords;
     enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
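
A minimal usage sketch for this constructor; the ENGLISH_STOP_WORDS_SET constant and Version.LUCENE_29 are assumed from the 2.9-era API. matchVersion selects version-dependent defaults such as the position-increment behavior set on the last line above:

    import org.apache.lucene.analysis.StopAnalyzer;
    import org.apache.lucene.util.Version;

    // Build an analyzer from a stop word set; defaults follow matchVersion.
    StopAnalyzer analyzer =
        new StopAnalyzer(Version.LUCENE_29, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
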
@@ -54,7 +54,6 @@ public final class StopFilter extends TokenFilter {
   * @param input Input TokenStream
   * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
   * @param ignoreCase if true, all words are lower cased first
-  * @param ignoreCase -Ignore case when stopping.
   */
  public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
  {
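
A wiring sketch for this constructor; WhitespaceTokenizer and makeStopSet are from the same analysis package, and the literal arguments are illustrative only:

    import java.io.StringReader;
    import org.apache.lucene.analysis.StopFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;

    // Stop-filter a whitespace-tokenized stream, ignoring case.
    TokenStream input = new WhitespaceTokenizer(new StringReader("The Quick Fox"));
    TokenStream stream = new StopFilter(/* enablePositionIncrements= */ true,
        input, StopFilter.makeStopSet(new String[] {"the", "a"}),
        /* ignoreCase= */ true);
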
@@ -53,7 +53,7 @@ d.add(new Field("f2", final2));
 d.add(new Field("f3", final3));
 d.add(new Field("f4", final4));
 * </pre>
-* In this example, <code>sink1</code> and <code>sink2<code> will both get tokens from both
+* In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
 * <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
 * and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
 * It is important, that tees are consumed before sinks (in the above example, the field names must be
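
The hunk shows only the tail of the javadoc's tee/sink example. For orientation, a hedged reconstruction of the setup that tail refers to, assuming 2.9's TeeSinkTokenFilter API (newSinkTokenStream/addSinkTokenStream); reader1/reader2 are assumed Readers, and the further analysis wrapping the sinks is left as a comment:

    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.TeeSinkTokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;

    // One tee per reader; each sink sees tokens from both tees.
    TeeSinkTokenFilter source1 =
        new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
    TeeSinkTokenFilter source2 =
        new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
    TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
    TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
    source2.addSinkTokenStream(sink1);
    source2.addSinkTokenStream(sink2);
    TokenStream final1 = new LowerCaseFilter(source1);
    TokenStream final2 = source2;
    // final3/final4 wrap sink1/sink2 in further analysis, then each stream is
    // added as a Field (f1..f4); the tees must be consumed before the sinks.
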
@@ -37,7 +37,7 @@ import org.apache.lucene.util.AttributeImpl;
   <p>
   The start and end offsets permit applications to re-associate a token with
   its source text, e.g., to display highlighted query terms in a document
-  browser, or to show matching text fragments in a KWIC (KeyWord In Context)
+  browser, or to show matching text fragments in a <abbr title="KeyWord In Context">KWIC</abbr>
   display, etc.
   <p>
   The type is a string, assigned by a lexical analyzer
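
The re-association described above amounts to slicing the original input by the token's offsets; sourceText and token are assumed to be in scope:

    // startOffset/endOffset index into the source text, not the term text,
    // so the original fragment can be recovered for highlighting or KWIC.
    String fragment =
        sourceText.substring(token.startOffset(), token.endOffset());
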
@@ -59,9 +59,9 @@ import org.apache.lucene.util.AttributeImpl;
 
   <br><br>
 
-  <p>Tokenizers and filters should try to re-use a Token
+  <p>Tokenizers and TokenFilters should try to re-use a Token
   instance when possible for best performance, by
-  implementing the {@link TokenStream#next(Token)} API.
+  implementing the {@link TokenStream#incrementToken()} API.
   Failing that, to create a new Token you should first use
   one of the constructors that starts with null text. To load
   the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
@@ -75,30 +75,30 @@ import org.apache.lucene.util.AttributeImpl;
   set the length of the term text. See <a target="_top"
   href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
   for details.</p>
-  <p>Typical reuse patterns:
+  <p>Typical Token reuse patterns:
   <ul>
-  <li> Copying text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(string, startOffset, endOffset[, type]);
   </pre>
   </li>
-  <li> Copying some text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
   </pre>
   </li>
   </li>
-  <li> Copying text from char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
   </pre>
   </li>
-  <li> Copying some text from a char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
   </pre>
   </li>
-  <li> Copying from one one Token to another (type is reset to #DEFAULT_TYPE if not specified):<br/>
+  <li> Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
   <pre>
     return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
   </pre>
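
As a sketch, here is the first reuse pattern as it would sit inside the deprecated next(Token) API these patterns were written for; readNextWord and currentOffset are hypothetical helpers of the enclosing stream:

    // Reuse the consumer-supplied Token instead of allocating a new one.
    public Token next(Token reusableToken) throws IOException {
      String word = readNextWord();       // hypothetical term source
      if (word == null) {
        return null;                      // end of stream
      }
      int wordStart = currentOffset();    // hypothetical offset bookkeeping
      // reinit() copies into the Token's internal buffer; type -> DEFAULT_TYPE
      return reusableToken.reinit(word, wordStart, wordStart + word.length());
    }
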
@@ -108,7 +108,7 @@ import org.apache.lucene.util.AttributeImpl;
   <ul>
   <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
   <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
-  <li>The startOffset and endOffset represent the start and offset in the source text. So be careful in adjusting them.</li>
+  <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
   <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
   </ul>
   </p>
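
A short sketch of the cloning caveat in the last bullet; the cache list is hypothetical:

    // A reusable Token's buffers are overwritten on the next call,
    // so cache a private copy:
    cache.add((Token) token.clone());
    // ...and clone again when injecting into a resettable stream, so replays
    // never hand out an instance a consumer might still mutate:
    Token injected = (Token) cache.get(i).clone();
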
@@ -19,15 +19,10 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 
-/** A TokenFilter is a TokenStream whose input is another token stream.
+/** A TokenFilter is a TokenStream whose input is another TokenStream.
   <p>
-  This is an abstract class.
-  NOTE: subclasses must override
-  {@link #incrementToken()} if the new TokenStream API is used
-  and {@link #next(Token)} or {@link #next()} if the old
-  TokenStream API is used.
-  <p>
-  See {@link TokenStream}
+  This is an abstract class; subclasses must override {@link #incrementToken()}.
+  @see TokenStream
   */
 public abstract class TokenFilter extends TokenStream {
   /** The source of tokens for this filter. */
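
A minimal TokenFilter sketch under the revised contract, overriding only incrementToken(). The filter itself (dropping short terms) is hypothetical, and the cast reflects the pre-generics addAttribute of the 2.9 API:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    public final class MinLengthFilter extends TokenFilter {
      private final TermAttribute termAtt =
          (TermAttribute) addAttribute(TermAttribute.class);
      private final int minLength;

      public MinLengthFilter(TokenStream input, int minLength) {
        super(input);  // 'input' becomes the protected source field
        this.minLength = minLength;
      }

      @Override
      public boolean incrementToken() throws IOException {
        while (input.incrementToken()) {  // advance the wrapped stream
          if (termAtt.termLength() >= minLength) {
            return true;                  // expose this token's attributes
          }                               // otherwise skip it and advance again
        }
        return false;                     // wrapped stream exhausted
      }
    }
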
@@ -31,14 +31,14 @@ import org.apache.lucene.util.AttributeSource;
 * A <code>TokenStream</code> enumerates the sequence of tokens, either from
 * {@link Field}s of a {@link Document} or from query text.
 * <p>
-* This is an abstract class. Concrete subclasses are:
+* This is an abstract class; concrete subclasses are:
 * <ul>
 * <li>{@link Tokenizer}, a <code>TokenStream</code> whose input is a Reader; and
 * <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
 * <code>TokenStream</code>.
 * </ul>
 * A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
-* has moved from being {@link Token} based to {@link Attribute} based. While
+* has moved from being {@link Token}-based to {@link Attribute}-based. While
 * {@link Token} still exists in 2.9 as a convenience class, the preferred way
 * to store the information of a {@link Token} is to use {@link AttributeImpl}s.
 * <p>
@@ -54,14 +54,14 @@ import org.apache.lucene.util.AttributeSource;
 * <li>Instantiation of <code>TokenStream</code>/{@link TokenFilter}s which add/get
 * attributes to/from the {@link AttributeSource}.
 * <li>The consumer calls {@link TokenStream#reset()}.
-* <li>the consumer retrieves attributes from the stream and stores local
-* references to all attributes it wants to access
-* <li>The consumer calls {@link #incrementToken()} until it returns false and
-* consumes the attributes after each call.
+* <li>The consumer retrieves attributes from the stream and stores local
+* references to all attributes it wants to access.
+* <li>The consumer calls {@link #incrementToken()} until it returns false
+* consuming the attributes after each call.
 * <li>The consumer calls {@link #end()} so that any end-of-stream operations
 * can be performed.
 * <li>The consumer calls {@link #close()} to release any resource when finished
-* using the <code>TokenStream</code>
+* using the <code>TokenStream</code>.
 * </ol>
 * To make sure that filters and consumers know which attributes are available,
 * the attributes must be added during instantiation. Filters and consumers are
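
The workflow above, as a consumer sketch; analyzer and text are assumed to exist, and the cast again reflects the pre-generics addAttribute:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    TokenStream stream = analyzer.tokenStream("body", new StringReader(text));
    TermAttribute termAtt =
        (TermAttribute) stream.addAttribute(TermAttribute.class); // local ref
    stream.reset();
    while (stream.incrementToken()) {  // advance, then consume the attributes
      System.out.println(termAtt.term());
    }
    stream.end();                      // end-of-stream operations
    stream.close();                    // release resources
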
@@ -72,7 +72,7 @@ import org.apache.lucene.util.AttributeSource;
 * Javadoc.
 * <p>
 * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
-* e.g. for buffering purposes (see {@link CachingTokenFilter},
+* e.g., for buffering purposes (see {@link CachingTokenFilter},
 * {@link TeeSinkTokenFilter}). For this usecase
 * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
 * can be used.
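
A sketch of that use case from inside a hypothetical filter: capture the attribute state for one token and replay it later:

    import org.apache.lucene.util.AttributeSource;

    // Inside a TokenFilter: snapshot the current attribute values...
    AttributeSource.State buffered = null;
    if (input.incrementToken()) {
      buffered = captureState();
    }
    // ...and later copy them back into this stream's attributes:
    if (buffered != null) {
      restoreState(buffered);
    }
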
@@ -101,7 +101,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
   }
 
   /**
-   * Consumers (ie {@link IndexWriter}) use this method to advance the stream to
+   * Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to
    * the next token. Implementing classes must implement this method and update
    * the appropriate {@link AttributeImpl}s with the attributes of the next
    * token.
@@ -24,20 +24,14 @@ import java.io.IOException;
 
 /** A Tokenizer is a TokenStream whose input is a Reader.
   <p>
-  This is an abstract class.
-  <p>
-  NOTE: subclasses must override
-  {@link #incrementToken()} if the new TokenStream API is used
-  and {@link #next(Token)} or {@link #next()} if the old
-  TokenStream API is used.
+  This is an abstract class; subclasses must override {@link #incrementToken()}
   <p>
   NOTE: Subclasses overriding {@link #incrementToken()} must
   call {@link AttributeSource#clearAttributes()} before
   setting attributes.
-  Subclasses overriding {@link #next(Token)} must call
+  Subclasses overriding {@link #incrementToken()} must call
   {@link Token#clear()} before setting Token attributes.
  */
 
 public abstract class Tokenizer extends TokenStream {
   /** The text source for this Tokenizer. */
   protected Reader input;
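
A minimal Tokenizer sketch following the revised contract: override incrementToken() and call clearAttributes() before setting attributes. The tokenizer itself (emitting the entire input as a single token) is hypothetical, and the casts reflect the pre-generics addAttribute:

    import java.io.IOException;
    import java.io.Reader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;

    public final class WholeInputTokenizer extends Tokenizer {
      private final TermAttribute termAtt =
          (TermAttribute) addAttribute(TermAttribute.class);
      private final OffsetAttribute offsetAtt =
          (OffsetAttribute) addAttribute(OffsetAttribute.class);
      private boolean done = false;

      public WholeInputTokenizer(Reader input) {
        super(input);  // 'input' becomes the protected Reader field
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (done) {
          return false;
        }
        done = true;
        clearAttributes();  // required before setting any attributes
        StringBuilder text = new StringBuilder();
        char[] buf = new char[1024];
        for (int n = input.read(buf); n != -1; n = input.read(buf)) {
          text.append(buf, 0, n);
        }
        termAtt.setTermBuffer(text.toString());  // term = entire input
        offsetAtt.setOffset(0, text.length());   // offsets into source text
        return text.length() > 0;
      }
    }
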