mirror of https://github.com/apache/lucene.git
LUCENE-1101: TokenStream.next(Token) reuse 'policy': calling Token.clear() should be responsibility of token producer.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@607521 13f79535-47bb-0310-9956-ffa450edef68
commit b367e863e6
parent efbd1260a9
@@ -85,6 +85,10 @@ API Changes
 12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
     the Object (if any) that was bumped from the queue to allow
     re-use. (Shai Erera via Mike McCandless)
 
+13. LUCENE-1101: Token reuse 'contract' (defined LUCENE-969)
+    modified so it is token producer's responsibility
+    to call Token.clear(). (Doron Cohen)
+
 Bug fixes
 
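In practical terms, the new contract lets a consumer reuse a single Token instance across calls without clearing it itself, since clearing is now the producer's job (the DocumentsWriter hunk below drops exactly that call). A minimal consumer-side sketch, assuming the 2.3-era reuse API (Token.termBuffer()/termLength(), the no-arg Token constructor, WhitespaceTokenizer); class and variable names are illustrative only:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class ReuseConsumerDemo {
  public static void main(String[] args) throws IOException {
    TokenStream stream = new WhitespaceTokenizer(new StringReader("reuse one token instance"));
    Token reusable = new Token();   // single instance reused for every call
    Token token;
    while ((token = stream.next(reusable)) != null) {
      // Fully consume this token before the next call: the producer may
      // overwrite the reused instance, and it clears it itself now, so
      // the consumer no longer calls token.clear() between calls.
      System.out.println(new String(token.termBuffer(), 0, token.termLength())
          + " [" + token.startOffset() + "-" + token.endOffset() + "]");
    }
    stream.close();
  }
}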
@@ -45,6 +45,7 @@ public abstract class CharTokenizer extends Tokenizer {
   }
 
   public final Token next(Token token) throws IOException {
+    token.clear();
     int length = 0;
     int start = bufferIndex;
     char[] buffer = token.termBuffer();
@@ -42,6 +42,7 @@ public class KeywordTokenizer extends Tokenizer {
     if (!done) {
       done = true;
       int upto = 0;
+      result.clear();
       char[] buffer = result.termBuffer();
       while (true) {
         final int length = input.read(buffer, upto, buffer.length-upto);
@@ -58,14 +58,23 @@ public abstract class TokenStream {
    * When possible, the input Token should be used as the
    * returned Token (this gives fastest tokenization
    * performance), but this is not required and a new Token
    * may be returned. Callers may re-use a single Token
-   * instance for successive calls to this method and must
-   * therefore fully consume the previously returned Token
-   * before calling this method again.
-   * @param result a Token that may or may not be used to
-   * return
-   * @return next token in the stream or null if
-   * end-of-stream was hit*/
+   * instance for successive calls to this method.
+   * <p>
+   * This implicitly defines a "contract" between
+   * consumers (callers of this method) and
+   * producers (implementations of this method
+   * that are the source for tokens):
+   * <ul>
+   * <li>A consumer must fully consume the previously
+   * returned Token before calling this method again.</li>
+   * <li>A producer must call {@link Token#clear()}
+   * before setting the fields in it & returning it</li>
+   * </ul>
+   * Note that a {@link TokenFilter} is considered a consumer.
+   * @param result a Token that may or may not be used to return
+   * @return next token in the stream or null if end-of-stream was hit
+   */
   public Token next(Token result) throws IOException {
     return next();
   }
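As the new javadoc notes, a TokenFilter is a consumer: it asks its input for the next token and may edit the returned (possibly reused) Token in place, without calling Token.clear() itself. A sketch of such a filter under that assumption; the class name is hypothetical, and the delegation pattern mirrors the in-place filters in this release:

import java.io.IOException;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

// Hypothetical filter: lowercases each term in place.
public final class LowerCasingFilter extends TokenFilter {
  public LowerCasingFilter(TokenStream input) {
    super(input);
  }

  public final Token next(Token result) throws IOException {
    // As a consumer, the filter does not clear the token; the producer
    // upstream already did. It just asks its input for the next token...
    Token t = input.next(result);
    if (t == null)
      return null;
    // ...and edits the (possibly reused) term buffer in place.
    char[] buffer = t.termBuffer();
    int length = t.termLength();
    for (int i = 0; i < length; i++)
      buffer[i] = Character.toLowerCase(buffer[i]);
    return t;
  }
}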
@@ -23,8 +23,12 @@ import java.io.IOException;
 /** A Tokenizer is a TokenStream whose input is a Reader.
   <p>
   This is an abstract class.
+  <p>
   NOTE: subclasses must override at least one of {@link
   #next()} or {@link #next(Token)}.
+  <p>
+  NOTE: subclasses overriding {@link #next(Token)} must
+  call {@link Token#clear()}.
  */
 
 public abstract class Tokenizer extends TokenStream {
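Per the NOTE added above, a Tokenizer subclass that overrides next(Token) is a producer and must call Token.clear() before filling the reused instance. A minimal hypothetical sketch that emits the Reader's whole content as one token, much like KeywordTokenizer, assuming Token.setTermBuffer(char[], int, int) from the 2.3 reuse API:

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;

// Hypothetical tokenizer: returns the entire input as a single token.
public class WholeInputTokenizer extends Tokenizer {
  private boolean done = false;

  public WholeInputTokenizer(Reader input) {
    super(input);
  }

  public Token next(Token result) throws IOException {
    if (done)
      return null;
    done = true;
    result.clear();                 // producer's responsibility under the new contract
    StringBuffer sb = new StringBuffer();
    char[] chunk = new char[256];
    int n;
    while ((n = input.read(chunk)) != -1)
      sb.append(chunk, 0, n);
    result.setTermBuffer(sb.toString().toCharArray(), 0, sb.length());
    result.setStartOffset(0);
    result.setEndOffset(sb.length());
    return result;
  }
}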
@@ -92,6 +92,7 @@ public class StandardTokenizer extends Tokenizer {
       return null;
     }
 
+    result.clear();
     scanner.getText(result);
     final int start = scanner.yychar();
     result.setStartOffset(start);
@@ -1373,7 +1373,6 @@ final class DocumentsWriter {
       offsetEnd = offset-1;
       Token token;
       for(;;) {
-        localToken.clear();
         token = stream.next(localToken);
         if (token == null) break;
         position += (token.getPositionIncrement() - 1);