LUCENE-1101: TokenStream.next(Token) reuse 'policy': calling Token.clear() should be the responsibility of the token producer.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@607521 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-12-30 07:34:30 +00:00
parent efbd1260a9
commit b367e863e6
7 changed files with 28 additions and 9 deletions

View File

@ -85,6 +85,10 @@ API Changes
12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns 12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
the Object (if any) that was bumped from the queue to allow the Object (if any) that was bumped from the queue to allow
re-use. (Shai Erera via Mike McCandless) re-use. (Shai Erera via Mike McCandless)
13. LUCENE-1101: Token reuse 'contract' (defined in LUCENE-969)
modified so that it is the token producer's responsibility
to call Token.clear(). (Doron Cohen)
Bug fixes Bug fixes

View File

@ -45,6 +45,7 @@ public abstract class CharTokenizer extends Tokenizer {
} }
public final Token next(Token token) throws IOException { public final Token next(Token token) throws IOException {
token.clear();
int length = 0; int length = 0;
int start = bufferIndex; int start = bufferIndex;
char[] buffer = token.termBuffer(); char[] buffer = token.termBuffer();

View File

@ -42,6 +42,7 @@ public class KeywordTokenizer extends Tokenizer {
if (!done) { if (!done) {
done = true; done = true;
int upto = 0; int upto = 0;
result.clear();
char[] buffer = result.termBuffer(); char[] buffer = result.termBuffer();
while (true) { while (true) {
final int length = input.read(buffer, upto, buffer.length-upto); final int length = input.read(buffer, upto, buffer.length-upto);

View File

@ -58,14 +58,23 @@ public abstract class TokenStream {
* When possible, the input Token should be used as the * When possible, the input Token should be used as the
* returned Token (this gives fastest tokenization * returned Token (this gives fastest tokenization
* performance), but this is not required and a new Token * performance), but this is not required and a new Token
* may be returned. Callers may re-use a single Token * may be returned. Callers may re-use a single Token
* instance for successive calls to this method and must * instance for successive calls to this method.
* therefore fully consume the previously returned Token * <p>
* before calling this method again. * This implicitly defines a "contract" between
* @param result a Token that may or may not be used to * consumers (callers of this method) and
* return * producers (implementations of this method
* @return next token in the stream or null if * that are the source for tokens):
* end-of-stream was hit*/ * <ul>
* <li>A consumer must fully consume the previously
* returned Token before calling this method again.</li>
* <li>A producer must call {@link Token#clear()}
* before setting the fields in it and returning it.</li>
* </ul>
* Note that a {@link TokenFilter} is considered a consumer.
* @param result a Token that may or may not be used as the returned Token
* @return next token in the stream or null if end-of-stream was hit
*/
public Token next(Token result) throws IOException { public Token next(Token result) throws IOException {
return next(); return next();
} }

View File

@ -23,8 +23,12 @@ import java.io.IOException;
/** A Tokenizer is a TokenStream whose input is a Reader. /** A Tokenizer is a TokenStream whose input is a Reader.
<p> <p>
This is an abstract class. This is an abstract class.
<p>
NOTE: subclasses must override at least one of {@link NOTE: subclasses must override at least one of {@link
#next()} or {@link #next(Token)}. #next()} or {@link #next(Token)}.
<p>
NOTE: subclasses overriding {@link #next(Token)} must
call {@link Token#clear()} before setting the Token's fields.
*/ */
public abstract class Tokenizer extends TokenStream { public abstract class Tokenizer extends TokenStream {

View File

@ -92,6 +92,7 @@ public class StandardTokenizer extends Tokenizer {
return null; return null;
} }
result.clear();
scanner.getText(result); scanner.getText(result);
final int start = scanner.yychar(); final int start = scanner.yychar();
result.setStartOffset(start); result.setStartOffset(start);

View File

@ -1373,7 +1373,6 @@ final class DocumentsWriter {
offsetEnd = offset-1; offsetEnd = offset-1;
Token token; Token token;
for(;;) { for(;;) {
localToken.clear();
token = stream.next(localToken); token = stream.next(localToken);
if (token == null) break; if (token == null) break;
position += (token.getPositionIncrement() - 1); position += (token.getPositionIncrement() - 1);