mirror of https://github.com/apache/lucene.git
LUCENE-3766: Remove Tokenizer's default ctor
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242890 13f79535-47bb-0310-9956-ffa450edef68
parent 18a1d40283
commit 590741dcfe
@@ -784,6 +784,11 @@ API Changes
   Deprecated StandardAnalyzer and ClassicAnalyzer ctors that take File,
   please use the Reader ctor instead.  (Robert Muir)

+* LUCENE-3766: Deprecate no-arg ctors of Tokenizer. Tokenizers are
+  TokenStreams with Readers: tokenizers with null Readers will not be
+  supported in Lucene 4.0, just use a TokenStream.
+  (Mike McCandless, Robert Muir)
+
 New Features

 * LUCENE-3593: Added a FieldValueFilter that accepts all documents that either

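The entry above amounts to a one-way migration: a token source with no character input should extend TokenStream directly rather than construct a Tokenizer around a null Reader. A minimal sketch of that migration, assuming 4.0-era analysis APIs (SingleTokenStream is an illustrative name, not something this commit adds); it is exactly the shape of the OverlappingTokenStream and BugReproTokenStream rewrites further down:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Instead of "new Tokenizer() { ... }" with a null Reader, extend TokenStream.
final class SingleTokenStream extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private boolean done = false;

  @Override
  public boolean incrementToken() throws IOException {
    if (done) {
      return false;
    }
    done = true;
    clearAttributes();
    termAtt.append("token"); // emit a single fixed token
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    done = false;
  }
}
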
@@ -18,7 +18,6 @@ package org.apache.lucene.search.highlight;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -47,15 +46,7 @@ import org.apache.lucene.util.LuceneTestCase;
 public class TokenSourcesTest extends LuceneTestCase {
   private static final String FIELD = "text";

-  private static final class OverlapAnalyzer extends Analyzer {
-
-    @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new TokenStreamOverlap());
-    }
-  }
-
-  private static final class TokenStreamOverlap extends Tokenizer {
+  private static final class OverlappingTokenStream extends TokenStream {
     private Token[] tokens;

     private int i = -1;
@@ -64,10 +55,6 @@ public class TokenSourcesTest extends LuceneTestCase {
     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
     private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);

-    public TokenStreamOverlap() {
-      reset();
-    }
-
     @Override
     public boolean incrementToken() throws IOException {
       this.i++;
@@ -102,13 +89,13 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -146,14 +133,14 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
       customType.setStoreTermVectorPositions(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -191,13 +178,13 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -236,13 +223,13 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();

@@ -298,7 +298,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {

       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
+        Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, reader) {
           final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
           int p = 0;

@@ -34,34 +34,24 @@ public abstract class Tokenizer extends TokenStream {
   /** The text source for this Tokenizer. */
   protected Reader input;

-  /** Construct a tokenizer with null input. */
-  protected Tokenizer() {}
-
   /** Construct a token stream processing the given input. */
   protected Tokenizer(Reader input) {
-    this.input = CharReader.get(input);
+    assert input != null: "input must not be null";
+    this.input = input;
   }

-  /** Construct a tokenizer with null input using the given AttributeFactory. */
-  protected Tokenizer(AttributeFactory factory) {
-    super(factory);
-  }
-
   /** Construct a token stream processing the given input using the given AttributeFactory. */
   protected Tokenizer(AttributeFactory factory, Reader input) {
     super(factory);
-    this.input = CharReader.get(input);
+    assert input != null: "input must not be null";
+    this.input = input;
   }

-  /** Construct a token stream processing the given input using the given AttributeSource. */
-  protected Tokenizer(AttributeSource source) {
-    super(source);
-  }
-
   /** Construct a token stream processing the given input using the given AttributeSource. */
   protected Tokenizer(AttributeSource source, Reader input) {
     super(source);
-    this.input = CharReader.get(input);
+    assert input != null: "input must not be null";
+    this.input = input;
   }

   /** By default, closes the input Reader. */
@@ -82,6 +72,7 @@ public abstract class Tokenizer extends TokenStream {
    * @see CharStream#correctOffset
    */
   protected final int correctOffset(int currentOff) {
+    assert input != null: "this tokenizer is closed";
     return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
   }

@@ -89,6 +80,7 @@ public abstract class Tokenizer extends TokenStream {
    * analyzer (in its tokenStream method) will use
    * this to re-use a previously created tokenizer. */
   public void reset(Reader input) throws IOException {
+    assert input != null: "input must not be null";
     this.input = input;
   }
 }

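With the Reader-less ctors gone, a concrete Tokenizer always hands its Reader to super(), and the protected input field is non-null from construction onward. A minimal subclass sketch under the new contract (SingleCharTokenizer is an illustrative name, not part of this commit; a real tokenizer would also override end() and reset()):

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Emits one token per character read from the input Reader.
final class SingleCharTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private int offset = 0;

  SingleCharTokenizer(Reader input) {
    super(input); // the only way in: the ctor asserts input != null
  }

  @Override
  public boolean incrementToken() throws IOException {
    final int c = input.read(); // 'input' is the protected field set by super()
    if (c == -1) {
      return false;
    }
    clearAttributes();
    termAtt.append((char) c);
    offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + 1));
    offset++;
    return true;
  }
}
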
@@ -119,7 +119,7 @@ public class AttributeSource {
   private final Map<Class<? extends AttributeImpl>, AttributeImpl> attributeImpls;
   private final State[] currentState;

-  private AttributeFactory factory;
+  private final AttributeFactory factory;

   /**
    * An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.

@@ -19,6 +19,7 @@ package org.apache.lucene.document;

 import java.io.StringReader;

+import org.apache.lucene.analysis.EmptyTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.Fields;
@@ -285,11 +286,7 @@ public class TestDocument extends LuceneTestCase {
   // LUCENE-3616
   public void testInvalidFields() {
     try {
-      new Field("foo", new Tokenizer() {
-        @Override
-        public boolean incrementToken() {
-          return false;
-        }}, StringField.TYPE_STORED);
+      new Field("foo", new EmptyTokenizer(new StringReader("")), StringField.TYPE_STORED);
       fail("did not hit expected exc");
     } catch (IllegalArgumentException iae) {
       // expected

@@ -1569,6 +1569,7 @@ public class TestIndexWriter extends LuceneTestCase {
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

     public StringSplitTokenizer(Reader r) {
+      super(r);
       try {
         reset(r);
       } catch (IOException e) {

@@ -23,7 +23,7 @@ import java.util.List;
 import java.util.Map;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CannedAnalyzer;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
 import org.apache.lucene.analysis.Token;
@@ -72,9 +72,9 @@ public class TestPostingsOffsets extends LuceneTestCase {
       makeToken("a", 1, 9, 17),
       makeToken("c", 1, 19, 50),
     };
-    doc.add(new Field("content", new CannedAnalyzer.CannedTokenizer(tokens), ft));
+    doc.add(new Field("content", new CannedTokenStream(tokens), ft));

-    w.addDocument(doc, new CannedAnalyzer(tokens));
+    w.addDocument(doc);
     IndexReader r = w.getReader();
     w.close();

@@ -281,7 +281,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
         offset += offIncr + tokenOffset;
         //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.startOffset() + "/" + token.endOffset() + " (freq=" + postingsByDoc.get(docCount).size() + ")");
       }
-      doc.add(new Field("content", new CannedAnalyzer.CannedTokenizer(tokens.toArray(new Token[tokens.size()])), ft));
+      doc.add(new Field("content", new CannedTokenStream(tokens.toArray(new Token[tokens.size()])), ft));
       w.addDocument(doc);
     }
     final DirectoryReader r = w.getReader();

@@ -18,15 +18,12 @@ package org.apache.lucene.index;
  */

 import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -39,9 +36,9 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   */
  public void test() throws Exception {
    Directory dir = newDirectory();
-    RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer()));
+    RandomIndexWriter riw = new RandomIndexWriter(random, dir);
    Document doc = new Document();
-    doc.add(new Field("eng", "Six drunken", TextField.TYPE_STORED /*This shouldn't matter. */));
+    doc.add(new TextField("eng", new BugReproTokenStream()));
    riw.addDocument(doc);
    riw.close();
    dir.close();
@@ -52,10 +49,10 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   */
  public void testMoreDocs() throws Exception {
    Directory dir = newDirectory();
-    RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer()));
-    Document doc = new Document();
-    doc.add(new Field("eng", "Six drunken", TextField.TYPE_STORED /*This shouldn't matter. */));
+    RandomIndexWriter riw = new RandomIndexWriter(random, dir);
    for (int i = 0; i < 100; i++) {
+      Document doc = new Document();
+      doc.add(new TextField("eng", new BugReproTokenStream()));
      riw.addDocument(doc);
    }
    riw.close();
@@ -63,14 +60,7 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   }
 }

-final class BugReproAnalyzer extends Analyzer {
-  @Override
-  public TokenStreamComponents createComponents(String arg0, Reader arg1) {
-    return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
-  }
-}
-
-final class BugReproAnalyzerTokenizer extends Tokenizer {
+final class BugReproTokenStream extends TokenStream {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

@@ -137,14 +137,15 @@ public class TestTermVectorsReader extends LuceneTestCase {
     super.tearDown();
   }

-  private class MyTokenStream extends Tokenizer {
+  private class MyTokenizer extends Tokenizer {
     private int tokenUpto;

     private final CharTermAttribute termAtt;
     private final PositionIncrementAttribute posIncrAtt;
     private final OffsetAttribute offsetAtt;

-    public MyTokenStream() {
+    public MyTokenizer(Reader reader) {
+      super(reader);
       termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
@@ -178,7 +179,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
   private class MyAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new MyTokenStream());
+      return new TokenStreamComponents(new MyTokenizer(reader));
     }
   }

@@ -32,7 +32,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;

-class RepeatingTokenStream extends Tokenizer {
+class RepeatingTokenizer extends Tokenizer {

   private final Random random;
   private final float percentDocs;
@@ -41,7 +41,8 @@ class RepeatingTokenStream extends Tokenizer {
   CharTermAttribute termAtt;
   String value;

-  public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
+  public RepeatingTokenizer(Reader reader, String val, Random random, float percentDocs, int maxTF) {
+    super(reader);
     this.value = val;
     this.random = random;
     this.percentDocs = percentDocs;
@@ -75,12 +76,11 @@ class RepeatingTokenStream extends Tokenizer {
 public class TestTermdocPerf extends LuceneTestCase {

   void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
-    final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);

     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        return new TokenStreamComponents(ts);
+        return new TokenStreamComponents(new RepeatingTokenizer(reader, val, random, percentDocs, maxTF));
       }
     };

@@ -18,19 +18,11 @@ package org.apache.lucene.search;
  */

 import java.io.IOException;
-import java.io.Reader;
 import java.util.Collection;
 import java.util.LinkedList;

-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CannedAnalyzer;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
@@ -46,7 +38,6 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TermContext;

 /**
  * This class tests the MultiPhraseQuery class.
@@ -336,10 +327,12 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
     tokens[2].append("c");
     tokens[2].setPositionIncrement(0);

-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new CannedAnalyzer(tokens));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
     Document doc = new Document();
-    doc.add(new TextField("field", ""));
+    doc.add(new TextField("field", new CannedTokenStream(tokens)));
     writer.addDocument(doc);
+    doc = new Document();
+    doc.add(new TextField("field", new CannedTokenStream(tokens)));
     writer.addDocument(doc);
     IndexReader r = writer.getReader();
     writer.close();
@@ -434,10 +427,10 @@ public class TestMultiPhraseQuery extends LuceneTestCase {

   private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
     Directory dir = newDirectory(); // random dir
-    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
+    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
     IndexWriter writer = new IndexWriter(dir, cfg);
     Document doc = new Document();
-    doc.add(new TextField("field", ""));
+    doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
     writer.addDocument(doc);
     IndexReader r = IndexReader.open(writer,false);
     writer.close();

@@ -58,7 +58,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        return new TokenStreamComponents(new Tokenizer() {
+        return new TokenStreamComponents(new Tokenizer(reader) {
           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
           private final int[] INCREMENTS = {0, 2, 1, 0, 1};
           private int i = 0;

@@ -1,73 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-public class CannedAnalyzer extends Analyzer {
-  private final Token[] tokens;
-
-  public CannedAnalyzer(Token[] tokens) {
-    this.tokens = tokens;
-  }
-
-  @Override
-  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    return new TokenStreamComponents(new CannedTokenizer(tokens));
-  }
-
-  public static class CannedTokenizer extends Tokenizer {
-    private final Token[] tokens;
-    private int upto = 0;
-    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-    public CannedTokenizer(Token[] tokens) {
-      this.tokens = tokens;
-    }
-
-    @Override
-    public final boolean incrementToken() throws IOException {
-      if (upto < tokens.length) {
-        final Token token = tokens[upto++];
-        // TODO: can we just capture/restoreState so
-        // we get all attrs...?
-        clearAttributes();
-        termAtt.setEmpty();
-        termAtt.append(token.toString());
-        posIncrAtt.setPositionIncrement(token.getPositionIncrement());
-        offsetAtt.setOffset(token.startOffset(), token.endOffset());
-        return true;
-      } else {
-        return false;
-      }
-    }
-
-    @Override
-    public void reset() throws IOException {
-      super.reset();
-      this.upto = 0;
-    }
-  }
-}

@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * emits a canned set of {@link Token}
+ */
+public final class CannedTokenStream extends TokenStream {
+  private final Token[] tokens;
+  private int upto = 0;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  public CannedTokenStream(Token[] tokens) {
+    this.tokens = tokens;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (upto < tokens.length) {
+      final Token token = tokens[upto++];
+      // TODO: can we just capture/restoreState so
+      // we get all attrs...?
+      clearAttributes();
+      termAtt.setEmpty();
+      termAtt.append(token.toString());
+      posIncrAtt.setPositionIncrement(token.getPositionIncrement());
+      offsetAtt.setOffset(token.startOffset(), token.endOffset());
+      return true;
+    } else {
+      return false;
+    }
+  }
+}

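Since CannedTokenStream needs no Analyzer wrapper, tests hand it straight to a field, as the TestPostingsOffsets and TestMultiPhraseQuery hunks above do. A hypothetical fragment, assuming the 4.0-era Token(String, int, int) and TextField(String, TokenStream) ctors (addCannedDoc is an illustrative helper, not part of the commit):

import java.io.IOException;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.RandomIndexWriter;

class CannedTokenStreamExample {
  // Index a predetermined token sequence without any Analyzer shim.
  static void addCannedDoc(RandomIndexWriter writer) throws IOException {
    Token fox = new Token("fox", 0, 3);     // term text, start offset, end offset
    Token jumps = new Token("jumps", 4, 9);
    Document doc = new Document();
    doc.add(new TextField("body", new CannedTokenStream(new Token[] { fox, jumps })));
    writer.addDocument(doc);
  }
}
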
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Emits no tokens
+ */
+public final class EmptyTokenizer extends Tokenizer {
+
+  public EmptyTokenizer(Reader input) {
+    super(input);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    return false;
+  }
+}

@@ -99,29 +99,28 @@ public final class ClassicTokenizer extends Tokenizer {
    * See http://issues.apache.org/jira/browse/LUCENE-1068
    */
   public ClassicTokenizer(Version matchVersion, Reader input) {
-    super();
-    init(input, matchVersion);
+    super(input);
+    init(matchVersion);
   }

   /**
    * Creates a new ClassicTokenizer with a given {@link AttributeSource}.
    */
   public ClassicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source);
-    init(input, matchVersion);
+    super(source, input);
+    init(matchVersion);
   }

   /**
    * Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
    */
   public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
-    super(factory);
-    init(input, matchVersion);
+    super(factory, input);
+    init(matchVersion);
   }

-  private void init(Reader input, Version matchVersion) {
+  private void init(Version matchVersion) {
     this.scanner = new ClassicTokenizerImpl(input);
-    this.input = input;
   }

   // this tokenizer generates three attributes:

@@ -125,27 +125,27 @@ public final class StandardTokenizer extends Tokenizer {
    * See http://issues.apache.org/jira/browse/LUCENE-1068
    */
   public StandardTokenizer(Version matchVersion, Reader input) {
-    super();
-    init(input, matchVersion);
+    super(input);
+    init(matchVersion);
   }

   /**
    * Creates a new StandardTokenizer with a given {@link AttributeSource}.
    */
   public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source);
-    init(input, matchVersion);
+    super(source, input);
+    init(matchVersion);
   }

   /**
    * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
    */
   public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
-    super(factory);
-    init(input, matchVersion);
+    super(factory, input);
+    init(matchVersion);
   }

-  private final void init(Reader input, Version matchVersion) {
+  private final void init(Version matchVersion) {
     if (matchVersion.onOrAfter(Version.LUCENE_34)) {
       this.scanner = new StandardTokenizerImpl(input);
     } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
@@ -153,7 +153,6 @@ public final class StandardTokenizer extends Tokenizer {
     } else {
       this.scanner = new ClassicTokenizerImpl(input);
     }
-    this.input = input;
   }

   // this tokenizer generates three attributes:

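Caller-side construction of these tokenizers is unchanged; the Reader simply reaches the Tokenizer superclass before init(matchVersion) builds the scanner. A hypothetical call, assuming a 4.0-era Version constant (the helper class and method names are illustrative):

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

class TokenizerFactoryExample {
  // The Reader now flows through super(input) instead of a post-hoc init(input, ...).
  static Tokenizer newStandardTokenizer(String text) {
    return new StandardTokenizer(Version.LUCENE_40, new StringReader(text));
  }
}
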
@@ -17,7 +17,6 @@ package org.apache.lucene.queryparser.classic;
  * limitations under the License.
  */

-import java.io.IOException;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
@@ -27,11 +26,11 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

@@ -312,18 +311,11 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenStream());
+        return new TokenStreamComponents(new EmptyTokenizer(reader));
       } else {
         return stdAnalyzer.createComponents(fieldName, reader);
       }
     }
-
-    private static class EmptyTokenStream extends Tokenizer {
-      @Override
-      public boolean incrementToken() throws IOException {
-        return false;
-      }
-    }
   }

 }

@@ -50,7 +50,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {

     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new CannedTokenizer(tokens));
+      return new TokenStreamComponents(new CannedTokenizer(reader, tokens));
     }
   }

@@ -61,7 +61,8 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

-    public CannedTokenizer(TokenAndPos[] tokens) {
+    public CannedTokenizer(Reader reader, TokenAndPos[] tokens) {
+      super(reader);
       this.tokens = tokens;
     }

@@ -17,7 +17,9 @@ package org.apache.lucene.queryparser.flexible.standard;
  * limitations under the License.
  */

+import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;

@@ -28,11 +30,11 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
 import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

@@ -350,18 +352,11 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenStream());
+        return new TokenStreamComponents(new EmptyTokenizer(reader));
       } else {
         return stdAnalyzer.createComponents(fieldName, reader);
       }
     }
-
-    private static class EmptyTokenStream extends Tokenizer {
-      @Override
-      public boolean incrementToken() {
-        return false;
-      }
-    }
   }

 }

@@ -19,6 +19,7 @@ package org.apache.lucene.queryparser.flexible.standard;

 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 import java.text.DateFormat;
 import java.util.Calendar;
 import java.util.Date;
@@ -35,18 +36,19 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
 import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
 import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
 import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
 import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
 import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
 import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline;
+import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
 import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
 import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FuzzyQuery;
@@ -62,7 +64,6 @@ import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.BasicAutomata;
@@ -1215,11 +1216,15 @@ public class TestQPHelper extends LuceneTestCase {
     super.tearDown();
   }

-  private class CannedTokenStream extends Tokenizer {
+  private class CannedTokenizer extends Tokenizer {
     private int upto = 0;
     private final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
     private final CharTermAttribute term = addAttribute(CharTermAttribute.class);

+    public CannedTokenizer(Reader reader) {
+      super(reader);
+    }
+
     @Override
     public boolean incrementToken() {
       clearAttributes();
@@ -1253,7 +1258,7 @@ public class TestQPHelper extends LuceneTestCase {
   private class CannedAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
-      return new TokenStreamComponents(new CannedTokenStream());
+      return new TokenStreamComponents(new CannedTokenizer(alsoIgnored));
     }
   }

@@ -67,7 +67,7 @@ final class TrieTokenizer extends Tokenizer {

   public TrieTokenizer(Reader input, TrieTypes type, int precisionStep, NumericTokenStream ts) {
     // must share the attribute source with the NumericTokenStream we delegate to
-    super(ts);
+    super(ts, input);
     this.type = type;
     this.precisionStep = precisionStep;
     this.ts = ts;