LUCENE-3396: Converted most Analyzers over to using ReusableAnalyzerBase

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher John Male 2011-09-12 05:50:26 +00:00
parent e287b70df3
commit 4c5606ee29
56 changed files with 494 additions and 557 deletions

View File

@ -171,6 +171,9 @@ Changes in backwards compatibility policy
IndexableFieldType. See MIGRATE.txt for more details. IndexableFieldType. See MIGRATE.txt for more details.
(Nikola Tankovic, Mike McCandless, Chris Male) (Nikola Tankovic, Mike McCandless, Chris Male)
* LUCENE-3396: ReusableAnalyzerBase.TokenStreamComponents.reset(Reader) now returns void instead
of boolean. If a Component cannot be reset, it should throw an Exception.
Changes in Runtime Behavior Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
@ -523,6 +526,12 @@ New features
(David Mark Nemeskey via Robert Muir) (David Mark Nemeskey via Robert Muir)
* LUCENE-3396: ReusableAnalyzerBase now provides a ReuseStrategy abstraction which
controls how TokenStreamComponents are reused per request. Two implementations are
provided - GlobalReuseStrategy which implements the current behavior of sharing
components between all fields, and PerFieldReuseStrategy which shares per field.
(Chris Male)
Optimizations Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms * LUCENE-2588: Don't store unnecessary suffixes when writing the terms

View File

@ -1802,7 +1802,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// behaviour to synonyms // behaviour to synonyms
// =================================================================== // ===================================================================
final class SynonymAnalyzer extends Analyzer { final class SynonymAnalyzer extends ReusableAnalyzerBase {
private Map<String,String> synonyms; private Map<String,String> synonyms;
public SynonymAnalyzer(Map<String,String> synonyms) { public SynonymAnalyzer(Map<String,String> synonyms) {
@ -1816,12 +1816,12 @@ final class SynonymAnalyzer extends Analyzer {
* java.io.Reader) * java.io.Reader)
*/ */
@Override @Override
public TokenStream tokenStream(String arg0, Reader arg1) { public TokenStreamComponents createComponents(String arg0, Reader arg1) {
Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true); Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
stream.addAttribute(CharTermAttribute.class); stream.addAttribute(CharTermAttribute.class);
stream.addAttribute(PositionIncrementAttribute.class); stream.addAttribute(PositionIncrementAttribute.class);
stream.addAttribute(OffsetAttribute.class); stream.addAttribute(OffsetAttribute.class);
return new SynonymTokenizer(stream, synonyms); return new TokenStreamComponents(stream, new SynonymTokenizer(stream, synonyms));
} }
} }

View File

@ -20,10 +20,7 @@ package org.apache.lucene.search.highlight;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase { public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
@ -52,14 +49,13 @@ public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
assertTokenStreamContents(filter, new String[] {"short", "toolong", assertTokenStreamContents(filter, new String[] {"short", "toolong",
"evenmuchlongertext"}); "evenmuchlongertext"});
// TODO: This is not actually testing reuse! (reusableTokenStream is not implemented) checkOneTermReuse(new ReusableAnalyzerBase() {
checkOneTermReuse(new Analyzer() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); tokenizer.setEnableChecks(false);
return new OffsetLimitTokenFilter(tokenizer, 10); return new TokenStreamComponents(tokenizer, new OffsetLimitTokenFilter(tokenizer, 10));
} }
}, "llenges", "llenges"); }, "llenges", "llenges");
} }

View File

@ -20,9 +20,7 @@ package org.apache.lucene.search.highlight;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -50,15 +48,15 @@ import org.apache.lucene.util.LuceneTestCase;
public class TokenSourcesTest extends LuceneTestCase { public class TokenSourcesTest extends LuceneTestCase {
private static final String FIELD = "text"; private static final String FIELD = "text";
private static final class OverlapAnalyzer extends Analyzer { private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamOverlap(); return new TokenStreamComponents(new TokenStreamOverlap());
} }
} }
private static final class TokenStreamOverlap extends TokenStream { private static final class TokenStreamOverlap extends Tokenizer {
private Token[] tokens; private Token[] tokens;
private int i = -1; private int i = -1;

View File

@ -24,11 +24,7 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
@ -198,10 +194,10 @@ public abstract class AbstractTestCase extends LuceneTestCase {
return phraseQuery; return phraseQuery;
} }
static final class BigramAnalyzer extends Analyzer { static final class BigramAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new BasicNGramTokenizer( reader ); return new TokenStreamComponents(new BasicNGramTokenizer(reader));
} }
} }

View File

@ -22,9 +22,7 @@ import java.io.Reader;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
@ -292,15 +290,15 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
return token; return token;
} }
public static final class TokenArrayAnalyzer extends Analyzer { public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
Token[] tokens; final Token[] tokens;
public TokenArrayAnalyzer(Token... tokens) { public TokenArrayAnalyzer(Token... tokens) {
this.tokens = tokens; this.tokens = tokens;
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) { Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class); final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
int p = 0; int p = 0;
@ -318,7 +316,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
this.p = 0; this.p = 0;
} }
}; };
return ts; return new TokenStreamComponents(ts);
} }
} }
} }

View File

@ -17,8 +17,13 @@ package org.apache.lucene.analysis;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
/** /**
* A convenience subclass of Analyzer that makes it easy to implement * A convenience subclass of Analyzer that makes it easy to implement
@ -38,6 +43,16 @@ import java.io.Reader;
*/ */
public abstract class ReusableAnalyzerBase extends Analyzer { public abstract class ReusableAnalyzerBase extends Analyzer {
/** Strategy consulted by {@code reusableTokenStream} to look up and store reusable components. */
private final ReuseStrategy reuseStrategy;

/**
 * Creates a new ReusableAnalyzerBase that uses a {@link GlobalReuseStrategy},
 * i.e. the same components are reused for every field.
 */
public ReusableAnalyzerBase() {
  this(new GlobalReuseStrategy());
}

/**
 * Creates a new ReusableAnalyzerBase that reuses components according to the
 * given {@link ReuseStrategy}.
 *
 * @param reuseStrategy
 *          strategy used to retrieve and store the reusable
 *          {@link TokenStreamComponents}; must not be {@code null}
 * @throws NullPointerException
 *           if {@code reuseStrategy} is {@code null}
 */
public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) {
  // Fail fast: a null strategy would otherwise only surface later as an
  // anonymous NullPointerException inside reusableTokenStream() or close().
  if (reuseStrategy == null) {
    throw new NullPointerException("reuseStrategy must not be null");
  }
  this.reuseStrategy = reuseStrategy;
}
/** /**
* Creates a new {@link TokenStreamComponents} instance for this analyzer. * Creates a new {@link TokenStreamComponents} instance for this analyzer.
* *
@ -66,14 +81,15 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
@Override @Override
public final TokenStream reusableTokenStream(final String fieldName, public final TokenStream reusableTokenStream(final String fieldName,
final Reader reader) throws IOException { final Reader reader) throws IOException {
TokenStreamComponents streamChain = (TokenStreamComponents) TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
getPreviousTokenStream();
final Reader r = initReader(reader); final Reader r = initReader(reader);
if (streamChain == null || !streamChain.reset(r)) { if (components == null) {
streamChain = createComponents(fieldName, r); components = createComponents(fieldName, r);
setPreviousTokenStream(streamChain); reuseStrategy.setReusableComponents(fieldName, components);
} else {
components.reset(r);
} }
return streamChain.getTokenStream(); return components.getTokenStream();
} }
/** /**
@ -99,6 +115,15 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
return reader; return reader;
} }
/**
 * {@inheritDoc}
 * <p>
 * Also closes the {@link ReuseStrategy} used by this analyzer. The strategy is
 * closed even if closing the superclass throws, so its resources are not
 * leaked.
 */
@Override
public void close() {
  try {
    super.close();
  } finally {
    reuseStrategy.close();
  }
}
/** /**
* This class encapsulates the outer components of a token stream. It provides * This class encapsulates the outer components of a token stream. It provides
* access to the source ({@link Tokenizer}) and the outer end (sink), an * access to the source ({@link Tokenizer}) and the outer end (sink), an
@ -137,22 +162,16 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
} }
/** /**
* Resets the encapsulated components with the given reader. This method by * Resets the encapsulated components with the given reader. If the components
* default returns <code>true</code> indicating that the components have * cannot be reset, an Exception should be thrown.
* been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
* their own {@link TokenStreamComponents} returning <code>false</code> if
* the components cannot be reset.
* *
* @param reader * @param reader
* a reader to reset the source component * a reader to reset the source component
* @return <code>true</code> if the components were reset, otherwise
* <code>false</code>
* @throws IOException * @throws IOException
* if the component's reset method throws an {@link IOException} * if the component's reset method throws an {@link IOException}
*/ */
protected boolean reset(final Reader reader) throws IOException { protected void reset(final Reader reader) throws IOException {
source.reset(reader); source.reset(reader);
return true;
} }
/** /**
@ -166,4 +185,124 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
} }
/**
 * Strategy defining how TokenStreamComponents are reused per call to
 * {@link ReusableAnalyzerBase#reusableTokenStream(String, java.io.Reader)}.
 * <p>
 * Implementations decide how components are keyed (for example one set for
 * all fields, or one set per field) and keep them in the value managed by
 * {@link #getStoredValue()} and {@link #setStoredValue(Object)}, which is held
 * in a {@link CloseableThreadLocal}.
 */
public static abstract class ReuseStrategy {

  /** Holder of the stored value; set to {@code null} once this strategy is closed. */
  private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();

  /**
   * Gets the reusable TokenStreamComponents for the field with the given name
   *
   * @param fieldName Name of the field whose reusable TokenStreamComponents
   *        are to be retrieved
   * @return Reusable TokenStreamComponents for the field, or {@code null}
   *         if there were no previous components for the field
   */
  public abstract TokenStreamComponents getReusableComponents(String fieldName);

  /**
   * Stores the given TokenStreamComponents as the reusable components for the
   * field with the given name
   *
   * @param fieldName Name of the field whose TokenStreamComponents are being set
   * @param components TokenStreamComponents which are to be reused for the field
   */
  public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);

  /**
   * Returns the currently stored value
   *
   * @return Currently stored value or {@code null} if no value is stored
   * @throws AlreadyClosedException if this strategy has been closed
   */
  protected final Object getStoredValue() {
    try {
      return storedValue.get();
    } catch (NullPointerException npe) {
      // A null holder means close() has been called; any other NPE is rethrown as-is.
      if (storedValue == null) {
        throw new AlreadyClosedException("this Analyzer is closed");
      } else {
        throw npe;
      }
    }
  }

  /**
   * Sets the stored value
   *
   * @param storedValue Value to store
   * @throws AlreadyClosedException if this strategy has been closed
   */
  protected final void setStoredValue(Object storedValue) {
    try {
      this.storedValue.set(storedValue);
    } catch (NullPointerException npe) {
      // The parameter shadows the field, so the field must be qualified with
      // "this." here: the closed state is signalled by the holder being null,
      // not by the value the caller is trying to store.
      if (this.storedValue == null) {
        throw new AlreadyClosedException("this Analyzer is closed");
      } else {
        throw npe;
      }
    }
  }

  /**
   * Closes the ReuseStrategy, freeing any resources. Calling this method on an
   * already closed strategy has no effect.
   */
  public void close() {
    if (storedValue != null) {
      storedValue.close();
      storedValue = null;
    }
  }
}
/**
* Implementation of {@link ReuseStrategy} that reuses the same components for
* every field.
*/
public final static class GlobalReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
public TokenStreamComponents getReusableComponents(String fieldName) {
return (TokenStreamComponents) getStoredValue();
}
/**
* {@inheritDoc}
*/
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
setStoredValue(components);
}
}
/**
* Implementation of {@link ReuseStrategy} that reuses components per-field by
* maintaining a Map of TokenStreamComponent per field name.
*/
public static class PerFieldReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
public TokenStreamComponents getReusableComponents(String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
if (componentsPerField == null) {
componentsPerField = new HashMap<String, TokenStreamComponents>();
setStoredValue(componentsPerField);
}
componentsPerField.put(fieldName, components);
}
}
} }

View File

@ -42,7 +42,7 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
* </ul> * </ul>
* @see MockTokenizer * @see MockTokenizer
*/ */
public final class MockAnalyzer extends Analyzer { public final class MockAnalyzer extends ReusableAnalyzerBase {
private final CharacterRunAutomaton runAutomaton; private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase; private final boolean lowerCase;
private final CharacterRunAutomaton filter; private final CharacterRunAutomaton filter;
@ -62,6 +62,7 @@ public final class MockAnalyzer extends Analyzer {
* @param enablePositionIncrements true if position increments should reflect filtered terms. * @param enablePositionIncrements true if position increments should reflect filtered terms.
*/ */
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) { public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
super(new PerFieldReuseStrategy());
this.random = random; this.random = random;
this.runAutomaton = runAutomaton; this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase; this.lowerCase = lowerCase;
@ -88,41 +89,11 @@ public final class MockAnalyzer extends Analyzer {
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase); MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
tokenizer.setEnableChecks(enableChecks); tokenizer.setEnableChecks(enableChecks);
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements); TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
filt = maybePayload(filt, fieldName); return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
return filt;
}
private class SavedStreams {
MockTokenizer tokenizer;
TokenFilter filter;
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
@SuppressWarnings("unchecked") Map<String,SavedStreams> map = (Map) getPreviousTokenStream();
if (map == null) {
map = new HashMap<String,SavedStreams>();
setPreviousTokenStream(map);
}
SavedStreams saved = map.get(fieldName);
if (saved == null) {
saved = new SavedStreams();
saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
saved.tokenizer.setEnableChecks(enableChecks);
saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
saved.filter = maybePayload(saved.filter, fieldName);
map.put(fieldName, saved);
return saved.filter;
} else {
saved.tokenizer.reset(reader);
return saved.filter;
}
} }
private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) { private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {

View File

@ -30,16 +30,15 @@ import java.io.Reader;
* *
* *
**/ **/
public final class MockPayloadAnalyzer extends Analyzer { public final class MockPayloadAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new MockPayloadFilter(result, fieldName); return new TokenStreamComponents(result, new MockPayloadFilter(result, fieldName));
} }
} }
/** /**
* *
* *

View File

@ -19,6 +19,7 @@ package org.apache.lucene;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -34,32 +35,36 @@ public class TestAssertions extends LuceneTestCase {
} }
} }
static class TestAnalyzer1 extends Analyzer { static class TestAnalyzer1 extends ReusableAnalyzerBase {
@Override @Override
public final TokenStream tokenStream(String s, Reader r) { return null; } protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@Override return null;
public final TokenStream reusableTokenStream(String s, Reader r) { return null; } }
} }
static final class TestAnalyzer2 extends Analyzer { static final class TestAnalyzer2 extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String s, Reader r) { return null; } protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@Override return null;
public TokenStream reusableTokenStream(String s, Reader r) { return null; } }
} }
static class TestAnalyzer3 extends Analyzer { static class TestAnalyzer3 extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String s, Reader r) { return null; } protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@Override return null;
public TokenStream reusableTokenStream(String s, Reader r) { return null; } }
} }
static class TestAnalyzer4 extends Analyzer { static class TestAnalyzer4 extends ReusableAnalyzerBase {
@Override @Override
public final TokenStream tokenStream(String s, Reader r) { return null; } protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@Override return null;
public TokenStream reusableTokenStream(String s, Reader r) { return null; } }
} }
static class TestTokenStream1 extends TokenStream { static class TestTokenStream1 extends TokenStream {

View File

@ -20,11 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -107,10 +103,10 @@ public class TestDocumentWriter extends LuceneTestCase {
} }
public void testPositionIncrementGap() throws IOException { public void testPositionIncrementGap() throws IOException {
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
} }
@Override @Override
@ -142,10 +138,11 @@ public class TestDocumentWriter extends LuceneTestCase {
} }
public void testTokenReuse() throws IOException { public void testTokenReuse() throws IOException {
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) { Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
boolean first = true; boolean first = true;
AttributeSource.State state; AttributeSource.State state;
@ -187,7 +184,7 @@ public class TestDocumentWriter extends LuceneTestCase {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}; });
} }
}; };

View File

@ -31,11 +31,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.BinaryField; import org.apache.lucene.document.BinaryField;
@ -1710,10 +1706,10 @@ public class TestIndexWriter extends LuceneTestCase {
dir.close(); dir.close();
} }
static final class StringSplitAnalyzer extends Analyzer { static final class StringSplitAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new StringSplitTokenizer(reader); return new TokenStreamComponents(new StringSplitTokenizer(reader));
} }
} }

View File

@ -23,11 +23,7 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
@ -179,21 +175,20 @@ public class TestIndexWriterCommit extends LuceneTestCase {
Analyzer analyzer; Analyzer analyzer;
if (random.nextBoolean()) { if (random.nextBoolean()) {
// no payloads // no payloads
analyzer = new Analyzer() { analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
} }
}; };
} else { } else {
// fixed length payloads // fixed length payloads
final int length = random.nextInt(200); final int length = random.nextInt(200);
analyzer = new Analyzer() { analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockFixedLengthPayloadFilter(random, Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
new MockTokenizer(reader, MockTokenizer.WHITESPACE, true), return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(random, tokenizer, length));
length);
} }
}; };
} }

View File

@ -26,10 +26,7 @@ import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
@ -902,10 +899,10 @@ public class TestIndexWriterDelete extends LuceneTestCase {
final Random r = random; final Random r = random;
Directory dir = newDirectory(); Directory dir = newDirectory();
// note this test explicitly disables payloads // note this test explicitly disables payloads
final Analyzer analyzer = new Analyzer() { final Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
} }
}; };
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH)); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));

View File

@ -27,11 +27,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
@ -390,12 +386,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
doc.add(newField("field", "a field", TextField.TYPE_STORED)); doc.add(newField("field", "a field", TextField.TYPE_STORED));
w.addDocument(doc); w.addDocument(doc);
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
return new CrashingFilter(fieldName, tokenizer); return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
} }
}; };
@ -458,13 +454,13 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
// LUCENE-1072 // LUCENE-1072
public void testExceptionFromTokenStream() throws IOException { public void testExceptionFromTokenStream() throws IOException {
Directory dir = newDirectory(); Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() { IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
return new TokenFilter(tokenizer) { return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
private int count = 0; private int count = 0;
@Override @Override
@ -480,7 +476,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
super.reset(); super.reset();
this.count = 0; this.count = 0;
} }
}; });
} }
}); });
@ -595,12 +591,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
} }
public void testDocumentsWriterExceptions() throws IOException { public void testDocumentsWriterExceptions() throws IOException {
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
return new CrashingFilter(fieldName, tokenizer); return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
} }
}; };
@ -691,12 +687,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
} }
public void testDocumentsWriterExceptionThreads() throws Exception { public void testDocumentsWriterExceptionThreads() throws Exception {
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
return new CrashingFilter(fieldName, tokenizer); return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
} }
}; };

View File

@ -20,10 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
@ -71,10 +68,10 @@ public class TestLazyProxSkipping extends LuceneTestCase {
private void createIndex(int numHits) throws IOException { private void createIndex(int numHits) throws IOException {
int numDocs = 500; int numDocs = 500;
final Analyzer analyzer = new Analyzer() { final Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
} }
}; };
Directory directory = new SeekCountingDirectory(new RAMDirectory()); Directory directory = new SeekCountingDirectory(new RAMDirectory());

View File

@ -21,10 +21,7 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
@ -114,11 +111,12 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]); assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
} }
private static class PayloadAnalyzer extends Analyzer { private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private final AtomicInteger payloadCount = new AtomicInteger(-1); private final AtomicInteger payloadCount = new AtomicInteger(-1);
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new PayloadFilter(payloadCount, new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(tokenizer, new PayloadFilter(payloadCount, tokenizer));
} }
} }

View File

@ -25,11 +25,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -105,12 +101,12 @@ public class TestPayloads extends LuceneTestCase {
// so this field is used to check if the DocumentWriter correctly enables the payloads bit // so this field is used to check if the DocumentWriter correctly enables the payloads bit
// even if only some term positions have payloads // even if only some term positions have payloads
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED)); d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED)); d.add(newField("f2", "This field has payloads in all docs NO PAYLOAD", TextField.TYPE_UNSTORED));
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
// enabled in only some documents // enabled in only some documents
d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED)); d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED));
// only add payload data for field f2 // only add payload data for field f2
analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1); analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
writer.addDocument(d); writer.addDocument(d);
// flush // flush
writer.close(); writer.close();

View File

@ -20,8 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -64,10 +63,10 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
} }
} }
final class BugReproAnalyzer extends Analyzer{ final class BugReproAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String arg0, Reader arg1) { public TokenStreamComponents createComponents(String arg0, Reader arg1) {
return new BugReproAnalyzerTokenizer(); return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
} }
} }

View File

@ -137,7 +137,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
super.tearDown(); super.tearDown();
} }
private class MyTokenStream extends TokenStream { private class MyTokenStream extends Tokenizer {
private int tokenUpto; private int tokenUpto;
private final CharTermAttribute termAtt; private final CharTermAttribute termAtt;
@ -175,10 +175,10 @@ public class TestTermVectorsReader extends LuceneTestCase {
} }
} }
private class MyAnalyzer extends Analyzer { private class MyAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MyTokenStream(); return new TokenStreamComponents(new MyTokenStream());
} }
} }

View File

@ -22,7 +22,7 @@ import java.io.Reader;
import java.util.Random; import java.util.Random;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -77,10 +77,10 @@ public class TestTermdocPerf extends LuceneTestCase {
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException { void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF); final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return ts; return new TokenStreamComponents(ts);
} }
}; };

View File

@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -345,7 +346,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
} }
} }
private static class CannedAnalyzer extends Analyzer { private static class CannedAnalyzer extends ReusableAnalyzerBase {
private final TokenAndPos[] tokens; private final TokenAndPos[] tokens;
public CannedAnalyzer(TokenAndPos[] tokens) { public CannedAnalyzer(TokenAndPos[] tokens) {
@ -353,8 +354,8 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new CannedTokenizer(tokens); return new TokenStreamComponents(new CannedTokenizer(tokens));
} }
} }

View File

@ -55,10 +55,10 @@ public class TestPhraseQuery extends LuceneTestCase {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
directory = newDirectory(); directory = newDirectory();
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
} }
@Override @Override

View File

@ -56,10 +56,10 @@ public class TestPositionIncrement extends LuceneTestCase {
final static boolean VERBOSE = false; final static boolean VERBOSE = false;
public void testSetPosition() throws Exception { public void testSetPosition() throws Exception {
Analyzer analyzer = new Analyzer() { Analyzer analyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStream() { return new TokenStreamComponents(new Tokenizer() {
private final String[] TOKENS = {"1", "2", "3", "4", "5"}; private final String[] TOKENS = {"1", "2", "3", "4", "5"};
private final int[] INCREMENTS = {0, 2, 1, 0, 1}; private final int[] INCREMENTS = {0, 2, 1, 0, 1};
private int i = 0; private int i = 0;
@ -85,7 +85,7 @@ public class TestPositionIncrement extends LuceneTestCase {
super.reset(); super.reset();
this.i = 0; this.i = 0;
} }
}; });
} }
}; };
Directory store = newDirectory(); Directory store = newDirectory();

View File

@ -190,7 +190,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
assertFalse("queries with different inclusive are not equal", query.equals(other)); assertFalse("queries with different inclusive are not equal", query.equals(other));
} }
private static class SingleCharAnalyzer extends Analyzer { private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
private static class SingleCharTokenizer extends Tokenizer { private static class SingleCharTokenizer extends Tokenizer {
char[] buffer = new char[1]; char[] buffer = new char[1];
@ -225,19 +225,8 @@ public class TestTermRangeQuery extends LuceneTestCase {
} }
@Override @Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); return new TokenStreamComponents(new SingleCharTokenizer(reader));
if (tokenizer == null) {
tokenizer = new SingleCharTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
return tokenizer;
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new SingleCharTokenizer(reader);
} }
} }

View File

@ -55,14 +55,16 @@ public class PayloadHelper {
public IndexReader reader; public IndexReader reader;
public final class PayloadAnalyzer extends Analyzer { public final class PayloadAnalyzer extends ReusableAnalyzerBase {
public PayloadAnalyzer() {
super(new PerFieldReuseStrategy());
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
result = new PayloadFilter(result, fieldName); return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
return result;
} }
} }

View File

@ -55,12 +55,11 @@ public class TestPayloadNearQuery extends LuceneTestCase {
private static byte[] payload2 = new byte[]{2}; private static byte[] payload2 = new byte[]{2};
private static byte[] payload4 = new byte[]{4}; private static byte[] payload4 = new byte[]{4};
private static class PayloadAnalyzer extends Analyzer { private static class PayloadAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
result = new PayloadFilter(result, fieldName); return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
return result;
} }
} }

View File

@ -64,14 +64,16 @@ public class TestPayloadTermQuery extends LuceneTestCase {
private static final byte[] payloadMultiField2 = new byte[]{4}; private static final byte[] payloadMultiField2 = new byte[]{4};
protected static Directory directory; protected static Directory directory;
private static class PayloadAnalyzer extends Analyzer { private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private PayloadAnalyzer() {
super(new PerFieldReuseStrategy());
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
result = new PayloadFilter(result, fieldName); return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
return result;
} }
} }

View File

@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -70,14 +71,12 @@ public class TestBasics extends LuceneTestCase {
private static Directory directory; private static Directory directory;
static final class SimplePayloadFilter extends TokenFilter { static final class SimplePayloadFilter extends TokenFilter {
String fieldName;
int pos; int pos;
final PayloadAttribute payloadAttr; final PayloadAttribute payloadAttr;
final CharTermAttribute termAttr; final CharTermAttribute termAttr;
public SimplePayloadFilter(TokenStream input, String fieldName) { public SimplePayloadFilter(TokenStream input) {
super(input); super(input);
this.fieldName = fieldName;
pos = 0; pos = 0;
payloadAttr = input.addAttribute(PayloadAttribute.class); payloadAttr = input.addAttribute(PayloadAttribute.class);
termAttr = input.addAttribute(CharTermAttribute.class); termAttr = input.addAttribute(CharTermAttribute.class);
@ -105,7 +104,7 @@ public class TestBasics extends LuceneTestCase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStream tokenStream(String fieldName, Reader reader) {
return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true), fieldName); return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
} }
}; };

View File

@ -23,10 +23,7 @@ import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -479,18 +476,16 @@ public class TestPayloadSpans extends LuceneTestCase {
assertEquals(numSpans, cnt); assertEquals(numSpans, cnt);
} }
final class PayloadAnalyzer extends Analyzer { final class PayloadAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
result = new PayloadFilter(result, fieldName); return new TokenStreamComponents(result, new PayloadFilter(result));
return result;
} }
} }
final class PayloadFilter extends TokenFilter { final class PayloadFilter extends TokenFilter {
String fieldName;
Set<String> entities = new HashSet<String>(); Set<String> entities = new HashSet<String>();
Set<String> nopayload = new HashSet<String>(); Set<String> nopayload = new HashSet<String>();
int pos; int pos;
@ -498,9 +493,8 @@ public class TestPayloadSpans extends LuceneTestCase {
CharTermAttribute termAtt; CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt; PositionIncrementAttribute posIncrAtt;
public PayloadFilter(TokenStream input, String fieldName) { public PayloadFilter(TokenStream input) {
super(input); super(input);
this.fieldName = fieldName;
pos = 0; pos = 0;
entities.add("xx"); entities.add("xx");
entities.add("one"); entities.add("one");
@ -536,13 +530,12 @@ public class TestPayloadSpans extends LuceneTestCase {
} }
} }
public final class TestPayloadAnalyzer extends Analyzer { public final class TestPayloadAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
result = new PayloadFilter(result, fieldName); return new TokenStreamComponents(result, new PayloadFilter(result));
return result;
} }
} }
} }

View File

@ -106,3 +106,5 @@ Build
* LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the * LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the
analysis module. The 'smartcn' and 'stempel' components now depend on 'common'. analysis module. The 'smartcn' and 'stempel' components now depend on 'common'.
(Robert Muir) (Robert Muir)
* LUCENE-3376: Moved ReusableAnalyzerBase into lucene core. (Chris Male)

View File

@ -27,6 +27,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer;
@ -66,7 +67,7 @@ import org.apache.lucene.util.Version;
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. * @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
*/ */
@Deprecated @Deprecated
public final class PatternAnalyzer extends Analyzer { public final class PatternAnalyzer extends ReusableAnalyzerBase {
/** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */ /** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+"); public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
@ -187,25 +188,21 @@ public final class PatternAnalyzer extends Analyzer {
* the string to tokenize * the string to tokenize
* @return a new token stream * @return a new token stream
*/ */
public TokenStream tokenStream(String fieldName, String text) { public TokenStreamComponents createComponents(String fieldName, String text) {
// Ideally the Analyzer superclass should have a method with the same signature, // Ideally the Analyzer superclass should have a method with the same signature,
// with a default impl that simply delegates to the StringReader flavour. // with a default impl that simply delegates to the StringReader flavour.
if (text == null) if (text == null)
throw new IllegalArgumentException("text must not be null"); throw new IllegalArgumentException("text must not be null");
TokenStream stream;
if (pattern == NON_WORD_PATTERN) { // fast path if (pattern == NON_WORD_PATTERN) { // fast path
stream = new FastStringTokenizer(text, true, toLowerCase, stopWords); return new TokenStreamComponents(new FastStringTokenizer(text, true, toLowerCase, stopWords));
} } else if (pattern == WHITESPACE_PATTERN) { // fast path
else if (pattern == WHITESPACE_PATTERN) { // fast path return new TokenStreamComponents(new FastStringTokenizer(text, false, toLowerCase, stopWords));
stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
}
else {
stream = new PatternTokenizer(text, pattern, toLowerCase);
if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
} }
return stream; Tokenizer tokenizer = new PatternTokenizer(text, pattern, toLowerCase);
TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
return new TokenStreamComponents(tokenizer, result);
} }
/** /**
@ -220,10 +217,10 @@ public final class PatternAnalyzer extends Analyzer {
* @return a new token stream * @return a new token stream
*/ */
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
try { try {
String text = toString(reader); String text = toString(reader);
return tokenStream(fieldName, text); return createComponents(fieldName, text);
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }

View File

@ -123,9 +123,9 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
tok = new StopFilter(matchVersion, tok, stopwords); tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) { return new TokenStreamComponents(src, tok) {
@Override @Override
protected boolean reset(final Reader reader) throws IOException { protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength); src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
return super.reset(reader); super.reset(reader);
} }
}; };
} }

View File

@ -124,9 +124,9 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
tok = new StopFilter(matchVersion, tok, stopwords); tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) { return new TokenStreamComponents(src, tok) {
@Override @Override
protected boolean reset(final Reader reader) throws IOException { protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength); src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
return super.reset(reader); super.reset(reader);
} }
}; };
} }

View File

@ -21,9 +21,7 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
@ -66,10 +64,10 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
* Analyzer that just uses ChineseTokenizer, not ChineseFilter. * Analyzer that just uses ChineseTokenizer, not ChineseFilter.
* convenience to show the behavior of the tokenizer * convenience to show the behavior of the tokenizer
*/ */
private class JustChineseTokenizerAnalyzer extends Analyzer { private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new ChineseTokenizer(reader); return new TokenStreamComponents(new ChineseTokenizer(reader));
} }
} }
@ -77,10 +75,11 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
* Analyzer that just uses ChineseFilter, not ChineseTokenizer. * Analyzer that just uses ChineseFilter, not ChineseTokenizer.
* convenience to show the behavior of the filter. * convenience to show the behavior of the filter.
*/ */
private class JustChineseFilterAnalyzer extends Analyzer { private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new ChineseFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)); Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
} }
} }

View File

@ -19,11 +19,8 @@ package org.apache.lucene.analysis.commongrams;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.CharArraySet;
@ -87,11 +84,12 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
* @return Map<String,String> * @return Map<String,String>
*/ */
public void testCommonGramsQueryFilter() throws Exception { public void testCommonGramsQueryFilter() throws Exception {
Analyzer a = new Analyzer() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String field, Reader in) { public TokenStreamComponents createComponents(String field, Reader in) {
return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords)); return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
tokenizer, commonWords)));
} }
}; };
@ -156,11 +154,12 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
} }
public void testCommonGramsFilter() throws Exception { public void testCommonGramsFilter() throws Exception {
Analyzer a = new Analyzer() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String field, Reader in) { public TokenStreamComponents createComponents(String field, Reader in) {
return new CommonGramsFilter(TEST_VERSION_CURRENT, Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords); return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT,
tokenizer, commonWords));
} }
}; };

View File

@ -21,10 +21,7 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@ -120,12 +117,12 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
String[] y = StandardTokenizer.TOKEN_TYPES; String[] y = StandardTokenizer.TOKEN_TYPES;
} }
private static class LowerCaseWhitespaceAnalyzer extends Analyzer { private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new LowerCaseFilter(TEST_VERSION_CURRENT, Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
} }
} }

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
*/ */
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays; import java.util.Arrays;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -128,7 +129,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
assertTokenStreamContents(ts, expected); assertTokenStreamContents(ts, expected);
// analysis of a String, uses PatternAnalyzer.tokenStream(String, String) // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
TokenStream ts2 = analyzer.tokenStream("dummy", document); TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
assertTokenStreamContents(ts2, expected); assertTokenStreamContents(ts2, expected);
} }
} }

View File

@ -17,12 +17,7 @@
package org.apache.lucene.analysis.miscellaneous; package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -245,13 +240,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 1, 1, 1 }); new int[] { 1, 1, 1 });
/* analyzer that will consume tokens with large position increments */ /* analyzer that will consume tokens with large position increments */
Analyzer a2 = new Analyzer() { Analyzer a2 = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String field, Reader reader) { public TokenStreamComponents createComponents(String field, Reader reader) {
return new WordDelimiterFilter( Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
new LargePosIncTokenFilter( return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)), new LargePosIncTokenFilter(tokenizer),
flags, protWords); flags, protWords));
} }
}; };
@ -278,13 +273,14 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 6, 14, 19 }, new int[] { 6, 14, 19 },
new int[] { 1, 11, 1 }); new int[] { 1, 11, 1 });
Analyzer a3 = new Analyzer() { Analyzer a3 = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String field, Reader reader) { public TokenStreamComponents createComponents(String field, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT, StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET); tokenizer, StandardAnalyzer.STOP_WORDS_SET);
filter.setEnablePositionIncrements(true); filter.setEnablePositionIncrements(true);
return new WordDelimiterFilter(filter, flags, protWords); return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords));
} }
}; };

View File

@ -144,32 +144,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"}); assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
} }
/*
* analyzer that does not support reuse
* it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
*/
private class NonreusableAnalyzer extends Analyzer {
int invocationCount = 0;
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
else
return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
}
}
public void testWrappingNonReusableAnalyzer() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
a.addStopWords(reader, 10);
TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
assertTokenStreamContents(tokenStream, new String[0]);
tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
assertTokenStreamContents(tokenStream, new String[0]);
}
public void testTokenStream() throws Exception { public void testTokenStream() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
a.addStopWords(reader, 10); a.addStopWords(reader, 10);

View File

@ -149,40 +149,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
new int[] { 1, 0, 1, 0, 1, 0, 1 }); new int[] { 1, 0, 1, 0, 1, 0, 1 });
} }
/*
* analyzer that does not support reuse
* it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
*/
private class NonreusableAnalyzer extends Analyzer {
int invocationCount = 0;
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if (++invocationCount % 2 == 0)
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
else
return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
}
}
public void testWrappedAnalyzerDoesNotReuse() throws Exception {
Analyzer a = new ShingleAnalyzerWrapper(new NonreusableAnalyzer());
assertAnalyzesToReuse(a, "please divide into shingles.",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 27, 27 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
assertAnalyzesToReuse(a, "please divide into shingles.",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles.", "shingles." },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 28, 28 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
assertAnalyzesToReuse(a, "please divide into shingles.",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 27, 27 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
}
public void testNonDefaultMinShingleSize() throws Exception { public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4); = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);

View File

@ -18,9 +18,7 @@ package org.apache.lucene.collation;
*/ */
import org.apache.lucene.analysis.CollationTestBase; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -54,7 +52,7 @@ public class TestCollationKeyFilter extends CollationTestBase {
(collator.getCollationKey(secondRangeEndOriginal).toByteArray())); (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
public final class TestAnalyzer extends Analyzer { public final class TestAnalyzer extends ReusableAnalyzerBase {
private Collator _collator; private Collator _collator;
TestAnalyzer(Collator collator) { TestAnalyzer(Collator collator) {
@ -62,10 +60,9 @@ public class TestCollationKeyFilter extends CollationTestBase {
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new KeywordTokenizer(reader); Tokenizer result = new KeywordTokenizer(reader);
result = new CollationKeyFilter(result, _collator); return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
return result;
} }
} }

View File

@ -20,20 +20,18 @@ package org.apache.lucene.analysis.icu;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/** /**
* Tests ICUFoldingFilter * Tests ICUFoldingFilter
*/ */
public class TestICUFoldingFilter extends BaseTokenStreamTestCase { public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
Analyzer a = new Analyzer() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new ICUFoldingFilter( Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
} }
}; };
public void testDefaults() throws IOException { public void testDefaults() throws IOException {

View File

@ -20,9 +20,7 @@ package org.apache.lucene.analysis.icu;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.Normalizer2;
@ -31,11 +29,11 @@ import com.ibm.icu.text.Normalizer2;
* Tests the ICUNormalizer2Filter * Tests the ICUNormalizer2Filter
*/ */
public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase { public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
Analyzer a = new Analyzer() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new ICUNormalizer2Filter( Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
} }
}; };
@ -61,13 +59,14 @@ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
} }
public void testAlternate() throws IOException { public void testAlternate() throws IOException {
Analyzer a = new Analyzer() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new ICUNormalizer2Filter( Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
tokenizer,
/* specify nfc with decompose to get nfd */ /* specify nfc with decompose to get nfd */
Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)); Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
} }
}; };

View File

@ -20,9 +20,7 @@ package org.apache.lucene.collation;
import com.ibm.icu.text.Collator; import com.ibm.icu.text.Collator;
import org.apache.lucene.analysis.CollationTestBase; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -46,7 +44,7 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
(collator.getCollationKey(secondRangeEndOriginal).toByteArray())); (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
public final class TestAnalyzer extends Analyzer { public final class TestAnalyzer extends ReusableAnalyzerBase {
private Collator _collator; private Collator _collator;
TestAnalyzer(Collator collator) { TestAnalyzer(Collator collator) {
@ -54,10 +52,9 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new KeywordTokenizer(reader); Tokenizer result = new KeywordTokenizer(reader);
result = new ICUCollationKeyFilter(result, _collator); return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
return result;
} }
} }

View File

@ -25,6 +25,7 @@ import java.util.Collections;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -54,7 +55,7 @@ import org.apache.lucene.util.Version;
* </p> * </p>
* @lucene.experimental * @lucene.experimental
*/ */
public final class SmartChineseAnalyzer extends Analyzer { public final class SmartChineseAnalyzer extends ReusableAnalyzerBase {
private final Set<?> stopWords; private final Set<?> stopWords;
@ -141,9 +142,9 @@ public final class SmartChineseAnalyzer extends Analyzer {
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new SentenceTokenizer(reader); Tokenizer tokenizer = new SentenceTokenizer(reader);
result = new WordTokenFilter(result); TokenStream result = new WordTokenFilter(tokenizer);
// result = new LowerCaseFilter(result); // result = new LowerCaseFilter(result);
// LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text. // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
// The porter stemming is too strict, this is not a bug, this is a feature:) // The porter stemming is too strict, this is not a bug, this is a feature:)
@ -151,32 +152,6 @@ public final class SmartChineseAnalyzer extends Analyzer {
if (!stopWords.isEmpty()) { if (!stopWords.isEmpty()) {
result = new StopFilter(matchVersion, result, stopWords, false); result = new StopFilter(matchVersion, result, stopWords, false);
} }
return result; return new TokenStreamComponents(tokenizer, result);
}
private static final class SavedStreams {
Tokenizer tokenStream;
TokenStream filteredTokenStream;
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
setPreviousTokenStream(streams);
streams.tokenStream = new SentenceTokenizer(reader);
streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
if (!stopWords.isEmpty()) {
streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
}
} else {
streams.tokenStream.reset(reader);
streams.filteredTokenStream.reset(); // reset WordTokenFilter's state
}
return streams.filteredTokenStream;
} }
} }

View File

@ -5,10 +5,7 @@ import java.io.Reader;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -140,10 +137,10 @@ public class CategoryListIteratorTest extends LuceneTestCase {
DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder( DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())))); new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
// this test requires that no payloads ever be randomly present! // this test requires that no payloads ever be randomly present!
final Analyzer noPayloadsAnalyzer = new Analyzer() { final Analyzer noPayloadsAnalyzer = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.KEYWORD, false); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false));
} }
}; };
// NOTE: test is wired to LogMP... because test relies on certain docids having payloads // NOTE: test is wired to LogMP... because test relies on certain docids having payloads

View File

@ -20,10 +20,7 @@ package org.apache.lucene.queryparser.analyzing;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -137,14 +134,11 @@ final class TestFoldingFilter extends TokenFilter {
} }
} }
final class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer { final class ASCIIAnalyzer extends ReusableAnalyzerBase {
public ASCIIAnalyzer() {
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
result = new TestFoldingFilter(result); return new TokenStreamComponents(result, new TestFoldingFilter(result));
return result;
} }
} }

View File

@ -122,16 +122,12 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
* Expands "multi" to "multi" and "multi2", both at the same position, * Expands "multi" to "multi" and "multi2", both at the same position,
* and expands "triplemulti" to "triplemulti", "multi3", and "multi2". * and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
*/ */
private class MultiAnalyzer extends Analyzer { private class MultiAnalyzer extends ReusableAnalyzerBase {
public MultiAnalyzer() {
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
result = new TestFilter(result); return new TokenStreamComponents(result, new TestFilter(result));
return result;
} }
} }
@ -196,16 +192,12 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
* Does not work correctly for input other than "the quick brown ...". * Does not work correctly for input other than "the quick brown ...".
*/ */
private class PosIncrementAnalyzer extends Analyzer { private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
public PosIncrementAnalyzer() {
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
result = new TestPosIncrementFilter(result); return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
return result;
} }
} }

View File

@ -22,9 +22,7 @@ import java.io.Reader;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
@ -302,22 +300,23 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
/** /**
* Return empty tokens for field "f1". * Return empty tokens for field "f1".
*/ */
private static class AnalyzerReturningNull extends Analyzer { private static class AnalyzerReturningNull extends ReusableAnalyzerBase {
MockAnalyzer stdAnalyzer = new MockAnalyzer(random); MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
public AnalyzerReturningNull() { public AnalyzerReturningNull() {
super(new PerFieldReuseStrategy());
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
if ("f1".equals(fieldName)) { if ("f1".equals(fieldName)) {
return new EmptyTokenStream(); return new TokenStreamComponents(new EmptyTokenStream());
} else { } else {
return stdAnalyzer.tokenStream(fieldName, reader); return stdAnalyzer.createComponents(fieldName, reader);
} }
} }
private static class EmptyTokenStream extends TokenStream { private static class EmptyTokenStream extends Tokenizer {
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
return false; return false;

View File

@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
*/ */
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@ -41,7 +42,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
} }
} }
private static class CannedAnalyzer extends Analyzer { private static class CannedAnalyzer extends ReusableAnalyzerBase {
private final TokenAndPos[] tokens; private final TokenAndPos[] tokens;
public CannedAnalyzer(TokenAndPos[] tokens) { public CannedAnalyzer(TokenAndPos[] tokens) {
@ -49,8 +50,8 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new CannedTokenizer(tokens); return new TokenStreamComponents(new CannedTokenizer(tokens));
} }
} }

View File

@ -112,12 +112,13 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
} }
} }
public static final class QPTestAnalyzer extends Analyzer { public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
/** Filters MockTokenizer with StopFilter. */ /** Filters MockTokenizer with StopFilter. */
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
} }
} }

View File

@ -143,16 +143,12 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
* Expands "multi" to "multi" and "multi2", both at the same position, and * Expands "multi" to "multi" and "multi2", both at the same position, and
* expands "triplemulti" to "triplemulti", "multi3", and "multi2". * expands "triplemulti" to "triplemulti", "multi3", and "multi2".
*/ */
private class MultiAnalyzer extends Analyzer { private class MultiAnalyzer extends ReusableAnalyzerBase {
public MultiAnalyzer() {
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
result = new TestFilter(result); return new TokenStreamComponents(result, new TestFilter(result));
return result;
} }
} }
@ -214,16 +210,12 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
* correctly for input other than "the quick brown ...". * correctly for input other than "the quick brown ...".
*/ */
private class PosIncrementAnalyzer extends Analyzer { private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
public PosIncrementAnalyzer() {
}
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
result = new TestPosIncrementFilter(result); return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
return result;
} }
} }

View File

@ -21,9 +21,7 @@ import java.io.Reader;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
@ -340,22 +338,23 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
/** /**
* Return empty tokens for field "f1". * Return empty tokens for field "f1".
*/ */
private static final class AnalyzerReturningNull extends Analyzer { private static final class AnalyzerReturningNull extends ReusableAnalyzerBase {
MockAnalyzer stdAnalyzer = new MockAnalyzer(random); MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
public AnalyzerReturningNull() { public AnalyzerReturningNull() {
super(new PerFieldReuseStrategy());
} }
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
if ("f1".equals(fieldName)) { if ("f1".equals(fieldName)) {
return new EmptyTokenStream(); return new TokenStreamComponents(new EmptyTokenStream());
} else { } else {
return stdAnalyzer.tokenStream(fieldName, reader); return stdAnalyzer.createComponents(fieldName, reader);
} }
} }
private static class EmptyTokenStream extends TokenStream { private static class EmptyTokenStream extends Tokenizer {
@Override @Override
public boolean incrementToken() { public boolean incrementToken() {
return false; return false;

View File

@ -128,12 +128,13 @@ public class TestQPHelper extends LuceneTestCase {
} }
} }
public static final class QPTestAnalyzer extends Analyzer { public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
/** Filters MockTokenizer with StopFilter. */ /** Filters MockTokenizer with StopFilter. */
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
} }
} }
@ -344,10 +345,10 @@ public class TestQPHelper extends LuceneTestCase {
} }
} }
private class SimpleCJKAnalyzer extends Analyzer { private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new SimpleCJKTokenizer(reader); return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
} }
} }
@ -1241,10 +1242,10 @@ public class TestQPHelper extends LuceneTestCase {
} }
} }
private class CannedAnalyzer extends Analyzer { private class CannedAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String ignored, Reader alsoIgnored) { public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
return new CannedTokenStream(); return new TokenStreamComponents(new CannedTokenStream());
} }
} }

View File

@ -25,13 +25,7 @@ import java.util.Date;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
import java.util.Locale; import java.util.Locale;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -104,12 +98,13 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
} }
public static final class QPTestAnalyzer extends Analyzer { public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
/** Filters MockTokenizer with StopFilter. */ /** Filters MockTokenizer with StopFilter. */
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
} }
} }
@ -245,10 +240,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
} }
} }
private class SimpleCJKAnalyzer extends Analyzer { private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new SimpleCJKTokenizer(reader); return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
} }
} }
@ -348,10 +343,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
assertQueryEquals("a OR -b", null, "a -b"); assertQueryEquals("a OR -b", null, "a -b");
// +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator // +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator
Analyzer a = new Analyzer() { Analyzer a = new ReusableAnalyzerBase() {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
} }
}; };
assertQueryEquals("a - b", a, "a - b"); assertQueryEquals("a - b", a, "a - b");
@ -1162,18 +1157,19 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
} }
/** whitespace+lowercase analyzer with synonyms */ /** whitespace+lowercase analyzer with synonyms */
private class Analyzer1 extends Analyzer { private class Analyzer1 extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockSynonymFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
} }
} }
/** whitespace+lowercase analyzer without synonyms */ /** whitespace+lowercase analyzer without synonyms */
private class Analyzer2 extends Analyzer { private class Analyzer2 extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
} }
} }
@ -1235,10 +1231,11 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
} }
} }
private class MockCollationAnalyzer extends Analyzer { private class MockCollationAnalyzer extends ReusableAnalyzerBase {
@Override @Override
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(tokenizer, new MockCollationFilter(tokenizer));
} }
} }