LUCENE-3396: Converted most Analyzers over to using ReusableAnalyzerBase
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169607 13f79535-47bb-0310-9956-ffa450edef68
commit 4c5606ee29 (parent e287b70df3)
@@ -171,6 +171,9 @@ Changes in backwards compatibility policy
   IndexableFieldType. See MIGRATE.txt for more details.
   (Nikola Tankovic, Mike McCandless, Chris Male)
 
+* LUCENE-3396: ReusableAnalyzerBase.TokenStreamComponents.reset(Reader) now returns void instead
+  of boolean. If a Component cannot be reset, it should throw an Exception.
+
 Changes in Runtime Behavior
 
 * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
@@ -523,6 +526,12 @@ New features
   (David Mark Nemeskey via Robert Muir)
 
+* LUCENE-3396: ReusableAnalyzerBase now provides a ReuseStrategy abstraction which
+  controls how TokenStreamComponents are reused per request. Two implementations are
+  provided - GlobalReuseStrategy which implements the current behavior of sharing
+  components between all fields, and PerFieldReuseStrategy which shares per field.
+  (Chris Male)
+
 Optimizations
 
 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
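To make the new LUCENE-3396 entry concrete: a ReusableAnalyzerBase subclass picks its ReuseStrategy in the constructor and only describes how to build its components once. The following sketch is illustrative and not part of this commit — the analyzer name and the choice of WhitespaceTokenizer are assumptions; the GlobalReuseStrategy/PerFieldReuseStrategy classes and the ReusableAnalyzerBase(ReuseStrategy) constructor are the ones added here.

import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

// Hypothetical analyzer that keeps one reusable TokenStreamComponents per field name
// instead of sharing a single instance across all fields (the default behavior).
final class PerFieldWhitespaceAnalyzer extends ReusableAnalyzerBase {

  PerFieldWhitespaceAnalyzer() {
    // The no-arg super() constructor uses GlobalReuseStrategy (shared across fields);
    // PerFieldReuseStrategy keeps a separate reusable instance for every field name seen.
    super(new PerFieldReuseStrategy());
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    return new TokenStreamComponents(source);
  }
}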
@@ -1802,7 +1802,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 // behaviour to synonyms
 // ===================================================================
 
-final class SynonymAnalyzer extends Analyzer {
+final class SynonymAnalyzer extends ReusableAnalyzerBase {
   private Map<String,String> synonyms;
 
   public SynonymAnalyzer(Map<String,String> synonyms) {
@@ -1816,12 +1816,12 @@ final class SynonymAnalyzer extends Analyzer {
    * java.io.Reader)
    */
   @Override
-  public TokenStream tokenStream(String arg0, Reader arg1) {
+  public TokenStreamComponents createComponents(String arg0, Reader arg1) {
     Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
     stream.addAttribute(CharTermAttribute.class);
     stream.addAttribute(PositionIncrementAttribute.class);
     stream.addAttribute(OffsetAttribute.class);
-    return new SynonymTokenizer(stream, synonyms);
+    return new TokenStreamComponents(stream, new SynonymTokenizer(stream, synonyms));
   }
 }
 
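The SynonymAnalyzer change above shows the general conversion pattern: the Tokenizer that consumes the Reader goes into TokenStreamComponents as the source (so reset(Reader) can reach it), and the outermost filter goes in as the sink that callers read tokens from. A hedged sketch for a longer chain — the analyzer name, the filters and the stop-word set are illustrative, not taken from this commit:

import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

// Hypothetical analyzer illustrating the source/sink split of TokenStreamComponents.
final class ChainedAnalyzer extends ReusableAnalyzerBase {
  private final Set<?> stopWords;

  ChainedAnalyzer(Set<?> stopWords) {
    this.stopWords = stopWords;
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader); // consumes the Reader
    TokenStream sink = new LowerCaseFilter(Version.LUCENE_CURRENT, source);     // filters wrap the source
    sink = new StopFilter(Version.LUCENE_CURRENT, sink, stopWords);
    return new TokenStreamComponents(source, sink);                             // (source, outer end)
  }
}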
@ -20,10 +20,7 @@ package org.apache.lucene.search.highlight;
|
|||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
|
||||
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -52,15 +49,14 @@ public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
|||
assertTokenStreamContents(filter, new String[] {"short", "toolong",
|
||||
"evenmuchlongertext"});
|
||||
|
||||
// TODO: This is not actually testing reuse! (reusableTokenStream is not implemented)
|
||||
checkOneTermReuse(new Analyzer() {
|
||||
checkOneTermReuse(new ReusableAnalyzerBase() {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
tokenizer.setEnableChecks(false);
|
||||
return new OffsetLimitTokenFilter(tokenizer, 10);
|
||||
return new TokenStreamComponents(tokenizer, new OffsetLimitTokenFilter(tokenizer, 10));
|
||||
}
|
||||
}, "llenges", "llenges");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,9 +20,7 @@ package org.apache.lucene.search.highlight;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
@ -50,15 +48,15 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
public class TokenSourcesTest extends LuceneTestCase {
|
||||
private static final String FIELD = "text";
|
||||
|
||||
private static final class OverlapAnalyzer extends Analyzer {
|
||||
private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new TokenStreamOverlap();
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new TokenStreamOverlap());
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TokenStreamOverlap extends TokenStream {
|
||||
private static final class TokenStreamOverlap extends Tokenizer {
|
||||
private Token[] tokens;
|
||||
|
||||
private int i = -1;
|
||||
|
|
|
@ -24,11 +24,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
|
@ -198,10 +194,10 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
|||
return phraseQuery;
|
||||
}
|
||||
|
||||
static final class BigramAnalyzer extends Analyzer {
|
||||
static final class BigramAnalyzer extends ReusableAnalyzerBase {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new BasicNGramTokenizer( reader );
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new BasicNGramTokenizer(reader));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,9 +22,7 @@ import java.io.Reader;
|
|||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
|
@ -292,15 +290,15 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
|
|||
return token;
|
||||
}
|
||||
|
||||
public static final class TokenArrayAnalyzer extends Analyzer {
|
||||
Token[] tokens;
|
||||
public TokenArrayAnalyzer( Token... tokens ){
|
||||
public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
|
||||
final Token[] tokens;
|
||||
public TokenArrayAnalyzer(Token... tokens) {
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
|
||||
final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
|
||||
int p = 0;
|
||||
|
||||
|
@ -318,7 +316,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
|
|||
this.p = 0;
|
||||
}
|
||||
};
|
||||
return ts;
|
||||
return new TokenStreamComponents(ts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -17,8 +17,13 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.CloseableThreadLocal;
+
 import java.io.IOException;
 import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * A convenience subclass of Analyzer that makes it easy to implement
@@ -38,6 +43,16 @@ import java.io.Reader;
  */
 public abstract class ReusableAnalyzerBase extends Analyzer {
 
+  private final ReuseStrategy reuseStrategy;
+
+  public ReusableAnalyzerBase() {
+    this(new GlobalReuseStrategy());
+  }
+
+  public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) {
+    this.reuseStrategy = reuseStrategy;
+  }
+
   /**
    * Creates a new {@link TokenStreamComponents} instance for this analyzer.
    *
@@ -66,14 +81,15 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
   @Override
   public final TokenStream reusableTokenStream(final String fieldName,
       final Reader reader) throws IOException {
-    TokenStreamComponents streamChain = (TokenStreamComponents)
-    getPreviousTokenStream();
+    TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
     final Reader r = initReader(reader);
-    if (streamChain == null || !streamChain.reset(r)) {
-      streamChain = createComponents(fieldName, r);
-      setPreviousTokenStream(streamChain);
+    if (components == null) {
+      components = createComponents(fieldName, r);
+      reuseStrategy.setReusableComponents(fieldName, components);
+    } else {
+      components.reset(r);
     }
-    return streamChain.getTokenStream();
+    return components.getTokenStream();
   }
 
   /**
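The new reusableTokenStream flow above is easiest to read from the caller's side: the first request for a field builds and caches the components via the ReuseStrategy, and later requests reset the cached Tokenizer against the new Reader and hand back the same sink. A small sketch, assuming the hypothetical PerFieldWhitespaceAnalyzer from the earlier example and an arbitrary field name:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;

public class ReuseFlowDemo {
  public static void main(String[] args) throws IOException {
    PerFieldWhitespaceAnalyzer analyzer = new PerFieldWhitespaceAnalyzer();

    TokenStream first = analyzer.reusableTokenStream("body", new StringReader("one two"));
    TokenStream second = analyzer.reusableTokenStream("body", new StringReader("three four"));

    // The second call did not build a new chain: the cached components were reset
    // against the new Reader, so the very same sink instance is handed back.
    System.out.println(first == second); // prints true
  }
}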
@@ -98,7 +114,16 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
   protected Reader initReader(Reader reader) {
     return reader;
   }
 
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void close() {
+    super.close();
+    reuseStrategy.close();
+  }
+
   /**
    * This class encapsulates the outer components of a token stream. It provides
    * access to the source ({@link Tokenizer}) and the outer end (sink), an
@@ -137,22 +162,16 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
     }
 
     /**
-     * Resets the encapsulated components with the given reader. This method by
-     * default returns <code>true</code> indicating that the components have
-     * been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
-     * their own {@link TokenStreamComponents} returning <code>false</code> if
-     * the components cannot be reset.
+     * Resets the encapsulated components with the given reader. If the components
+     * cannot be reset, an Exception should be thrown.
      *
      * @param reader
      *          a reader to reset the source component
-     * @return <code>true</code> if the components were reset, otherwise
-     *         <code>false</code>
     * @throws IOException
      *           if the component's reset method throws an {@link IOException}
      */
-    protected boolean reset(final Reader reader) throws IOException {
+    protected void reset(final Reader reader) throws IOException {
       source.reset(reader);
-      return true;
     }
 
     /**
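Under the old contract a TokenStreamComponents instance signalled "cannot reset" by returning false, and reusableTokenStream silently built a fresh chain; with this change reset(Reader) is void and a failure must be thrown. A hedged sketch of components relying on the new contract — the analyzer and the null check are illustrative only (ClassicAnalyzer and StandardAnalyzer later in this commit override reset the same way to push down maxTokenLength):

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

// Hypothetical analyzer whose components refuse to be reset without a usable reader.
final class ResetAwareAnalyzer extends ReusableAnalyzerBase {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    return new TokenStreamComponents(source) {
      @Override
      protected void reset(Reader r) throws IOException {
        if (r == null) {
          // the old API would have returned false here; the new one throws instead
          throw new IOException("components cannot be reset without a reader");
        }
        super.reset(r); // resets the source Tokenizer against the new reader
      }
    };
  }
}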
@ -166,4 +185,124 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Strategy defining how TokenStreamComponents are reused per call to
|
||||
* {@link ReusableAnalyzerBase#tokenStream(String, java.io.Reader)}.
|
||||
*/
|
||||
public static abstract class ReuseStrategy {
|
||||
|
||||
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
|
||||
|
||||
/**
|
||||
* Gets the reusable TokenStreamComponents for the field with the given name
|
||||
*
|
||||
* @param fieldName Name of the field whose reusable TokenStreamComponents
|
||||
* are to be retrieved
|
||||
* @return Reusable TokenStreamComponents for the field, or {@code null}
|
||||
* if there was no previous components for the field
|
||||
*/
|
||||
public abstract TokenStreamComponents getReusableComponents(String fieldName);
|
||||
|
||||
/**
|
||||
* Stores the given TokenStreamComponents as the reusable components for the
|
||||
* field with the given name
|
||||
*
|
||||
* @param fieldName Name of the field whose TokenStreamComponents are being set
|
||||
* @param components TokenStreamComponents which are to be reused for the field
|
||||
*/
|
||||
public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
|
||||
|
||||
/**
|
||||
* Returns the currently stored value
|
||||
*
|
||||
* @return Currently stored value or {@code null} if no value is stored
|
||||
*/
|
||||
protected final Object getStoredValue() {
|
||||
try {
|
||||
return storedValue.get();
|
||||
} catch (NullPointerException npe) {
|
||||
if (storedValue == null) {
|
||||
throw new AlreadyClosedException("this Analyzer is closed");
|
||||
} else {
|
||||
throw npe;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the stored value
|
||||
*
|
||||
* @param storedValue Value to store
|
||||
*/
|
||||
protected final void setStoredValue(Object storedValue) {
|
||||
try {
|
||||
this.storedValue.set(storedValue);
|
||||
} catch (NullPointerException npe) {
|
||||
if (storedValue == null) {
|
||||
throw new AlreadyClosedException("this Analyzer is closed");
|
||||
} else {
|
||||
throw npe;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the ReuseStrategy, freeing any resources
|
||||
*/
|
||||
public void close() {
|
||||
storedValue.close();
|
||||
storedValue = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of {@link ReuseStrategy} that reuses the same components for
|
||||
* every field.
|
||||
*/
|
||||
public final static class GlobalReuseStrategy extends ReuseStrategy {
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public TokenStreamComponents getReusableComponents(String fieldName) {
|
||||
return (TokenStreamComponents) getStoredValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
|
||||
setStoredValue(components);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of {@link ReuseStrategy} that reuses components per-field by
|
||||
* maintaining a Map of TokenStreamComponents per field name.
|
||||
*/
|
||||
public static class PerFieldReuseStrategy extends ReuseStrategy {
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public TokenStreamComponents getReusableComponents(String fieldName) {
|
||||
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
|
||||
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
|
||||
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
|
||||
if (componentsPerField == null) {
|
||||
componentsPerField = new HashMap<String, TokenStreamComponents>();
|
||||
setStoredValue(componentsPerField);
|
||||
}
|
||||
componentsPerField.put(fieldName, components);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
|||
* </ul>
|
||||
* @see MockTokenizer
|
||||
*/
|
||||
public final class MockAnalyzer extends Analyzer {
|
||||
public final class MockAnalyzer extends ReusableAnalyzerBase {
|
||||
private final CharacterRunAutomaton runAutomaton;
|
||||
private final boolean lowerCase;
|
||||
private final CharacterRunAutomaton filter;
|
||||
|
@ -62,6 +62,7 @@ public final class MockAnalyzer extends Analyzer {
|
|||
* @param enablePositionIncrements true if position increments should reflect filtered terms.
|
||||
*/
|
||||
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
|
||||
super(new PerFieldReuseStrategy());
|
||||
this.random = random;
|
||||
this.runAutomaton = runAutomaton;
|
||||
this.lowerCase = lowerCase;
|
||||
|
@ -88,41 +89,11 @@ public final class MockAnalyzer extends Analyzer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
|
||||
tokenizer.setEnableChecks(enableChecks);
|
||||
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
|
||||
filt = maybePayload(filt, fieldName);
|
||||
return filt;
|
||||
}
|
||||
|
||||
private class SavedStreams {
|
||||
MockTokenizer tokenizer;
|
||||
TokenFilter filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream reusableTokenStream(String fieldName, Reader reader)
|
||||
throws IOException {
|
||||
@SuppressWarnings("unchecked") Map<String,SavedStreams> map = (Map) getPreviousTokenStream();
|
||||
if (map == null) {
|
||||
map = new HashMap<String,SavedStreams>();
|
||||
setPreviousTokenStream(map);
|
||||
}
|
||||
|
||||
SavedStreams saved = map.get(fieldName);
|
||||
if (saved == null) {
|
||||
saved = new SavedStreams();
|
||||
saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
|
||||
saved.tokenizer.setEnableChecks(enableChecks);
|
||||
saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
|
||||
saved.filter = maybePayload(saved.filter, fieldName);
|
||||
map.put(fieldName, saved);
|
||||
return saved.filter;
|
||||
} else {
|
||||
saved.tokenizer.reset(reader);
|
||||
return saved.filter;
|
||||
}
|
||||
return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
|
||||
}
|
||||
|
||||
private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {
|
||||
|
|
|
@ -30,16 +30,15 @@ import java.io.Reader;
|
|||
*
|
||||
*
|
||||
**/
|
||||
public final class MockPayloadAnalyzer extends Analyzer {
|
||||
public final class MockPayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
return new MockPayloadFilter(result, fieldName);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
return new TokenStreamComponents(result, new MockPayloadFilter(result, fieldName));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene;
|
|||
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -34,32 +35,36 @@ public class TestAssertions extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
static class TestAnalyzer1 extends Analyzer {
|
||||
static class TestAnalyzer1 extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public final TokenStream tokenStream(String s, Reader r) { return null; }
|
||||
@Override
|
||||
public final TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static final class TestAnalyzer2 extends Analyzer {
|
||||
static final class TestAnalyzer2 extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String s, Reader r) { return null; }
|
||||
@Override
|
||||
public TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static class TestAnalyzer3 extends Analyzer {
|
||||
static class TestAnalyzer3 extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String s, Reader r) { return null; }
|
||||
@Override
|
||||
public TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static class TestAnalyzer4 extends Analyzer {
|
||||
static class TestAnalyzer4 extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public final TokenStream tokenStream(String s, Reader r) { return null; }
|
||||
@Override
|
||||
public TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static class TestTokenStream1 extends TokenStream {
|
||||
|
|
|
@ -20,11 +20,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
@ -107,10 +103,10 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testPositionIncrementGap() throws IOException {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -142,10 +138,11 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testTokenReuse() throws IOException {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
|
||||
boolean first = true;
|
||||
AttributeSource.State state;
|
||||
|
||||
|
@ -187,7 +184,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
|
||||
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
};
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -31,11 +31,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.BinaryField;
|
||||
|
@ -1710,10 +1706,10 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
static final class StringSplitAnalyzer extends Analyzer {
|
||||
static final class StringSplitAnalyzer extends ReusableAnalyzerBase {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new StringSplitTokenizer(reader);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new StringSplitTokenizer(reader));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,11 +23,7 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
@ -179,21 +175,20 @@ public class TestIndexWriterCommit extends LuceneTestCase {
|
|||
Analyzer analyzer;
|
||||
if (random.nextBoolean()) {
|
||||
// no payloads
|
||||
analyzer = new Analyzer() {
|
||||
analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// fixed length payloads
|
||||
final int length = random.nextInt(200);
|
||||
analyzer = new Analyzer() {
|
||||
analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MockFixedLengthPayloadFilter(random,
|
||||
new MockTokenizer(reader, MockTokenizer.WHITESPACE, true),
|
||||
length);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(random, tokenizer, length));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -26,10 +26,7 @@ import java.util.Random;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
@ -902,10 +899,10 @@ public class TestIndexWriterDelete extends LuceneTestCase {
|
|||
final Random r = random;
|
||||
Directory dir = newDirectory();
|
||||
// note this test explicitly disables payloads
|
||||
final Analyzer analyzer = new Analyzer() {
|
||||
final Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||
}
|
||||
};
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
|
||||
|
|
|
@ -27,11 +27,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -390,12 +386,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
doc.add(newField("field", "a field", TextField.TYPE_STORED));
|
||||
w.addDocument(doc);
|
||||
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||
return new CrashingFilter(fieldName, tokenizer);
|
||||
return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -458,13 +454,13 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
// LUCENE-1072
|
||||
public void testExceptionFromTokenStream() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {
|
||||
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||
return new TokenFilter(tokenizer) {
|
||||
return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
|
||||
private int count = 0;
|
||||
|
||||
@Override
|
||||
|
@ -480,7 +476,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
super.reset();
|
||||
this.count = 0;
|
||||
}
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
});
|
||||
|
@ -595,12 +591,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testDocumentsWriterExceptions() throws IOException {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||
return new CrashingFilter(fieldName, tokenizer);
|
||||
return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -691,12 +687,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testDocumentsWriterExceptionThreads() throws Exception {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||
return new CrashingFilter(fieldName, tokenizer);
|
||||
return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -20,10 +20,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
|
@ -71,10 +68,10 @@ public class TestLazyProxSkipping extends LuceneTestCase {
|
|||
private void createIndex(int numHits) throws IOException {
|
||||
int numDocs = 500;
|
||||
|
||||
final Analyzer analyzer = new Analyzer() {
|
||||
final Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||
}
|
||||
};
|
||||
Directory directory = new SeekCountingDirectory(new RAMDirectory());
|
||||
|
|
|
@ -21,10 +21,7 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
|
@ -114,11 +111,12 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
|
|||
assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
|
||||
}
|
||||
|
||||
private static class PayloadAnalyzer extends Analyzer {
|
||||
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
private final AtomicInteger payloadCount = new AtomicInteger(-1);
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new PayloadFilter(payloadCount, new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||
return new TokenStreamComponents(tokenizer, new PayloadFilter(payloadCount, tokenizer));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,11 +25,7 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -105,12 +101,12 @@ public class TestPayloads extends LuceneTestCase {
|
|||
// so this field is used to check if the DocumentWriter correctly enables the payloads bit
|
||||
// even if only some term positions have payloads
|
||||
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
|
||||
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
|
||||
d.add(newField("f2", "This field has payloads in all docs NO PAYLOAD", TextField.TYPE_UNSTORED));
|
||||
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
|
||||
// enabled in only some documents
|
||||
d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED));
|
||||
// only add payload data for field f2
|
||||
analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1);
|
||||
analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
|
||||
writer.addDocument(d);
|
||||
// flush
|
||||
writer.close();
|
||||
|
|
|
@ -20,8 +20,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -64,10 +63,10 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
final class BugReproAnalyzer extends Analyzer{
|
||||
final class BugReproAnalyzer extends ReusableAnalyzerBase {
|
||||
@Override
|
||||
public TokenStream tokenStream(String arg0, Reader arg1) {
|
||||
return new BugReproAnalyzerTokenizer();
|
||||
public TokenStreamComponents createComponents(String arg0, Reader arg1) {
|
||||
return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -137,7 +137,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
|||
super.tearDown();
|
||||
}
|
||||
|
||||
private class MyTokenStream extends TokenStream {
|
||||
private class MyTokenStream extends Tokenizer {
|
||||
private int tokenUpto;
|
||||
|
||||
private final CharTermAttribute termAtt;
|
||||
|
@ -175,10 +175,10 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private class MyAnalyzer extends Analyzer {
|
||||
private class MyAnalyzer extends ReusableAnalyzerBase {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MyTokenStream();
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new MyTokenStream());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.Reader;
|
|||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -77,10 +77,10 @@ public class TestTermdocPerf extends LuceneTestCase {
|
|||
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
|
||||
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
|
||||
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return ts;
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(ts);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -345,7 +346,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static class CannedAnalyzer extends Analyzer {
|
||||
private static class CannedAnalyzer extends ReusableAnalyzerBase {
|
||||
private final TokenAndPos[] tokens;
|
||||
|
||||
public CannedAnalyzer(TokenAndPos[] tokens) {
|
||||
|
@ -353,8 +354,8 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new CannedTokenizer(tokens);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new CannedTokenizer(tokens));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,10 +55,10 @@ public class TestPhraseQuery extends LuceneTestCase {
|
|||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
directory = newDirectory();
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -56,10 +56,10 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
final static boolean VERBOSE = false;
|
||||
|
||||
public void testSetPosition() throws Exception {
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new TokenStream() {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new Tokenizer() {
|
||||
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
|
||||
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
|
||||
private int i = 0;
|
||||
|
@ -85,7 +85,7 @@ public class TestPositionIncrement extends LuceneTestCase {
|
|||
super.reset();
|
||||
this.i = 0;
|
||||
}
|
||||
};
|
||||
});
|
||||
}
|
||||
};
|
||||
Directory store = newDirectory();
|
||||
|
|
|
@ -190,7 +190,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
|||
assertFalse("queries with different inclusive are not equal", query.equals(other));
|
||||
}
|
||||
|
||||
private static class SingleCharAnalyzer extends Analyzer {
|
||||
private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
private static class SingleCharTokenizer extends Tokenizer {
|
||||
char[] buffer = new char[1];
|
||||
|
@ -225,19 +225,8 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
||||
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
|
||||
if (tokenizer == null) {
|
||||
tokenizer = new SingleCharTokenizer(reader);
|
||||
setPreviousTokenStream(tokenizer);
|
||||
} else
|
||||
tokenizer.reset(reader);
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new SingleCharTokenizer(reader);
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
return new TokenStreamComponents(new SingleCharTokenizer(reader));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,14 +55,16 @@ public class PayloadHelper {
|
|||
|
||||
public IndexReader reader;
|
||||
|
||||
public final class PayloadAnalyzer extends Analyzer {
|
||||
public final class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
public PayloadAnalyzer() {
|
||||
super(new PerFieldReuseStrategy());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
result = new PayloadFilter(result, fieldName);
|
||||
return result;
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,12 +55,11 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
private static byte[] payload2 = new byte[]{2};
|
||||
private static byte[] payload4 = new byte[]{4};
|
||||
|
||||
private static class PayloadAnalyzer extends Analyzer {
|
||||
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
result = new PayloadFilter(result, fieldName);
|
||||
return result;
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -64,14 +64,16 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
private static final byte[] payloadMultiField2 = new byte[]{4};
|
||||
protected static Directory directory;
|
||||
|
||||
private static class PayloadAnalyzer extends Analyzer {
|
||||
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
private PayloadAnalyzer() {
|
||||
super(new PerFieldReuseStrategy());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
result = new PayloadFilter(result, fieldName);
|
||||
return result;
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -70,14 +71,12 @@ public class TestBasics extends LuceneTestCase {
|
|||
private static Directory directory;
|
||||
|
||||
static final class SimplePayloadFilter extends TokenFilter {
|
||||
String fieldName;
|
||||
int pos;
|
||||
final PayloadAttribute payloadAttr;
|
||||
final CharTermAttribute termAttr;
|
||||
|
||||
public SimplePayloadFilter(TokenStream input, String fieldName) {
|
||||
public SimplePayloadFilter(TokenStream input) {
|
||||
super(input);
|
||||
this.fieldName = fieldName;
|
||||
pos = 0;
|
||||
payloadAttr = input.addAttribute(PayloadAttribute.class);
|
||||
termAttr = input.addAttribute(CharTermAttribute.class);
|
||||
|
@ -105,7 +104,7 @@ public class TestBasics extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true), fieldName);
|
||||
return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -23,10 +23,7 @@ import java.util.Collection;
|
|||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
@ -479,18 +476,16 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
assertEquals(numSpans, cnt);
|
||||
}
|
||||
|
||||
final class PayloadAnalyzer extends Analyzer {
|
||||
final class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
result = new PayloadFilter(result, fieldName);
|
||||
return result;
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(result, new PayloadFilter(result));
|
||||
}
|
||||
}
|
||||
|
||||
final class PayloadFilter extends TokenFilter {
|
||||
String fieldName;
|
||||
Set<String> entities = new HashSet<String>();
|
||||
Set<String> nopayload = new HashSet<String>();
|
||||
int pos;
|
||||
|
@ -498,9 +493,8 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
CharTermAttribute termAtt;
|
||||
PositionIncrementAttribute posIncrAtt;
|
||||
|
||||
public PayloadFilter(TokenStream input, String fieldName) {
|
||||
public PayloadFilter(TokenStream input) {
|
||||
super(input);
|
||||
this.fieldName = fieldName;
|
||||
pos = 0;
|
||||
entities.add("xx");
|
||||
entities.add("one");
|
||||
|
@ -536,13 +530,12 @@ public class TestPayloadSpans extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public final class TestPayloadAnalyzer extends Analyzer {
|
||||
public final class TestPayloadAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
result = new PayloadFilter(result, fieldName);
|
||||
return result;
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||
return new TokenStreamComponents(result, new PayloadFilter(result));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -106,3 +106,5 @@ Build
|
|||
* LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the
|
||||
analysis module. The 'smartcn' and 'stempel' components now depend on 'common'.
|
||||
(Robert Muir)
|
||||
|
||||
* LUCENE-3376: Moved ReusableAnalyzerBase into lucene core. (Chris Male)
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
|
@ -66,7 +67,7 @@ import org.apache.lucene.util.Version;
|
|||
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public final class PatternAnalyzer extends Analyzer {
|
||||
public final class PatternAnalyzer extends ReusableAnalyzerBase {
|
||||
|
||||
/** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
|
||||
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
|
||||
|
@ -187,25 +188,21 @@ public final class PatternAnalyzer extends Analyzer {
|
|||
* the string to tokenize
|
||||
* @return a new token stream
|
||||
*/
|
||||
public TokenStream tokenStream(String fieldName, String text) {
|
||||
public TokenStreamComponents createComponents(String fieldName, String text) {
|
||||
// Ideally the Analyzer superclass should have a method with the same signature,
|
||||
// with a default impl that simply delegates to the StringReader flavour.
|
||||
if (text == null)
|
||||
throw new IllegalArgumentException("text must not be null");
|
||||
|
||||
TokenStream stream;
|
||||
if (pattern == NON_WORD_PATTERN) { // fast path
|
||||
stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
|
||||
return new TokenStreamComponents(new FastStringTokenizer(text, true, toLowerCase, stopWords));
|
||||
} else if (pattern == WHITESPACE_PATTERN) { // fast path
|
||||
return new TokenStreamComponents(new FastStringTokenizer(text, false, toLowerCase, stopWords));
|
||||
}
|
||||
else if (pattern == WHITESPACE_PATTERN) { // fast path
|
||||
stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
|
||||
}
|
||||
else {
|
||||
stream = new PatternTokenizer(text, pattern, toLowerCase);
|
||||
if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
|
||||
}
|
||||
|
||||
return stream;
|
||||
|
||||
Tokenizer tokenizer = new PatternTokenizer(text, pattern, toLowerCase);
|
||||
TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
|
||||
return new TokenStreamComponents(tokenizer, result);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -220,10 +217,10 @@ public final class PatternAnalyzer extends Analyzer {
|
|||
* @return a new token stream
|
||||
*/
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
try {
|
||||
String text = toString(reader);
|
||||
return tokenStream(fieldName, text);
|
||||
return createComponents(fieldName, text);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
|
|
@ -123,9 +123,9 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
|
|||
tok = new StopFilter(matchVersion, tok, stopwords);
|
||||
return new TokenStreamComponents(src, tok) {
|
||||
@Override
|
||||
protected boolean reset(final Reader reader) throws IOException {
|
||||
protected void reset(final Reader reader) throws IOException {
|
||||
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
|
||||
return super.reset(reader);
|
||||
super.reset(reader);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -124,9 +124,9 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
|||
tok = new StopFilter(matchVersion, tok, stopwords);
|
||||
return new TokenStreamComponents(src, tok) {
|
||||
@Override
|
||||
protected boolean reset(final Reader reader) throws IOException {
|
||||
protected void reset(final Reader reader) throws IOException {
|
||||
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
|
||||
return super.reset(reader);
|
||||
super.reset(reader);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@@ -21,9 +21,7 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.Version;

@@ -66,10 +64,10 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
    * Analyzer that just uses ChineseTokenizer, not ChineseFilter.
    * convenience to show the behavior of the tokenizer
    */
-  private class JustChineseTokenizerAnalyzer extends Analyzer {
+  private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ChineseTokenizer(reader);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new ChineseTokenizer(reader));
     }
   }
 
@@ -77,10 +75,11 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
    * Analyzer that just uses ChineseFilter, not ChineseTokenizer.
    * convenience to show the behavior of the filter.
    */
-  private class JustChineseFilterAnalyzer extends Analyzer {
+  private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ChineseFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
     }
   }
 

@@ -19,11 +19,8 @@ package org.apache.lucene.analysis.commongrams;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;

@@ -87,11 +84,12 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    * @return Map<String,String>
    */
  public void testCommonGramsQueryFilter() throws Exception {
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader in) {
-        return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
-            new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
+      public TokenStreamComponents createComponents(String field, Reader in) {
+        Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
+            tokenizer, commonWords)));
       }
     };
 
@@ -156,11 +154,12 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
   }
 
   public void testCommonGramsFilter() throws Exception {
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader in) {
-        return new CommonGramsFilter(TEST_VERSION_CURRENT,
-            new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
+      public TokenStreamComponents createComponents(String field, Reader in) {
+        Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT,
+            tokenizer, commonWords));
       }
     };
 

@@ -21,10 +21,7 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;

@@ -120,12 +117,12 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
 
-  private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
+  private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new LowerCaseFilter(TEST_VERSION_CURRENT,
-          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
     }
 
   }

@@ -237,4 +234,4 @@ final class PayloadSetter extends TokenFilter {
     data[0]++;
     return true;
   }
 }
 }

@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
  */
 
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.Arrays;
 import java.util.regex.Pattern;
 

@@ -128,7 +129,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
     assertTokenStreamContents(ts, expected);
 
     // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
-    TokenStream ts2 = analyzer.tokenStream("dummy", document);
+    TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
     assertTokenStreamContents(ts2, expected);
   }
 }

@@ -17,12 +17,7 @@
 
 package org.apache.lucene.analysis.miscellaneous;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

@@ -245,13 +240,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 1, 1, 1 });
 
     /* analyzer that will consume tokens with large position increments */
-    Analyzer a2 = new Analyzer() {
+    Analyzer a2 = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
-            new LargePosIncTokenFilter(
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
-            flags, protWords);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+            new LargePosIncTokenFilter(tokenizer),
+            flags, protWords));
       }
     };
 

@@ -278,13 +273,14 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 6, 14, 19 },
         new int[] { 1, 11, 1 });
 
-    Analyzer a3 = new Analyzer() {
+    Analyzer a3 = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader reader) {
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
         StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
+            tokenizer, StandardAnalyzer.STOP_WORDS_SET);
         filter.setEnablePositionIncrements(true);
-        return new WordDelimiterFilter(filter, flags, protWords);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords));
       }
     };
 

@@ -144,32 +144,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
     assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
   }
 
-  /*
-   * analyzer that does not support reuse
-   * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
-   */
-  private class NonreusableAnalyzer extends Analyzer {
-    int invocationCount = 0;
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      if (++invocationCount % 2 == 0)
-        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-      else
-        return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
-    }
-  }
-
-  public void testWrappingNonReusableAnalyzer() throws Exception {
-    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
-    a.addStopWords(reader, 10);
-
-    TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
-    assertTokenStreamContents(tokenStream, new String[0]);
-
-    tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
-    assertTokenStreamContents(tokenStream, new String[0]);
-  }
-
   public void testTokenStream() throws Exception {
     QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
     a.addStopWords(reader, 10);
 

@@ -148,40 +148,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
         new int[] { 6, 9, 9, 12, 12, 18, 18 },
         new int[] { 1, 0, 1, 0, 1, 0, 1 });
   }
 
-  /*
-   * analyzer that does not support reuse
-   * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
-   */
-  private class NonreusableAnalyzer extends Analyzer {
-    int invocationCount = 0;
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      if (++invocationCount % 2 == 0)
-        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-      else
-        return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
-    }
-  }
-
-  public void testWrappedAnalyzerDoesNotReuse() throws Exception {
-    Analyzer a = new ShingleAnalyzerWrapper(new NonreusableAnalyzer());
-    assertAnalyzesToReuse(a, "please divide into shingles.",
-        new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
-        new int[] { 0, 0, 7, 7, 14, 14, 19 },
-        new int[] { 6, 13, 13, 18, 18, 27, 27 },
-        new int[] { 1, 0, 1, 0, 1, 0, 1 });
-    assertAnalyzesToReuse(a, "please divide into shingles.",
-        new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles.", "shingles." },
-        new int[] { 0, 0, 7, 7, 14, 14, 19 },
-        new int[] { 6, 13, 13, 18, 18, 28, 28 },
-        new int[] { 1, 0, 1, 0, 1, 0, 1 });
-    assertAnalyzesToReuse(a, "please divide into shingles.",
-        new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
-        new int[] { 0, 0, 7, 7, 14, 14, 19 },
-        new int[] { 6, 13, 13, 18, 18, 27, 27 },
-        new int[] { 1, 0, 1, 0, 1, 0, 1 });
-  }
-
   public void testNonDefaultMinShingleSize() throws Exception {
     ShingleAnalyzerWrapper analyzer

@@ -18,9 +18,7 @@ package org.apache.lucene.collation;
  */
 
 
 import org.apache.lucene.analysis.CollationTestBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.util.BytesRef;
 

@@ -54,7 +52,7 @@ public class TestCollationKeyFilter extends CollationTestBase {
       (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
 
 
-  public final class TestAnalyzer extends Analyzer {
+  public final class TestAnalyzer extends ReusableAnalyzerBase {
     private Collator _collator;
 
     TestAnalyzer(Collator collator) {

@@ -62,10 +60,9 @@ public class TestCollationKeyFilter extends CollationTestBase {
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new KeywordTokenizer(reader);
-      result = new CollationKeyFilter(result, _collator);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
     }
   }
 

@@ -20,20 +20,18 @@ package org.apache.lucene.analysis.icu;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Tests ICUFoldingFilter
  */
 public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
+  Analyzer a = new ReusableAnalyzerBase() {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ICUFoldingFilter(
-          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
     }
   };
   public void testDefaults() throws IOException {

@@ -20,9 +20,7 @@ package org.apache.lucene.analysis.icu;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 import com.ibm.icu.text.Normalizer2;

@@ -31,11 +29,11 @@ import com.ibm.icu.text.Normalizer2;
  * Tests the ICUNormalizer2Filter
  */
 public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
+  Analyzer a = new ReusableAnalyzerBase() {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ICUNormalizer2Filter(
-          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
     }
   };
 
@@ -61,13 +59,14 @@ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
   }
 
   public void testAlternate() throws IOException {
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new ICUNormalizer2Filter(
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
+            tokenizer,
             /* specify nfc with decompose to get nfd */
-            Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE));
+            Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
       }
     };
 

@@ -20,9 +20,7 @@ package org.apache.lucene.collation;
 
 import com.ibm.icu.text.Collator;
 
 import org.apache.lucene.analysis.CollationTestBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.util.BytesRef;
 

@@ -46,7 +44,7 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
       (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
 
 
-  public final class TestAnalyzer extends Analyzer {
+  public final class TestAnalyzer extends ReusableAnalyzerBase {
     private Collator _collator;
 
     TestAnalyzer(Collator collator) {

@@ -54,10 +52,9 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new KeywordTokenizer(reader);
-      result = new ICUCollationKeyFilter(result, _collator);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
     }
   }
 

@@ -25,6 +25,7 @@ import java.util.Collections;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.analysis.TokenStream;

@@ -54,7 +55,7 @@ import org.apache.lucene.util.Version;
  * </p>
  * @lucene.experimental
  */
-public final class SmartChineseAnalyzer extends Analyzer {
+public final class SmartChineseAnalyzer extends ReusableAnalyzerBase {
 
   private final Set<?> stopWords;
 

@@ -141,9 +142,9 @@ public final class SmartChineseAnalyzer extends Analyzer {
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new SentenceTokenizer(reader);
-    result = new WordTokenFilter(result);
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    Tokenizer tokenizer = new SentenceTokenizer(reader);
+    TokenStream result = new WordTokenFilter(tokenizer);
     // result = new LowerCaseFilter(result);
     // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
     // The porter stemming is too strict, this is not a bug, this is a feature:)

@@ -151,32 +152,6 @@ public final class SmartChineseAnalyzer extends Analyzer {
     if (!stopWords.isEmpty()) {
       result = new StopFilter(matchVersion, result, stopWords, false);
     }
-    return result;
-  }
-
-  private static final class SavedStreams {
-    Tokenizer tokenStream;
-    TokenStream filteredTokenStream;
-  }
-
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      setPreviousTokenStream(streams);
-      streams.tokenStream = new SentenceTokenizer(reader);
-      streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
-      streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
-      if (!stopWords.isEmpty()) {
-        streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
-      }
-    } else {
-      streams.tokenStream.reset(reader);
-      streams.filteredTokenStream.reset(); // reset WordTokenFilter's state
-    }
-
-    return streams.filteredTokenStream;
+    return new TokenStreamComponents(tokenizer, result);
   }
 }

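Illustrative aside (not part of the patch): after the conversion above, SmartChineseAnalyzer no longer carries its own SavedStreams/reusableTokenStream plumbing; ReusableAnalyzerBase caches the TokenStreamComponents built by createComponents() and resets them on later calls. A small usage sketch under that assumption (the field name and sample text are made up):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.util.Version;

    public class SmartChineseReuseDemo {
      public static void main(String[] args) throws IOException {
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
        // Both calls go through the inherited reusableTokenStream(); the second call
        // reuses the SentenceTokenizer/WordTokenFilter chain instead of rebuilding it.
        TokenStream first = analyzer.reusableTokenStream("body", new StringReader("我爱北京天安门"));
        TokenStream second = analyzer.reusableTokenStream("body", new StringReader("第二段文字"));
      }
    }
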
@@ -5,10 +5,7 @@ import java.io.Reader;
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;

@@ -140,10 +137,10 @@ public class CategoryListIteratorTest extends LuceneTestCase {
     DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
         new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
     // this test requires that no payloads ever be randomly present!
-    final Analyzer noPayloadsAnalyzer = new Analyzer() {
+    final Analyzer noPayloadsAnalyzer = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false));
       }
     };
     // NOTE: test is wired to LogMP... because test relies on certain docids having payloads

@@ -20,10 +20,7 @@ package org.apache.lucene.queryparser.analyzing;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.util.LuceneTestCase;

@@ -137,14 +134,11 @@ final class TestFoldingFilter extends TokenFilter {
   }
 }
 
-final class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer {
-  public ASCIIAnalyzer() {
-  }
+final class ASCIIAnalyzer extends ReusableAnalyzerBase {
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
-    result = new TestFoldingFilter(result);
-    return result;
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+    return new TokenStreamComponents(result, new TestFoldingFilter(result));
   }
 }

@@ -122,16 +122,12 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
    * Expands "multi" to "multi" and "multi2", both at the same position,
    * and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
    */
-  private class MultiAnalyzer extends Analyzer {
-
-    public MultiAnalyzer() {
-    }
+  private class MultiAnalyzer extends ReusableAnalyzerBase {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
-      result = new TestFilter(result);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(result, new TestFilter(result));
     }
   }
 

@@ -196,16 +192,12 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
    * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
    * Does not work correctly for input other than "the quick brown ...".
    */
-  private class PosIncrementAnalyzer extends Analyzer {
-
-    public PosIncrementAnalyzer() {
-    }
+  private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
-      result = new TestPosIncrementFilter(result);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
     }
   }
 

@@ -22,9 +22,7 @@ import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;

@@ -302,22 +300,23 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
   /**
    * Return empty tokens for field "f1".
    */
-  private static class AnalyzerReturningNull extends Analyzer {
+  private static class AnalyzerReturningNull extends ReusableAnalyzerBase {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
 
     public AnalyzerReturningNull() {
+      super(new PerFieldReuseStrategy());
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new EmptyTokenStream();
+        return new TokenStreamComponents(new EmptyTokenStream());
       } else {
-        return stdAnalyzer.tokenStream(fieldName, reader);
+        return stdAnalyzer.createComponents(fieldName, reader);
       }
     }
 
-    private static class EmptyTokenStream extends TokenStream {
+    private static class EmptyTokenStream extends Tokenizer {
       @Override
       public boolean incrementToken() throws IOException {
         return false;

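Illustrative aside (not part of the patch): the super(new PerFieldReuseStrategy()) call above opts the analyzer into per-field caching of TokenStreamComponents, which matters when different fields get different component chains. A minimal sketch of that pattern; the class name and field names here are hypothetical:

    import java.io.Reader;

    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;

    final class PerFieldDemoAnalyzer extends ReusableAnalyzerBase {
      PerFieldDemoAnalyzer() {
        // cache one TokenStreamComponents instance per field rather than one shared globally
        super(new PerFieldReuseStrategy());
      }

      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // keyword-style tokenization for "id", whitespace tokenization for everything else
        Tokenizer tokenizer = "id".equals(fieldName)
            ? new MockTokenizer(reader, MockTokenizer.KEYWORD, false)
            : new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer);
      }
    }
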
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

@@ -41,7 +42,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
     }
   }
 
-  private static class CannedAnalyzer extends Analyzer {
+  private static class CannedAnalyzer extends ReusableAnalyzerBase {
     private final TokenAndPos[] tokens;
 
     public CannedAnalyzer(TokenAndPos[] tokens) {

@@ -49,8 +50,8 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new CannedTokenizer(tokens);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new CannedTokenizer(tokens));
     }
   }
 

@@ -112,12 +112,13 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
     }
   }
 
-  public static final class QPTestAnalyzer extends Analyzer {
+  public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
 
     /** Filters MockTokenizer with StopFilter. */
     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
     }
   }
 

@@ -143,16 +143,12 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
    * Expands "multi" to "multi" and "multi2", both at the same position, and
    * expands "triplemulti" to "triplemulti", "multi3", and "multi2".
    */
-  private class MultiAnalyzer extends Analyzer {
-
-    public MultiAnalyzer() {
-    }
+  private class MultiAnalyzer extends ReusableAnalyzerBase {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
-      result = new TestFilter(result);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(result, new TestFilter(result));
     }
   }
 

@@ -214,16 +210,12 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
    * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
    * correctly for input other than "the quick brown ...".
    */
-  private class PosIncrementAnalyzer extends Analyzer {
-
-    public PosIncrementAnalyzer() {
-    }
+  private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
-      result = new TestPosIncrementFilter(result);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
     }
   }
 

@@ -21,9 +21,7 @@ import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;

@@ -340,22 +338,23 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
   /**
    * Return empty tokens for field "f1".
    */
-  private static final class AnalyzerReturningNull extends Analyzer {
+  private static final class AnalyzerReturningNull extends ReusableAnalyzerBase {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
 
     public AnalyzerReturningNull() {
+      super(new PerFieldReuseStrategy());
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new EmptyTokenStream();
+        return new TokenStreamComponents(new EmptyTokenStream());
      } else {
-        return stdAnalyzer.tokenStream(fieldName, reader);
+        return stdAnalyzer.createComponents(fieldName, reader);
      }
    }
 
-    private static class EmptyTokenStream extends TokenStream {
+    private static class EmptyTokenStream extends Tokenizer {
       @Override
       public boolean incrementToken() {
         return false;

@@ -128,12 +128,13 @@ public class TestQPHelper extends LuceneTestCase {
     }
   }
 
-  public static final class QPTestAnalyzer extends Analyzer {
+  public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
 
     /** Filters MockTokenizer with StopFilter. */
     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
     }
   }
 

@@ -344,10 +345,10 @@ public class TestQPHelper extends LuceneTestCase {
     }
   }
 
-  private class SimpleCJKAnalyzer extends Analyzer {
+  private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimpleCJKTokenizer(reader);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
     }
   }
 

@@ -1241,10 +1242,10 @@ public class TestQPHelper extends LuceneTestCase {
     }
   }
 
-  private class CannedAnalyzer extends Analyzer {
+  private class CannedAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String ignored, Reader alsoIgnored) {
-      return new CannedTokenStream();
+    public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
+      return new TokenStreamComponents(new CannedTokenStream());
     }
   }
 

@@ -25,13 +25,7 @@ import java.util.Date;
 import java.util.GregorianCalendar;
 import java.util.Locale;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenFilter;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

@@ -104,12 +98,13 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
   }
 
 
-  public static final class QPTestAnalyzer extends Analyzer {
+  public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
 
     /** Filters MockTokenizer with StopFilter. */
     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
     }
   }
 

@@ -245,10 +240,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
     }
   }
 
-  private class SimpleCJKAnalyzer extends Analyzer {
+  private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimpleCJKTokenizer(reader);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
     }
   }
 

@@ -348,10 +343,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
     assertQueryEquals("a OR -b", null, "a -b");
 
     // +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
      }
    };
     assertQueryEquals("a - b", a, "a - b");

@@ -1162,18 +1157,19 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
   }
 
   /** whitespace+lowercase analyzer with synonyms */
-  private class Analyzer1 extends Analyzer {
+  private class Analyzer1 extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new MockSynonymFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
     }
   }
 
   /** whitespace+lowercase analyzer without synonyms */
-  private class Analyzer2 extends Analyzer {
+  private class Analyzer2 extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
     }
   }
 

@@ -1235,10 +1231,11 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
     }
 
   }
-  private class MockCollationAnalyzer extends Analyzer {
+  private class MockCollationAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(tokenizer, new MockCollationFilter(tokenizer));
     }
   }
 

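Closing note (illustrative only): every conversion in this commit follows the same shape. The per-call tokenStream() factory is replaced by createComponents(), which hands the Tokenizer and the end of the filter chain to TokenStreamComponents so the base class can cache and reset them between requests. A condensed before/after sketch, using a hypothetical SomeTokenFilter:

    // Before: a new chain is built on every call and nothing is reused.
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
      result = new SomeTokenFilter(result);   // hypothetical filter
      return result;
    }

    // After: the chain is built once per reuse slot; ReusableAnalyzerBase resets it per request.
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
      return new TokenStreamComponents(tokenizer, new SomeTokenFilter(tokenizer));
    }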