mirror of https://github.com/apache/lucene.git
LUCENE-3396: Converted most Analyzers over to using ReusableAnalyzerBase
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1169607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e287b70df3
commit
4c5606ee29
|
@ -171,6 +171,9 @@ Changes in backwards compatibility policy
|
||||||
IndexableFieldType. See MIGRATE.txt for more details.
|
IndexableFieldType. See MIGRATE.txt for more details.
|
||||||
(Nikola Tankovic, Mike McCandless, Chris Male)
|
(Nikola Tankovic, Mike McCandless, Chris Male)
|
||||||
|
|
||||||
|
* LUCENE-3396: ReusableAnalyzerBase.TokenStreamComponents.reset(Reader) now returns void instead
|
||||||
|
of boolean. If a Component cannot be reset, it should throw an Exception.
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
|
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
|
||||||
|
@ -523,6 +526,12 @@ New features
|
||||||
|
|
||||||
(David Mark Nemeskey via Robert Muir)
|
(David Mark Nemeskey via Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3396: ReusableAnalyzerBase now provides a ReuseStrategy abstraction which
|
||||||
|
controls how TokenStreamComponents are reused per request. Two implementations are
|
||||||
|
provided - GlobalReuseStrategy which implements the current behavior of sharing
|
||||||
|
components between all fields, and PerFieldReuseStrategy which shares per field.
|
||||||
|
(Chris Male)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
|
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
|
||||||
|
|
|
@ -1802,7 +1802,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
// behaviour to synonyms
|
// behaviour to synonyms
|
||||||
// ===================================================================
|
// ===================================================================
|
||||||
|
|
||||||
final class SynonymAnalyzer extends Analyzer {
|
final class SynonymAnalyzer extends ReusableAnalyzerBase {
|
||||||
private Map<String,String> synonyms;
|
private Map<String,String> synonyms;
|
||||||
|
|
||||||
public SynonymAnalyzer(Map<String,String> synonyms) {
|
public SynonymAnalyzer(Map<String,String> synonyms) {
|
||||||
|
@ -1816,12 +1816,12 @@ final class SynonymAnalyzer extends Analyzer {
|
||||||
* java.io.Reader)
|
* java.io.Reader)
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String arg0, Reader arg1) {
|
public TokenStreamComponents createComponents(String arg0, Reader arg1) {
|
||||||
Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
|
Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
|
||||||
stream.addAttribute(CharTermAttribute.class);
|
stream.addAttribute(CharTermAttribute.class);
|
||||||
stream.addAttribute(PositionIncrementAttribute.class);
|
stream.addAttribute(PositionIncrementAttribute.class);
|
||||||
stream.addAttribute(OffsetAttribute.class);
|
stream.addAttribute(OffsetAttribute.class);
|
||||||
return new SynonymTokenizer(stream, synonyms);
|
return new TokenStreamComponents(stream, new SynonymTokenizer(stream, synonyms));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,7 @@ package org.apache.lucene.search.highlight;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
|
|
||||||
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
@ -52,14 +49,13 @@ public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
||||||
assertTokenStreamContents(filter, new String[] {"short", "toolong",
|
assertTokenStreamContents(filter, new String[] {"short", "toolong",
|
||||||
"evenmuchlongertext"});
|
"evenmuchlongertext"});
|
||||||
|
|
||||||
// TODO: This is not actually testing reuse! (reusableTokenStream is not implemented)
|
checkOneTermReuse(new ReusableAnalyzerBase() {
|
||||||
checkOneTermReuse(new Analyzer() {
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
tokenizer.setEnableChecks(false);
|
tokenizer.setEnableChecks(false);
|
||||||
return new OffsetLimitTokenFilter(tokenizer, 10);
|
return new TokenStreamComponents(tokenizer, new OffsetLimitTokenFilter(tokenizer, 10));
|
||||||
}
|
}
|
||||||
}, "llenges", "llenges");
|
}, "llenges", "llenges");
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,9 +20,7 @@ package org.apache.lucene.search.highlight;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
@ -50,15 +48,15 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||||
public class TokenSourcesTest extends LuceneTestCase {
|
public class TokenSourcesTest extends LuceneTestCase {
|
||||||
private static final String FIELD = "text";
|
private static final String FIELD = "text";
|
||||||
|
|
||||||
private static final class OverlapAnalyzer extends Analyzer {
|
private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new TokenStreamOverlap();
|
return new TokenStreamComponents(new TokenStreamOverlap());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class TokenStreamOverlap extends TokenStream {
|
private static final class TokenStreamOverlap extends Tokenizer {
|
||||||
private Token[] tokens;
|
private Token[] tokens;
|
||||||
|
|
||||||
private int i = -1;
|
private int i = -1;
|
||||||
|
|
|
@ -24,11 +24,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
|
@ -198,10 +194,10 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
return phraseQuery;
|
return phraseQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class BigramAnalyzer extends Analyzer {
|
static final class BigramAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new BasicNGramTokenizer( reader );
|
return new TokenStreamComponents(new BasicNGramTokenizer(reader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,9 +22,7 @@ import java.io.Reader;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
@ -292,15 +290,15 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final class TokenArrayAnalyzer extends Analyzer {
|
public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
|
||||||
Token[] tokens;
|
final Token[] tokens;
|
||||||
public TokenArrayAnalyzer( Token... tokens ){
|
public TokenArrayAnalyzer(Token... tokens) {
|
||||||
this.tokens = tokens;
|
this.tokens = tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
|
Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
|
||||||
final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
|
final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
|
||||||
int p = 0;
|
int p = 0;
|
||||||
|
|
||||||
|
@ -318,7 +316,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
|
||||||
this.p = 0;
|
this.p = 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
return ts;
|
return new TokenStreamComponents(ts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,8 +17,13 @@ package org.apache.lucene.analysis;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
import org.apache.lucene.util.CloseableThreadLocal;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A convenience subclass of Analyzer that makes it easy to implement
|
* A convenience subclass of Analyzer that makes it easy to implement
|
||||||
|
@ -38,6 +43,16 @@ import java.io.Reader;
|
||||||
*/
|
*/
|
||||||
public abstract class ReusableAnalyzerBase extends Analyzer {
|
public abstract class ReusableAnalyzerBase extends Analyzer {
|
||||||
|
|
||||||
|
private final ReuseStrategy reuseStrategy;
|
||||||
|
|
||||||
|
/**
 * Creates an analyzer whose components are reused through a newly created
 * {@link GlobalReuseStrategy}.
 */
public ReusableAnalyzerBase() {
  this(new GlobalReuseStrategy());
}
|
||||||
|
|
||||||
|
/**
 * Creates an analyzer whose components are reused through the given strategy.
 *
 * @param reuseStrategy strategy used to look up and store the reusable
 *        {@link TokenStreamComponents} of this analyzer
 */
public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) {
  this.reuseStrategy = reuseStrategy;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
|
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
|
||||||
*
|
*
|
||||||
|
@ -66,14 +81,15 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream reusableTokenStream(final String fieldName,
|
public final TokenStream reusableTokenStream(final String fieldName,
|
||||||
final Reader reader) throws IOException {
|
final Reader reader) throws IOException {
|
||||||
TokenStreamComponents streamChain = (TokenStreamComponents)
|
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
|
||||||
getPreviousTokenStream();
|
|
||||||
final Reader r = initReader(reader);
|
final Reader r = initReader(reader);
|
||||||
if (streamChain == null || !streamChain.reset(r)) {
|
if (components == null) {
|
||||||
streamChain = createComponents(fieldName, r);
|
components = createComponents(fieldName, r);
|
||||||
setPreviousTokenStream(streamChain);
|
reuseStrategy.setReusableComponents(fieldName, components);
|
||||||
|
} else {
|
||||||
|
components.reset(r);
|
||||||
}
|
}
|
||||||
return streamChain.getTokenStream();
|
return components.getTokenStream();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -99,6 +115,15 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
|
||||||
return reader;
|
return reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
super.close();
|
||||||
|
reuseStrategy.close();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class encapsulates the outer components of a token stream. It provides
|
* This class encapsulates the outer components of a token stream. It provides
|
||||||
* access to the source ({@link Tokenizer}) and the outer end (sink), an
|
* access to the source ({@link Tokenizer}) and the outer end (sink), an
|
||||||
|
@ -137,22 +162,16 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resets the encapsulated components with the given reader. This method by
|
* Resets the encapsulated components with the given reader. If the components
|
||||||
* default returns <code>true</code> indicating that the components have
|
* cannot be reset, an Exception should be thrown.
|
||||||
* been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
|
|
||||||
* their own {@link TokenStreamComponents} returning <code>false</code> if
|
|
||||||
* the components cannot be reset.
|
|
||||||
*
|
*
|
||||||
* @param reader
|
* @param reader
|
||||||
* a reader to reset the source component
|
* a reader to reset the source component
|
||||||
* @return <code>true</code> if the components were reset, otherwise
|
|
||||||
* <code>false</code>
|
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* if the component's reset method throws an {@link IOException}
|
* if the component's reset method throws an {@link IOException}
|
||||||
*/
|
*/
|
||||||
protected boolean reset(final Reader reader) throws IOException {
|
protected void reset(final Reader reader) throws IOException {
|
||||||
source.reset(reader);
|
source.reset(reader);
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -166,4 +185,124 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Strategy defining how TokenStreamComponents are reused per call to
|
||||||
|
* {@link ReusableAnalyzerBase#tokenStream(String, java.io.Reader)}.
|
||||||
|
*/
|
||||||
|
public static abstract class ReuseStrategy {
|
||||||
|
|
||||||
|
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the reusable TokenStreamComponents for the field with the given name
|
||||||
|
*
|
||||||
|
* @param fieldName Name of the field whose reusable TokenStreamComponents
|
||||||
|
* are to be retrieved
|
||||||
|
* @return Reusable TokenStreamComponents for the field, or {@code null}
|
||||||
|
* if there was no previous components for the field
|
||||||
|
*/
|
||||||
|
public abstract TokenStreamComponents getReusableComponents(String fieldName);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores the given TokenStreamComponents as the reusable components for the
|
||||||
|
* field with the give name
|
||||||
|
*
|
||||||
|
* @param fieldName Name of the field whose TokenStreamComponents are being set
|
||||||
|
* @param components TokenStreamComponents which are to be reused for the field
|
||||||
|
*/
|
||||||
|
public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the currently stored value
|
||||||
|
*
|
||||||
|
* @return Currently stored value or {@code null} if no value is stored
|
||||||
|
*/
|
||||||
|
protected final Object getStoredValue() {
|
||||||
|
try {
|
||||||
|
return storedValue.get();
|
||||||
|
} catch (NullPointerException npe) {
|
||||||
|
if (storedValue == null) {
|
||||||
|
throw new AlreadyClosedException("this Analyzer is closed");
|
||||||
|
} else {
|
||||||
|
throw npe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the stored value
|
||||||
|
*
|
||||||
|
* @param storedValue Value to store
|
||||||
|
*/
|
||||||
|
protected final void setStoredValue(Object storedValue) {
|
||||||
|
try {
|
||||||
|
this.storedValue.set(storedValue);
|
||||||
|
} catch (NullPointerException npe) {
|
||||||
|
if (storedValue == null) {
|
||||||
|
throw new AlreadyClosedException("this Analyzer is closed");
|
||||||
|
} else {
|
||||||
|
throw npe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the ReuseStrategy, freeing any resources
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
storedValue.close();
|
||||||
|
storedValue = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of {@link ReuseStrategy} that reuses the same components for
|
||||||
|
* every field.
|
||||||
|
*/
|
||||||
|
public final static class GlobalReuseStrategy extends ReuseStrategy {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public TokenStreamComponents getReusableComponents(String fieldName) {
|
||||||
|
return (TokenStreamComponents) getStoredValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
|
||||||
|
setStoredValue(components);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of {@link ReuseStrategy} that reuses components per-field by
|
||||||
|
* maintaining a Map of TokenStreamComponent per field name.
|
||||||
|
*/
|
||||||
|
public static class PerFieldReuseStrategy extends ReuseStrategy {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public TokenStreamComponents getReusableComponents(String fieldName) {
|
||||||
|
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
|
||||||
|
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
|
||||||
|
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
|
||||||
|
if (componentsPerField == null) {
|
||||||
|
componentsPerField = new HashMap<String, TokenStreamComponents>();
|
||||||
|
setStoredValue(componentsPerField);
|
||||||
|
}
|
||||||
|
componentsPerField.put(fieldName, components);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
* </ul>
|
* </ul>
|
||||||
* @see MockTokenizer
|
* @see MockTokenizer
|
||||||
*/
|
*/
|
||||||
public final class MockAnalyzer extends Analyzer {
|
public final class MockAnalyzer extends ReusableAnalyzerBase {
|
||||||
private final CharacterRunAutomaton runAutomaton;
|
private final CharacterRunAutomaton runAutomaton;
|
||||||
private final boolean lowerCase;
|
private final boolean lowerCase;
|
||||||
private final CharacterRunAutomaton filter;
|
private final CharacterRunAutomaton filter;
|
||||||
|
@ -62,6 +62,7 @@ public final class MockAnalyzer extends Analyzer {
|
||||||
* @param enablePositionIncrements true if position increments should reflect filtered terms.
|
* @param enablePositionIncrements true if position increments should reflect filtered terms.
|
||||||
*/
|
*/
|
||||||
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
|
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
|
||||||
|
super(new PerFieldReuseStrategy());
|
||||||
this.random = random;
|
this.random = random;
|
||||||
this.runAutomaton = runAutomaton;
|
this.runAutomaton = runAutomaton;
|
||||||
this.lowerCase = lowerCase;
|
this.lowerCase = lowerCase;
|
||||||
|
@ -88,41 +89,11 @@ public final class MockAnalyzer extends Analyzer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
|
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
|
||||||
tokenizer.setEnableChecks(enableChecks);
|
tokenizer.setEnableChecks(enableChecks);
|
||||||
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
|
TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
|
||||||
filt = maybePayload(filt, fieldName);
|
return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
|
||||||
return filt;
|
|
||||||
}
|
|
||||||
|
|
||||||
private class SavedStreams {
|
|
||||||
MockTokenizer tokenizer;
|
|
||||||
TokenFilter filter;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TokenStream reusableTokenStream(String fieldName, Reader reader)
|
|
||||||
throws IOException {
|
|
||||||
@SuppressWarnings("unchecked") Map<String,SavedStreams> map = (Map) getPreviousTokenStream();
|
|
||||||
if (map == null) {
|
|
||||||
map = new HashMap<String,SavedStreams>();
|
|
||||||
setPreviousTokenStream(map);
|
|
||||||
}
|
|
||||||
|
|
||||||
SavedStreams saved = map.get(fieldName);
|
|
||||||
if (saved == null) {
|
|
||||||
saved = new SavedStreams();
|
|
||||||
saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
|
|
||||||
saved.tokenizer.setEnableChecks(enableChecks);
|
|
||||||
saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
|
|
||||||
saved.filter = maybePayload(saved.filter, fieldName);
|
|
||||||
map.put(fieldName, saved);
|
|
||||||
return saved.filter;
|
|
||||||
} else {
|
|
||||||
saved.tokenizer.reset(reader);
|
|
||||||
return saved.filter;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {
|
private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {
|
||||||
|
|
|
@ -30,16 +30,15 @@ import java.io.Reader;
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
**/
|
**/
|
||||||
public final class MockPayloadAnalyzer extends Analyzer {
|
public final class MockPayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
return new MockPayloadFilter(result, fieldName);
|
return new TokenStreamComponents(result, new MockPayloadFilter(result, fieldName));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -34,32 +35,36 @@ public class TestAssertions extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class TestAnalyzer1 extends Analyzer {
|
static class TestAnalyzer1 extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream tokenStream(String s, Reader r) { return null; }
|
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||||
@Override
|
return null;
|
||||||
public final TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class TestAnalyzer2 extends Analyzer {
|
static final class TestAnalyzer2 extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String s, Reader r) { return null; }
|
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||||
@Override
|
return null;
|
||||||
public TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class TestAnalyzer3 extends Analyzer {
|
static class TestAnalyzer3 extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String s, Reader r) { return null; }
|
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||||
@Override
|
return null;
|
||||||
public TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class TestAnalyzer4 extends Analyzer {
|
static class TestAnalyzer4 extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream tokenStream(String s, Reader r) { return null; }
|
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||||
@Override
|
return null;
|
||||||
public TokenStream reusableTokenStream(String s, Reader r) { return null; }
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class TestTokenStream1 extends TokenStream {
|
static class TestTokenStream1 extends TokenStream {
|
||||||
|
|
|
@ -20,11 +20,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
@ -107,10 +103,10 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPositionIncrementGap() throws IOException {
|
public void testPositionIncrementGap() throws IOException {
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -142,10 +138,11 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTokenReuse() throws IOException {
|
public void testTokenReuse() throws IOException {
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) {
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
|
return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
AttributeSource.State state;
|
AttributeSource.State state;
|
||||||
|
|
||||||
|
@ -187,7 +184,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
|
final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
|
||||||
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
};
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -31,11 +31,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.document.BinaryField;
|
import org.apache.lucene.document.BinaryField;
|
||||||
|
@ -1710,10 +1706,10 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class StringSplitAnalyzer extends Analyzer {
|
static final class StringSplitAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new StringSplitTokenizer(reader);
|
return new TokenStreamComponents(new StringSplitTokenizer(reader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,11 +23,7 @@ import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
|
@ -179,21 +175,20 @@ public class TestIndexWriterCommit extends LuceneTestCase {
|
||||||
Analyzer analyzer;
|
Analyzer analyzer;
|
||||||
if (random.nextBoolean()) {
|
if (random.nextBoolean()) {
|
||||||
// no payloads
|
// no payloads
|
||||||
analyzer = new Analyzer() {
|
analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// fixed length payloads
|
// fixed length payloads
|
||||||
final int length = random.nextInt(200);
|
final int length = random.nextInt(200);
|
||||||
analyzer = new Analyzer() {
|
analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockFixedLengthPayloadFilter(random,
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
new MockTokenizer(reader, MockTokenizer.WHITESPACE, true),
|
return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(random, tokenizer, length));
|
||||||
length);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,10 +26,7 @@ import java.util.Random;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
|
@ -902,10 +899,10 @@ public class TestIndexWriterDelete extends LuceneTestCase {
|
||||||
final Random r = random;
|
final Random r = random;
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
// note this test explicitly disables payloads
|
// note this test explicitly disables payloads
|
||||||
final Analyzer analyzer = new Analyzer() {
|
final Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
|
||||||
|
|
|
@ -27,11 +27,7 @@ import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
@ -390,12 +386,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
||||||
doc.add(newField("field", "a field", TextField.TYPE_STORED));
|
doc.add(newField("field", "a field", TextField.TYPE_STORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||||
return new CrashingFilter(fieldName, tokenizer);
|
return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -458,13 +454,13 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
||||||
// LUCENE-1072
|
// LUCENE-1072
|
||||||
public void testExceptionFromTokenStream() throws IOException {
|
public void testExceptionFromTokenStream() throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {
|
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||||
return new TokenFilter(tokenizer) {
|
return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
|
||||||
private int count = 0;
|
private int count = 0;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -480,7 +476,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
||||||
super.reset();
|
super.reset();
|
||||||
this.count = 0;
|
this.count = 0;
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
|
@ -595,12 +591,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDocumentsWriterExceptions() throws IOException {
|
public void testDocumentsWriterExceptions() throws IOException {
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||||
return new CrashingFilter(fieldName, tokenizer);
|
return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -691,12 +687,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDocumentsWriterExceptionThreads() throws Exception {
|
public void testDocumentsWriterExceptionThreads() throws Exception {
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
|
||||||
return new CrashingFilter(fieldName, tokenizer);
|
return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
|
@ -71,10 +68,10 @@ public class TestLazyProxSkipping extends LuceneTestCase {
|
||||||
private void createIndex(int numHits) throws IOException {
|
private void createIndex(int numHits) throws IOException {
|
||||||
int numDocs = 500;
|
int numDocs = 500;
|
||||||
|
|
||||||
final Analyzer analyzer = new Analyzer() {
|
final Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Directory directory = new SeekCountingDirectory(new RAMDirectory());
|
Directory directory = new SeekCountingDirectory(new RAMDirectory());
|
||||||
|
|
|
@ -21,10 +21,7 @@ import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
|
@ -114,11 +111,12 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
|
||||||
assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
|
assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class PayloadAnalyzer extends Analyzer {
|
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
private final AtomicInteger payloadCount = new AtomicInteger(-1);
|
private final AtomicInteger payloadCount = new AtomicInteger(-1);
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new PayloadFilter(payloadCount, new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
|
return new TokenStreamComponents(tokenizer, new PayloadFilter(payloadCount, tokenizer));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,11 +25,7 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -105,12 +101,12 @@ public class TestPayloads extends LuceneTestCase {
|
||||||
// so this field is used to check if the DocumentWriter correctly enables the payloads bit
|
// so this field is used to check if the DocumentWriter correctly enables the payloads bit
|
||||||
// even if only some term positions have payloads
|
// even if only some term positions have payloads
|
||||||
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
|
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
|
||||||
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
|
d.add(newField("f2", "This field has payloads in all docs NO PAYLOAD", TextField.TYPE_UNSTORED));
|
||||||
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
|
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
|
||||||
// enabled in only some documents
|
// enabled in only some documents
|
||||||
d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED));
|
d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED));
|
||||||
// only add payload data for field f2
|
// only add payload data for field f2
|
||||||
analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1);
|
analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
// flush
|
// flush
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
|
@ -20,8 +20,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
@ -64,10 +63,10 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final class BugReproAnalyzer extends Analyzer{
|
final class BugReproAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String arg0, Reader arg1) {
|
public TokenStreamComponents createComponents(String arg0, Reader arg1) {
|
||||||
return new BugReproAnalyzerTokenizer();
|
return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -137,7 +137,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MyTokenStream extends TokenStream {
|
private class MyTokenStream extends Tokenizer {
|
||||||
private int tokenUpto;
|
private int tokenUpto;
|
||||||
|
|
||||||
private final CharTermAttribute termAtt;
|
private final CharTermAttribute termAtt;
|
||||||
|
@ -175,10 +175,10 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MyAnalyzer extends Analyzer {
|
private class MyAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MyTokenStream();
|
return new TokenStreamComponents(new MyTokenStream());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ import java.io.Reader;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -77,10 +77,10 @@ public class TestTermdocPerf extends LuceneTestCase {
|
||||||
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
|
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
|
||||||
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
|
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
|
||||||
|
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return ts;
|
return new TokenStreamComponents(ts);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.search;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
@ -345,7 +346,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CannedAnalyzer extends Analyzer {
|
private static class CannedAnalyzer extends ReusableAnalyzerBase {
|
||||||
private final TokenAndPos[] tokens;
|
private final TokenAndPos[] tokens;
|
||||||
|
|
||||||
public CannedAnalyzer(TokenAndPos[] tokens) {
|
public CannedAnalyzer(TokenAndPos[] tokens) {
|
||||||
|
@ -353,8 +354,8 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new CannedTokenizer(tokens);
|
return new TokenStreamComponents(new CannedTokenizer(tokens));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,10 +55,10 @@ public class TestPhraseQuery extends LuceneTestCase {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClass() throws Exception {
|
public static void beforeClass() throws Exception {
|
||||||
directory = newDirectory();
|
directory = newDirectory();
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -56,10 +56,10 @@ public class TestPositionIncrement extends LuceneTestCase {
|
||||||
final static boolean VERBOSE = false;
|
final static boolean VERBOSE = false;
|
||||||
|
|
||||||
public void testSetPosition() throws Exception {
|
public void testSetPosition() throws Exception {
|
||||||
Analyzer analyzer = new Analyzer() {
|
Analyzer analyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new TokenStream() {
|
return new TokenStreamComponents(new Tokenizer() {
|
||||||
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
|
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
|
||||||
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
|
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
|
||||||
private int i = 0;
|
private int i = 0;
|
||||||
|
@ -85,7 +85,7 @@ public class TestPositionIncrement extends LuceneTestCase {
|
||||||
super.reset();
|
super.reset();
|
||||||
this.i = 0;
|
this.i = 0;
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Directory store = newDirectory();
|
Directory store = newDirectory();
|
||||||
|
|
|
@ -190,7 +190,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
||||||
assertFalse("queries with different inclusive are not equal", query.equals(other));
|
assertFalse("queries with different inclusive are not equal", query.equals(other));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SingleCharAnalyzer extends Analyzer {
|
private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
private static class SingleCharTokenizer extends Tokenizer {
|
private static class SingleCharTokenizer extends Tokenizer {
|
||||||
char[] buffer = new char[1];
|
char[] buffer = new char[1];
|
||||||
|
@ -225,19 +225,8 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
|
return new TokenStreamComponents(new SingleCharTokenizer(reader));
|
||||||
if (tokenizer == null) {
|
|
||||||
tokenizer = new SingleCharTokenizer(reader);
|
|
||||||
setPreviousTokenStream(tokenizer);
|
|
||||||
} else
|
|
||||||
tokenizer.reset(reader);
|
|
||||||
return tokenizer;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
|
||||||
return new SingleCharTokenizer(reader);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,14 +55,16 @@ public class PayloadHelper {
|
||||||
|
|
||||||
public IndexReader reader;
|
public IndexReader reader;
|
||||||
|
|
||||||
public final class PayloadAnalyzer extends Analyzer {
|
public final class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
|
public PayloadAnalyzer() {
|
||||||
|
super(new PerFieldReuseStrategy());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
result = new PayloadFilter(result, fieldName);
|
return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,12 +55,11 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
||||||
private static byte[] payload2 = new byte[]{2};
|
private static byte[] payload2 = new byte[]{2};
|
||||||
private static byte[] payload4 = new byte[]{4};
|
private static byte[] payload4 = new byte[]{4};
|
||||||
|
|
||||||
private static class PayloadAnalyzer extends Analyzer {
|
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
result = new PayloadFilter(result, fieldName);
|
return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -64,14 +64,16 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
||||||
private static final byte[] payloadMultiField2 = new byte[]{4};
|
private static final byte[] payloadMultiField2 = new byte[]{4};
|
||||||
protected static Directory directory;
|
protected static Directory directory;
|
||||||
|
|
||||||
private static class PayloadAnalyzer extends Analyzer {
|
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
|
private PayloadAnalyzer() {
|
||||||
|
super(new PerFieldReuseStrategy());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
result = new PayloadFilter(result, fieldName);
|
return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -70,14 +71,12 @@ public class TestBasics extends LuceneTestCase {
|
||||||
private static Directory directory;
|
private static Directory directory;
|
||||||
|
|
||||||
static final class SimplePayloadFilter extends TokenFilter {
|
static final class SimplePayloadFilter extends TokenFilter {
|
||||||
String fieldName;
|
|
||||||
int pos;
|
int pos;
|
||||||
final PayloadAttribute payloadAttr;
|
final PayloadAttribute payloadAttr;
|
||||||
final CharTermAttribute termAttr;
|
final CharTermAttribute termAttr;
|
||||||
|
|
||||||
public SimplePayloadFilter(TokenStream input, String fieldName) {
|
public SimplePayloadFilter(TokenStream input) {
|
||||||
super(input);
|
super(input);
|
||||||
this.fieldName = fieldName;
|
|
||||||
pos = 0;
|
pos = 0;
|
||||||
payloadAttr = input.addAttribute(PayloadAttribute.class);
|
payloadAttr = input.addAttribute(PayloadAttribute.class);
|
||||||
termAttr = input.addAttribute(CharTermAttribute.class);
|
termAttr = input.addAttribute(CharTermAttribute.class);
|
||||||
|
@ -105,7 +104,7 @@ public class TestBasics extends LuceneTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true), fieldName);
|
return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -23,10 +23,7 @@ import java.util.Collection;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -479,18 +476,16 @@ public class TestPayloadSpans extends LuceneTestCase {
|
||||||
assertEquals(numSpans, cnt);
|
assertEquals(numSpans, cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
final class PayloadAnalyzer extends Analyzer {
|
final class PayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
result = new PayloadFilter(result, fieldName);
|
return new TokenStreamComponents(result, new PayloadFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final class PayloadFilter extends TokenFilter {
|
final class PayloadFilter extends TokenFilter {
|
||||||
String fieldName;
|
|
||||||
Set<String> entities = new HashSet<String>();
|
Set<String> entities = new HashSet<String>();
|
||||||
Set<String> nopayload = new HashSet<String>();
|
Set<String> nopayload = new HashSet<String>();
|
||||||
int pos;
|
int pos;
|
||||||
|
@ -498,9 +493,8 @@ public class TestPayloadSpans extends LuceneTestCase {
|
||||||
CharTermAttribute termAtt;
|
CharTermAttribute termAtt;
|
||||||
PositionIncrementAttribute posIncrAtt;
|
PositionIncrementAttribute posIncrAtt;
|
||||||
|
|
||||||
public PayloadFilter(TokenStream input, String fieldName) {
|
public PayloadFilter(TokenStream input) {
|
||||||
super(input);
|
super(input);
|
||||||
this.fieldName = fieldName;
|
|
||||||
pos = 0;
|
pos = 0;
|
||||||
entities.add("xx");
|
entities.add("xx");
|
||||||
entities.add("one");
|
entities.add("one");
|
||||||
|
@ -536,13 +530,12 @@ public class TestPayloadSpans extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final class TestPayloadAnalyzer extends Analyzer {
|
public final class TestPayloadAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
result = new PayloadFilter(result, fieldName);
|
return new TokenStreamComponents(result, new PayloadFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -106,3 +106,5 @@ Build
|
||||||
* LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the
|
* LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the
|
||||||
analysis module. The 'smartcn' and 'stempel' components now depend on 'common'.
|
analysis module. The 'smartcn' and 'stempel' components now depend on 'common'.
|
||||||
(Robert Muir)
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3376: Moved ReusableAnalyzerBase into lucene core. (Chris Male)
|
||||||
|
|
|
@ -27,6 +27,7 @@ import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
|
@ -66,7 +67,7 @@ import org.apache.lucene.util.Version;
|
||||||
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
|
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class PatternAnalyzer extends Analyzer {
|
public final class PatternAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
/** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
|
/** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
|
||||||
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
|
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
|
||||||
|
@ -187,25 +188,21 @@ public final class PatternAnalyzer extends Analyzer {
|
||||||
* the string to tokenize
|
* the string to tokenize
|
||||||
* @return a new token stream
|
* @return new TokenStreamComponents wrapping a tokenizer over the given text
|
||||||
*/
|
*/
|
||||||
public TokenStream tokenStream(String fieldName, String text) {
|
public TokenStreamComponents createComponents(String fieldName, String text) {
|
||||||
// Ideally the Analyzer superclass should have a method with the same signature,
|
// Ideally the Analyzer superclass should have a method with the same signature,
|
||||||
// with a default impl that simply delegates to the StringReader flavour.
|
// with a default impl that simply delegates to the StringReader flavour.
|
||||||
if (text == null)
|
if (text == null)
|
||||||
throw new IllegalArgumentException("text must not be null");
|
throw new IllegalArgumentException("text must not be null");
|
||||||
|
|
||||||
TokenStream stream;
|
|
||||||
if (pattern == NON_WORD_PATTERN) { // fast path
|
if (pattern == NON_WORD_PATTERN) { // fast path
|
||||||
stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
|
return new TokenStreamComponents(new FastStringTokenizer(text, true, toLowerCase, stopWords));
|
||||||
}
|
} else if (pattern == WHITESPACE_PATTERN) { // fast path
|
||||||
else if (pattern == WHITESPACE_PATTERN) { // fast path
|
return new TokenStreamComponents(new FastStringTokenizer(text, false, toLowerCase, stopWords));
|
||||||
stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
stream = new PatternTokenizer(text, pattern, toLowerCase);
|
|
||||||
if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return stream;
|
Tokenizer tokenizer = new PatternTokenizer(text, pattern, toLowerCase);
|
||||||
|
TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
|
||||||
|
return new TokenStreamComponents(tokenizer, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -220,10 +217,10 @@ public final class PatternAnalyzer extends Analyzer {
|
||||||
* @return a new token stream
|
* @return new TokenStreamComponents built from the text read from the given reader
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
try {
|
try {
|
||||||
String text = toString(reader);
|
String text = toString(reader);
|
||||||
return tokenStream(fieldName, text);
|
return createComponents(fieldName, text);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -123,9 +123,9 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
|
||||||
tok = new StopFilter(matchVersion, tok, stopwords);
|
tok = new StopFilter(matchVersion, tok, stopwords);
|
||||||
return new TokenStreamComponents(src, tok) {
|
return new TokenStreamComponents(src, tok) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean reset(final Reader reader) throws IOException {
|
protected void reset(final Reader reader) throws IOException {
|
||||||
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
|
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
|
||||||
return super.reset(reader);
|
super.reset(reader);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,9 +124,9 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
||||||
tok = new StopFilter(matchVersion, tok, stopwords);
|
tok = new StopFilter(matchVersion, tok, stopwords);
|
||||||
return new TokenStreamComponents(src, tok) {
|
return new TokenStreamComponents(src, tok) {
|
||||||
@Override
|
@Override
|
||||||
protected boolean reset(final Reader reader) throws IOException {
|
protected void reset(final Reader reader) throws IOException {
|
||||||
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
|
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
|
||||||
return super.reset(reader);
|
super.reset(reader);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,9 +21,7 @@ import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
@ -66,10 +64,10 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
|
||||||
* Analyzer that just uses ChineseTokenizer, not ChineseFilter.
|
* Analyzer that just uses ChineseTokenizer, not ChineseFilter.
|
||||||
* convenience to show the behavior of the tokenizer
|
* convenience to show the behavior of the tokenizer
|
||||||
*/
|
*/
|
||||||
private class JustChineseTokenizerAnalyzer extends Analyzer {
|
private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new ChineseTokenizer(reader);
|
return new TokenStreamComponents(new ChineseTokenizer(reader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,10 +75,11 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
|
||||||
* Analyzer that just uses ChineseFilter, not ChineseTokenizer.
|
* Analyzer that just uses ChineseFilter, not ChineseTokenizer.
|
||||||
* convenience to show the behavior of the filter.
|
* convenience to show the behavior of the filter.
|
||||||
*/
|
*/
|
||||||
private class JustChineseFilterAnalyzer extends Analyzer {
|
private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new ChineseFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader));
|
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
|
||||||
|
return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,11 +19,8 @@ package org.apache.lucene.analysis.commongrams;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
|
@ -87,11 +84,12 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
||||||
* @return Map<String,String>
|
* @return Map<String,String>
|
||||||
*/
|
*/
|
||||||
public void testCommonGramsQueryFilter() throws Exception {
|
public void testCommonGramsQueryFilter() throws Exception {
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String field, Reader in) {
|
public TokenStreamComponents createComponents(String field, Reader in) {
|
||||||
return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
|
Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
|
||||||
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
|
return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
|
||||||
|
tokenizer, commonWords)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -156,11 +154,12 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCommonGramsFilter() throws Exception {
|
public void testCommonGramsFilter() throws Exception {
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String field, Reader in) {
|
public TokenStreamComponents createComponents(String field, Reader in) {
|
||||||
return new CommonGramsFilter(TEST_VERSION_CURRENT,
|
Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
|
||||||
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
|
return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT,
|
||||||
|
tokenizer, commonWords));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -21,10 +21,7 @@ import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
|
@ -120,12 +117,12 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||||
String[] y = StandardTokenizer.TOKEN_TYPES;
|
String[] y = StandardTokenizer.TOKEN_TYPES;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
|
private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new LowerCaseFilter(TEST_VERSION_CURRENT,
|
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@ -128,7 +129,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
assertTokenStreamContents(ts, expected);
|
assertTokenStreamContents(ts, expected);
|
||||||
|
|
||||||
// analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
|
// analysis of a String wrapped in a StringReader, uses PatternAnalyzer.tokenStream(String, Reader)
|
||||||
TokenStream ts2 = analyzer.tokenStream("dummy", document);
|
TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
|
||||||
assertTokenStreamContents(ts2, expected);
|
assertTokenStreamContents(ts2, expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,12 +17,7 @@
|
||||||
|
|
||||||
package org.apache.lucene.analysis.miscellaneous;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.core.StopFilter;
|
import org.apache.lucene.analysis.core.StopFilter;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
@ -245,13 +240,13 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
|
||||||
new int[] { 1, 1, 1 });
|
new int[] { 1, 1, 1 });
|
||||||
|
|
||||||
/* analyzer that will consume tokens with large position increments */
|
/* analyzer that will consume tokens with large position increments */
|
||||||
Analyzer a2 = new Analyzer() {
|
Analyzer a2 = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String field, Reader reader) {
|
public TokenStreamComponents createComponents(String field, Reader reader) {
|
||||||
return new WordDelimiterFilter(
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
new LargePosIncTokenFilter(
|
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
|
||||||
new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
|
new LargePosIncTokenFilter(tokenizer),
|
||||||
flags, protWords);
|
flags, protWords));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -278,13 +273,14 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
|
||||||
new int[] { 6, 14, 19 },
|
new int[] { 6, 14, 19 },
|
||||||
new int[] { 1, 11, 1 });
|
new int[] { 1, 11, 1 });
|
||||||
|
|
||||||
Analyzer a3 = new Analyzer() {
|
Analyzer a3 = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String field, Reader reader) {
|
public TokenStreamComponents createComponents(String field, Reader reader) {
|
||||||
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
|
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
|
||||||
new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
|
tokenizer, StandardAnalyzer.STOP_WORDS_SET);
|
||||||
filter.setEnablePositionIncrements(true);
|
filter.setEnablePositionIncrements(true);
|
||||||
return new WordDelimiterFilter(filter, flags, protWords);
|
return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -144,32 +144,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
|
||||||
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* analyzer that does not support reuse
|
|
||||||
* it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
|
|
||||||
*/
|
|
||||||
private class NonreusableAnalyzer extends Analyzer {
|
|
||||||
int invocationCount = 0;
|
|
||||||
@Override
|
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
|
||||||
if (++invocationCount % 2 == 0)
|
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
|
||||||
else
|
|
||||||
return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testWrappingNonReusableAnalyzer() throws Exception {
|
|
||||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
|
|
||||||
a.addStopWords(reader, 10);
|
|
||||||
|
|
||||||
TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
|
|
||||||
assertTokenStreamContents(tokenStream, new String[0]);
|
|
||||||
|
|
||||||
tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
|
|
||||||
assertTokenStreamContents(tokenStream, new String[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testTokenStream() throws Exception {
|
public void testTokenStream() throws Exception {
|
||||||
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
|
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
|
||||||
a.addStopWords(reader, 10);
|
a.addStopWords(reader, 10);
|
||||||
|
|
|
@ -149,40 +149,6 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
new int[] { 1, 0, 1, 0, 1, 0, 1 });
|
new int[] { 1, 0, 1, 0, 1, 0, 1 });
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* analyzer that does not support reuse
|
|
||||||
* it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
|
|
||||||
*/
|
|
||||||
private class NonreusableAnalyzer extends Analyzer {
|
|
||||||
int invocationCount = 0;
|
|
||||||
@Override
|
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
|
||||||
if (++invocationCount % 2 == 0)
|
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
|
||||||
else
|
|
||||||
return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testWrappedAnalyzerDoesNotReuse() throws Exception {
|
|
||||||
Analyzer a = new ShingleAnalyzerWrapper(new NonreusableAnalyzer());
|
|
||||||
assertAnalyzesToReuse(a, "please divide into shingles.",
|
|
||||||
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
|
|
||||||
new int[] { 0, 0, 7, 7, 14, 14, 19 },
|
|
||||||
new int[] { 6, 13, 13, 18, 18, 27, 27 },
|
|
||||||
new int[] { 1, 0, 1, 0, 1, 0, 1 });
|
|
||||||
assertAnalyzesToReuse(a, "please divide into shingles.",
|
|
||||||
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles.", "shingles." },
|
|
||||||
new int[] { 0, 0, 7, 7, 14, 14, 19 },
|
|
||||||
new int[] { 6, 13, 13, 18, 18, 28, 28 },
|
|
||||||
new int[] { 1, 0, 1, 0, 1, 0, 1 });
|
|
||||||
assertAnalyzesToReuse(a, "please divide into shingles.",
|
|
||||||
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
|
|
||||||
new int[] { 0, 0, 7, 7, 14, 14, 19 },
|
|
||||||
new int[] { 6, 13, 13, 18, 18, 27, 27 },
|
|
||||||
new int[] { 1, 0, 1, 0, 1, 0, 1 });
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testNonDefaultMinShingleSize() throws Exception {
|
public void testNonDefaultMinShingleSize() throws Exception {
|
||||||
ShingleAnalyzerWrapper analyzer
|
ShingleAnalyzerWrapper analyzer
|
||||||
= new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);
|
= new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);
|
||||||
|
|
|
@ -18,9 +18,7 @@ package org.apache.lucene.collation;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CollationTestBase;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
@ -54,7 +52,7 @@ public class TestCollationKeyFilter extends CollationTestBase {
|
||||||
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
|
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
|
||||||
|
|
||||||
|
|
||||||
public final class TestAnalyzer extends Analyzer {
|
public final class TestAnalyzer extends ReusableAnalyzerBase {
|
||||||
private Collator _collator;
|
private Collator _collator;
|
||||||
|
|
||||||
TestAnalyzer(Collator collator) {
|
TestAnalyzer(Collator collator) {
|
||||||
|
@ -62,10 +60,9 @@ public class TestCollationKeyFilter extends CollationTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new KeywordTokenizer(reader);
|
Tokenizer result = new KeywordTokenizer(reader);
|
||||||
result = new CollationKeyFilter(result, _collator);
|
return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,20 +20,18 @@ package org.apache.lucene.analysis.icu;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests ICUFoldingFilter
|
* Tests ICUFoldingFilter
|
||||||
*/
|
*/
|
||||||
public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
|
public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new ICUFoldingFilter(
|
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
public void testDefaults() throws IOException {
|
public void testDefaults() throws IOException {
|
||||||
|
|
|
@ -20,9 +20,7 @@ package org.apache.lucene.analysis.icu;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
|
|
||||||
import com.ibm.icu.text.Normalizer2;
|
import com.ibm.icu.text.Normalizer2;
|
||||||
|
@ -31,11 +29,11 @@ import com.ibm.icu.text.Normalizer2;
|
||||||
* Tests the ICUNormalizer2Filter
|
* Tests the ICUNormalizer2Filter
|
||||||
*/
|
*/
|
||||||
public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
|
public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new ICUNormalizer2Filter(
|
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
|
return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -61,13 +59,14 @@ public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAlternate() throws IOException {
|
public void testAlternate() throws IOException {
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new ICUNormalizer2Filter(
|
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
|
return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
|
||||||
|
tokenizer,
|
||||||
/* specify nfc with decompose to get nfd */
|
/* specify nfc with decompose to get nfd */
|
||||||
Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE));
|
Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -20,9 +20,7 @@ package org.apache.lucene.collation;
|
||||||
|
|
||||||
import com.ibm.icu.text.Collator;
|
import com.ibm.icu.text.Collator;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CollationTestBase;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
@ -46,7 +44,7 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
|
||||||
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
|
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
|
||||||
|
|
||||||
|
|
||||||
public final class TestAnalyzer extends Analyzer {
|
public final class TestAnalyzer extends ReusableAnalyzerBase {
|
||||||
private Collator _collator;
|
private Collator _collator;
|
||||||
|
|
||||||
TestAnalyzer(Collator collator) {
|
TestAnalyzer(Collator collator) {
|
||||||
|
@ -54,10 +52,9 @@ public class TestICUCollationKeyFilter extends CollationTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new KeywordTokenizer(reader);
|
Tokenizer result = new KeywordTokenizer(reader);
|
||||||
result = new ICUCollationKeyFilter(result, _collator);
|
return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Collections;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -54,7 +55,7 @@ import org.apache.lucene.util.Version;
|
||||||
* </p>
|
* </p>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class SmartChineseAnalyzer extends Analyzer {
|
public final class SmartChineseAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
private final Set<?> stopWords;
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
@ -141,9 +142,9 @@ public final class SmartChineseAnalyzer extends Analyzer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new SentenceTokenizer(reader);
|
Tokenizer tokenizer = new SentenceTokenizer(reader);
|
||||||
result = new WordTokenFilter(result);
|
TokenStream result = new WordTokenFilter(tokenizer);
|
||||||
// result = new LowerCaseFilter(result);
|
// result = new LowerCaseFilter(result);
|
||||||
// LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
|
// LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
|
||||||
// The porter stemming is too strict, this is not a bug, this is a feature:)
|
// The porter stemming is too strict, this is not a bug, this is a feature:)
|
||||||
|
@ -151,32 +152,6 @@ public final class SmartChineseAnalyzer extends Analyzer {
|
||||||
if (!stopWords.isEmpty()) {
|
if (!stopWords.isEmpty()) {
|
||||||
result = new StopFilter(matchVersion, result, stopWords, false);
|
result = new StopFilter(matchVersion, result, stopWords, false);
|
||||||
}
|
}
|
||||||
return result;
|
return new TokenStreamComponents(tokenizer, result);
|
||||||
}
|
|
||||||
|
|
||||||
private static final class SavedStreams {
|
|
||||||
Tokenizer tokenStream;
|
|
||||||
TokenStream filteredTokenStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TokenStream reusableTokenStream(String fieldName, Reader reader)
|
|
||||||
throws IOException {
|
|
||||||
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
|
|
||||||
if (streams == null) {
|
|
||||||
streams = new SavedStreams();
|
|
||||||
setPreviousTokenStream(streams);
|
|
||||||
streams.tokenStream = new SentenceTokenizer(reader);
|
|
||||||
streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
|
|
||||||
streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
|
|
||||||
if (!stopWords.isEmpty()) {
|
|
||||||
streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
streams.tokenStream.reset(reader);
|
|
||||||
streams.filteredTokenStream.reset(); // reset WordTokenFilter's state
|
|
||||||
}
|
|
||||||
|
|
||||||
return streams.filteredTokenStream;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,10 +5,7 @@ import java.io.Reader;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -140,10 +137,10 @@ public class CategoryListIteratorTest extends LuceneTestCase {
|
||||||
DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
|
DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
|
||||||
new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
|
new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
|
||||||
// this test requires that no payloads ever be randomly present!
|
// this test requires that no payloads ever be randomly present!
|
||||||
final Analyzer noPayloadsAnalyzer = new Analyzer() {
|
final Analyzer noPayloadsAnalyzer = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// NOTE: test is wired to LogMP... because test relies on certain docids having payloads
|
// NOTE: test is wired to LogMP... because test relies on certain docids having payloads
|
||||||
|
|
|
@ -20,10 +20,7 @@ package org.apache.lucene.queryparser.analyzing;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.queryparser.classic.ParseException;
|
import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -137,14 +134,11 @@ final class TestFoldingFilter extends TokenFilter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer {
|
final class ASCIIAnalyzer extends ReusableAnalyzerBase {
|
||||||
public ASCIIAnalyzer() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
result = new TestFoldingFilter(result);
|
return new TokenStreamComponents(result, new TestFoldingFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,16 +122,12 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
|
||||||
* Expands "multi" to "multi" and "multi2", both at the same position,
|
* Expands "multi" to "multi" and "multi2", both at the same position,
|
||||||
* and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
|
* and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
|
||||||
*/
|
*/
|
||||||
private class MultiAnalyzer extends Analyzer {
|
private class MultiAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
public MultiAnalyzer() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
result = new TestFilter(result);
|
return new TokenStreamComponents(result, new TestFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,16 +192,12 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
|
||||||
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
|
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
|
||||||
* Does not work correctly for input other than "the quick brown ...".
|
* Does not work correctly for input other than "the quick brown ...".
|
||||||
*/
|
*/
|
||||||
private class PosIncrementAnalyzer extends Analyzer {
|
private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
public PosIncrementAnalyzer() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
result = new TestPosIncrementFilter(result);
|
return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,9 +22,7 @@ import java.io.Reader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
@ -302,22 +300,23 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
|
||||||
/**
|
/**
|
||||||
* Return empty tokens for field "f1".
|
* Return empty tokens for field "f1".
|
||||||
*/
|
*/
|
||||||
private static class AnalyzerReturningNull extends Analyzer {
|
private static class AnalyzerReturningNull extends ReusableAnalyzerBase {
|
||||||
MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
|
MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
|
||||||
|
|
||||||
public AnalyzerReturningNull() {
|
public AnalyzerReturningNull() {
|
||||||
|
super(new PerFieldReuseStrategy());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
if ("f1".equals(fieldName)) {
|
if ("f1".equals(fieldName)) {
|
||||||
return new EmptyTokenStream();
|
return new TokenStreamComponents(new EmptyTokenStream());
|
||||||
} else {
|
} else {
|
||||||
return stdAnalyzer.tokenStream(fieldName, reader);
|
return stdAnalyzer.createComponents(fieldName, reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class EmptyTokenStream extends TokenStream {
|
private static class EmptyTokenStream extends Tokenizer {
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.ReusableAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -41,7 +42,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CannedAnalyzer extends Analyzer {
|
private static class CannedAnalyzer extends ReusableAnalyzerBase {
|
||||||
private final TokenAndPos[] tokens;
|
private final TokenAndPos[] tokens;
|
||||||
|
|
||||||
public CannedAnalyzer(TokenAndPos[] tokens) {
|
public CannedAnalyzer(TokenAndPos[] tokens) {
|
||||||
|
@ -49,8 +50,8 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new CannedTokenizer(tokens);
|
return new TokenStreamComponents(new CannedTokenizer(tokens));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -112,12 +112,13 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final class QPTestAnalyzer extends Analyzer {
|
public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
/** Filters MockTokenizer with StopFilter. */
|
/** Filters MockTokenizer with StopFilter. */
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream tokenStream(String fieldName, Reader reader) {
|
public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
|
return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -143,16 +143,12 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
|
||||||
* Expands "multi" to "multi" and "multi2", both at the same position, and
|
* Expands "multi" to "multi" and "multi2", both at the same position, and
|
||||||
* expands "triplemulti" to "triplemulti", "multi3", and "multi2".
|
* expands "triplemulti" to "triplemulti", "multi3", and "multi2".
|
||||||
*/
|
*/
|
||||||
private class MultiAnalyzer extends Analyzer {
|
private class MultiAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
public MultiAnalyzer() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
result = new TestFilter(result);
|
return new TokenStreamComponents(result, new TestFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -214,16 +210,12 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
|
||||||
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
|
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
|
||||||
* correctly for input other than "the quick brown ...".
|
* correctly for input other than "the quick brown ...".
|
||||||
*/
|
*/
|
||||||
private class PosIncrementAnalyzer extends Analyzer {
|
private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
public PosIncrementAnalyzer() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
result = new TestPosIncrementFilter(result);
|
return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,9 +21,7 @@ import java.io.Reader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
@ -340,22 +338,23 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
|
||||||
/**
|
/**
|
||||||
* Return empty tokens for field "f1".
|
* Return empty tokens for field "f1".
|
||||||
*/
|
*/
|
||||||
private static final class AnalyzerReturningNull extends Analyzer {
|
private static final class AnalyzerReturningNull extends ReusableAnalyzerBase {
|
||||||
MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
|
MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
|
||||||
|
|
||||||
public AnalyzerReturningNull() {
|
public AnalyzerReturningNull() {
|
||||||
|
super(new PerFieldReuseStrategy());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
if ("f1".equals(fieldName)) {
|
if ("f1".equals(fieldName)) {
|
||||||
return new EmptyTokenStream();
|
return new TokenStreamComponents(new EmptyTokenStream());
|
||||||
} else {
|
} else {
|
||||||
return stdAnalyzer.tokenStream(fieldName, reader);
|
return stdAnalyzer.createComponents(fieldName, reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class EmptyTokenStream extends TokenStream {
|
private static class EmptyTokenStream extends Tokenizer {
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() {
|
public boolean incrementToken() {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -128,12 +128,13 @@ public class TestQPHelper extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final class QPTestAnalyzer extends Analyzer {
|
public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
/** Filters MockTokenizer with StopFilter. */
|
/** Filters MockTokenizer with StopFilter. */
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream tokenStream(String fieldName, Reader reader) {
|
public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
|
return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -344,10 +345,10 @@ public class TestQPHelper extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SimpleCJKAnalyzer extends Analyzer {
|
private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new SimpleCJKTokenizer(reader);
|
return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1241,10 +1242,10 @@ public class TestQPHelper extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class CannedAnalyzer extends Analyzer {
|
private class CannedAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String ignored, Reader alsoIgnored) {
|
public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
|
||||||
return new CannedTokenStream();
|
return new TokenStreamComponents(new CannedTokenStream());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ import java.util.Date;
|
||||||
import java.util.GregorianCalendar;
|
import java.util.GregorianCalendar;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.MockTokenFilter;
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
@ -104,12 +98,13 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static final class QPTestAnalyzer extends Analyzer {
|
public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
|
||||||
|
|
||||||
/** Filters MockTokenizer with StopFilter. */
|
/** Filters MockTokenizer with StopFilter. */
|
||||||
@Override
|
@Override
|
||||||
public final TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
|
||||||
|
return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -245,10 +240,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class SimpleCJKAnalyzer extends Analyzer {
|
private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new SimpleCJKTokenizer(reader);
|
return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -348,10 +343,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
assertQueryEquals("a OR -b", null, "a -b");
|
assertQueryEquals("a OR -b", null, "a -b");
|
||||||
|
|
||||||
// +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator
|
// +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new ReusableAnalyzerBase() {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
assertQueryEquals("a - b", a, "a - b");
|
assertQueryEquals("a - b", a, "a - b");
|
||||||
|
@ -1162,18 +1157,19 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** whitespace+lowercase analyzer with synonyms */
|
/** whitespace+lowercase analyzer with synonyms */
|
||||||
private class Analyzer1 extends Analyzer {
|
private class Analyzer1 extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockSynonymFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
|
return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** whitespace+lowercase analyzer without synonyms */
|
/** whitespace+lowercase analyzer without synonyms */
|
||||||
private class Analyzer2 extends Analyzer {
|
private class Analyzer2 extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1235,10 +1231,11 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
private class MockCollationAnalyzer extends Analyzer {
|
private class MockCollationAnalyzer extends ReusableAnalyzerBase {
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||||
return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
|
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
|
||||||
|
return new TokenStreamComponents(tokenizer, new MockCollationFilter(tokenizer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue