LUCENE-3396: Collapsing Analyzer and ReusableAnalyzerBase together, mandating use of TokenStreamComponents

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175297 13f79535-47bb-0310-9956-ffa450edef68
Christopher John Male 2011-09-25 05:10:25 +00:00
parent 48e53be99a
commit 4ff0b2f82c
136 changed files with 895 additions and 948 deletions

View File

@ -174,6 +174,10 @@ Changes in backwards compatibility policy
* LUCENE-3396: ReusableAnalyzerBase.TokenStreamComponents.reset(Reader) now returns void instead
of boolean. If a Component cannot be reset, it should throw an Exception. (Chris Male)
* LUCENE-3396: ReusableAnalyzerBase has been renamed to Analyzer. All Analyzer implementations
must now use Analyzer.TokenStreamComponents, rather than overriding .tokenStream() and
.reusableTokenStream() (which are now final). (Chris Male)
Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you

View File

@ -517,3 +517,8 @@ If you did this before (bytes is a byte[]):
you can now do this:
new BinaryField("field", bytes)
* LUCENE-3396: Analyzer.tokenStream() and .reusableTokenStream() have been made final.
It is now necessary to use Analyzer.TokenStreamComponents to define an analysis process.
Analyzer also has its own way of managing the reuse of TokenStreamComponents (either
globally, or per-field). To define another strategy, extend Analyzer.ReuseStrategy.
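For illustration, a minimal sketch of the new pattern (the class name MyAnalyzer is illustrative; MockTokenizer comes from the Lucene test framework and mirrors the anonymous analyzers in the updated tests below, where a production analyzer would plug in its own Tokenizer/TokenFilter chain):

// Sketch only: the replacement for overriding tokenStream()/reusableTokenStream(),
// which are now final. Subclasses describe their component chain once and the
// Analyzer base class manages reuse through its ReuseStrategy (global by default).
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;

public final class MyAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // MockTokenizer stands in for a real Tokenizer here.
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    return new TokenStreamComponents(source);
  }
}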

View File

@ -1802,7 +1802,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// behaviour to synonyms
// ===================================================================
final class SynonymAnalyzer extends ReusableAnalyzerBase {
final class SynonymAnalyzer extends Analyzer {
private Map<String,String> synonyms;
public SynonymAnalyzer(Map<String,String> synonyms) {

View File

@ -49,7 +49,7 @@ public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
assertTokenStreamContents(filter, new String[] {"short", "toolong",
"evenmuchlongertext"});
checkOneTermReuse(new ReusableAnalyzerBase() {
checkOneTermReuse(new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -48,7 +48,7 @@ import org.apache.lucene.util.LuceneTestCase;
public class TokenSourcesTest extends LuceneTestCase {
private static final String FIELD = "text";
private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
private static final class OverlapAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -194,7 +194,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
return phraseQuery;
}
static final class BigramAnalyzer extends ReusableAnalyzerBase {
static final class BigramAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new BasicNGramTokenizer(reader));

View File

@ -290,7 +290,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
return token;
}
public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
public static final class TokenArrayAnalyzer extends Analyzer {
final Token[] tokens;
public TokenArrayAnalyzer(Token... tokens) {
this.tokens = tokens;

View File

@ -1,6 +1,6 @@
package org.apache.lucene.analysis;
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -17,98 +17,106 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import java.io.Reader;
import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.CloseableThreadLocal;
/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
* <p>
* Typical implementations first build a Tokenizer, which breaks the stream of
* characters from the Reader into raw Tokens. One or more TokenFilters may
* then be applied to the output of the Tokenizer.
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
/**
* An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
* <p>
* To prevent consistency problems, this class does not allow subclasses to
* extend {@link #reusableTokenStream(String, Reader)} or
* {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
* implement {@link #createComponents(String, Reader)}.
* </p>
* <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
* Therefore all non-abstract subclasses must be final or their {@link #tokenStream}
* and {@link #reusableTokenStream} implementations must be final! This is checked
* Therefore all non-abstract subclasses must be final! This is checked
* when Java assertions are enabled.
*/
public abstract class Analyzer implements Closeable {
public abstract class Analyzer {
protected Analyzer() {
super();
assert assertFinal();
private final ReuseStrategy reuseStrategy;
public Analyzer() {
this(new GlobalReuseStrategy());
}
public Analyzer(ReuseStrategy reuseStrategy) {
this.reuseStrategy = reuseStrategy;
}
/**
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
*
* @param fieldName
* the name of the field's content passed to the
* {@link TokenStreamComponents} sink as a reader
* @param aReader
* the reader passed to the {@link Tokenizer} constructor
* @return the {@link TokenStreamComponents} for this analyzer.
*/
protected abstract TokenStreamComponents createComponents(String fieldName,
Reader aReader);
/**
* Creates a TokenStream that is allowed to be re-used from the previous time
* that the same thread called this method. Callers that do not need to use
* more than one TokenStream at the same time from this analyzer should use
* this method for better performance.
* <p>
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents}. It returns the sink of the
* components and stores the components internally. Subsequent calls to this
* method will reuse the previously stored components after resetting them
* through {@link TokenStreamComponents#reset(Reader)}.
* </p>
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
*/
public final TokenStream reusableTokenStream(final String fieldName,
final Reader reader) throws IOException {
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
final Reader r = initReader(reader);
if (components == null) {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
} else {
components.reset(r);
}
return components.getTokenStream();
}
/**
* Creates a TokenStream which tokenizes all the text in the provided
* Reader.
* <p>
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents} and returns the sink of the
* components. Each call to this method will create a new instance of
* {@link TokenStreamComponents}. Created {@link TokenStream} instances are
* never reused.
* </p>
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
*/
public final TokenStream tokenStream(final String fieldName,
final Reader reader) {
return createComponents(fieldName, initReader(reader)).getTokenStream();
}
private boolean assertFinal() {
try {
final Class<?> clazz = getClass();
if (!clazz.desiredAssertionStatus())
return true;
assert clazz.isAnonymousClass() ||
(clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
(
Modifier.isFinal(clazz.getMethod("tokenStream", String.class, Reader.class).getModifiers()) &&
Modifier.isFinal(clazz.getMethod("reusableTokenStream", String.class, Reader.class).getModifiers())
) :
"Analyzer implementation classes or at least their tokenStream() and reusableTokenStream() implementations must be final";
return true;
} catch (NoSuchMethodException nsme) {
return false;
}
}
/** Creates a TokenStream which tokenizes all the text in the provided
* Reader. Must be able to handle null field name for
* backward compatibility.
/**
* Override this if you want to add a CharFilter chain.
*/
public abstract TokenStream tokenStream(String fieldName, Reader reader);
/** Creates a TokenStream that is allowed to be re-used
* from the previous time that the same thread called
* this method. Callers that do not need to use more
* than one TokenStream at the same time from this
* analyzer should use this method for better
* performance.
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return tokenStream(fieldName, reader);
}
private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
/** Used by Analyzers that implement reusableTokenStream
* to retrieve previously saved TokenStreams for re-use
* by the same thread. */
protected Object getPreviousTokenStream() {
try {
return tokenStreams.get();
} catch (NullPointerException npe) {
if (tokenStreams == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/** Used by Analyzers that implement reusableTokenStream
* to save a TokenStream for later re-use by the same
* thread. */
protected void setPreviousTokenStream(Object obj) {
try {
tokenStreams.set(obj);
} catch (NullPointerException npe) {
if (tokenStreams == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
protected Reader initReader(Reader reader) {
return reader;
}
/**
@ -149,7 +157,196 @@ public abstract class Analyzer implements Closeable {
/** Frees persistent resources used by this Analyzer */
public void close() {
tokenStreams.close();
tokenStreams = null;
reuseStrategy.close();
}
/**
* This class encapsulates the outer components of a token stream. It provides
* access to the source ({@link Tokenizer}) and the outer end (sink), an
* instance of {@link TokenFilter} which also serves as the
* {@link TokenStream} returned by
* {@link Analyzer#tokenStream(String, Reader)} and
* {@link Analyzer#reusableTokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
protected final Tokenizer source;
protected final TokenStream sink;
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
* @param result
* the analyzer's resulting token stream
*/
public TokenStreamComponents(final Tokenizer source,
final TokenStream result) {
this.source = source;
this.sink = result;
}
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
*/
public TokenStreamComponents(final Tokenizer source) {
this.source = source;
this.sink = source;
}
/**
* Resets the encapsulated components with the given reader. If the components
* cannot be reset, an Exception should be thrown.
*
* @param reader
* a reader to reset the source component
* @throws IOException
* if the component's reset method throws an {@link IOException}
*/
protected void reset(final Reader reader) throws IOException {
source.reset(reader);
}
/**
* Returns the sink {@link TokenStream}
*
* @return the sink {@link TokenStream}
*/
public TokenStream getTokenStream() {
return sink;
}
/**
* Returns the component's {@link Tokenizer}
*
* @return Component's {@link Tokenizer}
*/
public Tokenizer getTokenizer() {
return source;
}
}
/**
* Strategy defining how TokenStreamComponents are reused per call to
* {@link Analyzer#tokenStream(String, java.io.Reader)}.
*/
public static abstract class ReuseStrategy {
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
/**
* Gets the reusable TokenStreamComponents for the field with the given name
*
* @param fieldName Name of the field whose reusable TokenStreamComponents
* are to be retrieved
* @return Reusable TokenStreamComponents for the field, or {@code null}
* if there were no previous components for the field
*/
public abstract TokenStreamComponents getReusableComponents(String fieldName);
/**
* Stores the given TokenStreamComponents as the reusable components for the
* field with the given name
*
* @param fieldName Name of the field whose TokenStreamComponents are being set
* @param components TokenStreamComponents which are to be reused for the field
*/
public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
/**
* Returns the currently stored value
*
* @return Currently stored value or {@code null} if no value is stored
*/
protected final Object getStoredValue() {
try {
return storedValue.get();
} catch (NullPointerException npe) {
if (storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/**
* Sets the stored value
*
* @param storedValue Value to store
*/
protected final void setStoredValue(Object storedValue) {
try {
this.storedValue.set(storedValue);
} catch (NullPointerException npe) {
if (this.storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/**
* Closes the ReuseStrategy, freeing any resources
*/
public void close() {
storedValue.close();
storedValue = null;
}
}
/**
* Implementation of {@link ReuseStrategy} that reuses the same components for
* every field.
*/
public final static class GlobalReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
public TokenStreamComponents getReusableComponents(String fieldName) {
return (TokenStreamComponents) getStoredValue();
}
/**
* {@inheritDoc}
*/
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
setStoredValue(components);
}
}
/**
* Implementation of {@link ReuseStrategy} that reuses components per-field by
* maintaining a Map of TokenStreamComponents per field name.
*/
public static class PerFieldReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
public TokenStreamComponents getReusableComponents(String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
if (componentsPerField == null) {
componentsPerField = new HashMap<String, TokenStreamComponents>();
setStoredValue(componentsPerField);
}
componentsPerField.put(fieldName, components);
}
}
}
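For reference, a short sketch of selecting a reuse strategy through the new constructor (the class and field names are illustrative; MockTokenizer is the test-framework tokenizer used by the updated tests in this commit, e.g. TestIndexWriterExceptions):

// Sketch only: the two reuse strategies shipped with the new Analyzer base class.
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;

public class ReuseStrategyExample {
  // No-arg constructor => GlobalReuseStrategy: one component chain is cached
  // and re-driven for every field analyzed on a given thread.
  static final Analyzer GLOBAL = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    }
  };

  // Analyzer(ReuseStrategy) constructor => here, components are cached per field
  // name, as the updated tests do.
  static final Analyzer PER_FIELD = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer);
    }
  };
}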

View File

@ -0,0 +1,155 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;
/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
* <p>
* Typical implementations first build a Tokenizer, which breaks the stream of
* characters from the Reader into raw Tokens. One or more TokenFilters may
* then be applied to the output of the Tokenizer.
* <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
* Therefore all non-abstract subclasses must be final or their {@link #tokenStream}
* and {@link #reusableTokenStream} implementations must be final! This is checked
* when Java assertions are enabled.
*/
public abstract class Analyzer implements Closeable {
protected Analyzer() {
super();
assert assertFinal();
}
private boolean assertFinal() {
try {
final Class<?> clazz = getClass();
if (!clazz.desiredAssertionStatus())
return true;
assert clazz.isAnonymousClass() ||
(clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
(
Modifier.isFinal(clazz.getMethod("tokenStream", String.class, Reader.class).getModifiers()) &&
Modifier.isFinal(clazz.getMethod("reusableTokenStream", String.class, Reader.class).getModifiers())
) :
"Analyzer implementation classes or at least their tokenStream() and reusableTokenStream() implementations must be final";
return true;
} catch (NoSuchMethodException nsme) {
return false;
}
}
/** Creates a TokenStream which tokenizes all the text in the provided
* Reader. Must be able to handle null field name for
* backward compatibility.
*/
public abstract TokenStream tokenStream(String fieldName, Reader reader);
/** Creates a TokenStream that is allowed to be re-used
* from the previous time that the same thread called
* this method. Callers that do not need to use more
* than one TokenStream at the same time from this
* analyzer should use this method for better
* performance.
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return tokenStream(fieldName, reader);
}
private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
/** Used by Analyzers that implement reusableTokenStream
* to retrieve previously saved TokenStreams for re-use
* by the same thread. */
protected Object getPreviousTokenStream() {
try {
return tokenStreams.get();
} catch (NullPointerException npe) {
if (tokenStreams == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/** Used by Analyzers that implement reusableTokenStream
* to save a TokenStream for later re-use by the same
* thread. */
protected void setPreviousTokenStream(Object obj) {
try {
tokenStreams.set(obj);
} catch (NullPointerException npe) {
if (tokenStreams == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/**
* Invoked before indexing an IndexableField instance if
* terms have already been added to that field. This allows custom
* analyzers to place an automatic position increment gap between
* IndexableField instances using the same field name. The default
* position increment gap is 0. With a 0 position increment gap and
* the typical default token position increment of 1, all terms in a field,
* including across IndexableField instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
*
* @param fieldName IndexableField name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getPositionIncrementGap(String fieldName) {
return 0;
}
/**
* Just like {@link #getPositionIncrementGap}, except for
* Token offsets instead. By default this returns 1 for
* tokenized fields, as if the fields were joined
* with an extra space character, and 0 for un-tokenized
* fields. This method is only called if the field
* produced at least one token for indexing.
*
* @param field the field just indexed
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getOffsetGap(IndexableField field) {
if (field.fieldType().tokenized()) {
return 1;
} else {
return 0;
}
}
/** Frees persistent resources used by this Analyzer */
public void close() {
tokenStreams.close();
tokenStreams = null;
}
}

View File

@ -0,0 +1,89 @@
package org.apache.lucene.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexableField;
import java.io.Reader;
/**
* Extension to {@link Analyzer} suitable for Analyzers which wrap
* other Analyzers.
* <p/>
* {@link #getWrappedAnalyzer(String)} allows the Analyzer
* to wrap multiple Analyzers which are selected on a per field basis.
* <p/>
* {@link #wrapComponents(String, Analyzer.TokenStreamComponents)} allows the
* TokenStreamComponents of the wrapped Analyzer to then be wrapped
* (such as adding a new {@link TokenFilter} to form new TokenStreamComponents).
*/
public abstract class AnalyzerWrapper extends Analyzer {
/**
* Creates a new AnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
* the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed
*/
protected AnalyzerWrapper() {
super(new PerFieldReuseStrategy());
}
/**
* Retrieves the wrapped Analyzer appropriate for analyzing the field with
* the given name
*
* @param fieldName Name of the field which is to be analyzed
* @return Analyzer for the field with the given name. Assumed to be non-null
*/
protected abstract Analyzer getWrappedAnalyzer(String fieldName);
/**
* Wraps / alters the given TokenStreamComponents, taken from the wrapped
* Analyzer, to form new components. It is through this method that new
* TokenFilters can be added by AnalyzerWrappers.
*
*
* @param fieldName Name of the field which is to be analyzed
* @param components TokenStreamComponents taken from the wrapped Analyzer
* @return Wrapped / altered TokenStreamComponents.
*/
protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
/**
* {@inheritDoc}
*/
@Override
protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
}
/**
* {@inheritDoc}
*/
@Override
public final int getPositionIncrementGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
/**
* {@inheritDoc}
*/
@Override
public final int getOffsetGap(IndexableField field) {
return getWrappedAnalyzer(field.name()).getOffsetGap(field);
}
}
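A minimal sketch of an AnalyzerWrapper subclass (PassThroughAnalyzerWrapper and the delegate field are illustrative names, not part of this commit; a real wrapper would typically wrap a new TokenFilter around components.getTokenStream() inside wrapComponents()):

// Sketch only: delegates every field to a single wrapped Analyzer and passes
// its components through unchanged.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;

public final class PassThroughAnalyzerWrapper extends AnalyzerWrapper {
  private final Analyzer delegate;

  public PassThroughAnalyzerWrapper(Analyzer delegate) {
    // The AnalyzerWrapper superclass assumes PerFieldReuseStrategy.
    this.delegate = delegate;
  }

  @Override
  protected Analyzer getWrappedAnalyzer(String fieldName) {
    return delegate; // same analyzer for every field in this sketch
  }

  @Override
  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    return components; // no extra filtering added here
  }
}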

View File

@ -1,308 +0,0 @@
package org.apache.lucene.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
/**
* An convenience subclass of Analyzer that makes it easy to implement
* {@link TokenStream} reuse.
* <p>
* ReusableAnalyzerBase is a simplification of Analyzer that supports easy reuse
* for the most common use-cases. Analyzers such as
* PerFieldAnalyzerWrapper that behave differently depending upon the
* field name need to subclass Analyzer directly instead.
* </p>
* <p>
* To prevent consistency problems, this class does not allow subclasses to
* extend {@link #reusableTokenStream(String, Reader)} or
* {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
* implement {@link #createComponents(String, Reader)}.
* </p>
*/
public abstract class ReusableAnalyzerBase extends Analyzer {
private final ReuseStrategy reuseStrategy;
public ReusableAnalyzerBase() {
this(new GlobalReuseStrategy());
}
public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) {
this.reuseStrategy = reuseStrategy;
}
/**
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
*
* @param fieldName
* the name of the fields content passed to the
* {@link TokenStreamComponents} sink as a reader
* @param aReader
* the reader passed to the {@link Tokenizer} constructor
* @return the {@link TokenStreamComponents} for this analyzer.
*/
protected abstract TokenStreamComponents createComponents(String fieldName,
Reader aReader);
/**
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents}. It returns the sink of the
* components and stores the components internally. Subsequent calls to this
* method will reuse the previously stored components if and only if the
* {@link TokenStreamComponents#reset(Reader)} method returned
* <code>true</code>. Otherwise a new instance of
* {@link TokenStreamComponents} is created.
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
*/
@Override
public final TokenStream reusableTokenStream(final String fieldName,
final Reader reader) throws IOException {
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
final Reader r = initReader(reader);
if (components == null) {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
} else {
components.reset(r);
}
return components.getTokenStream();
}
/**
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents} and returns the sink of the
* components. Each calls to this method will create a new instance of
* {@link TokenStreamComponents}. Created {@link TokenStream} instances are
* never reused.
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
*/
@Override
public final TokenStream tokenStream(final String fieldName,
final Reader reader) {
return createComponents(fieldName, initReader(reader)).getTokenStream();
}
/**
* Override this if you want to add a CharFilter chain.
*/
protected Reader initReader(Reader reader) {
return reader;
}
/**
* {@inheritDoc}
*/
@Override
public void close() {
super.close();
reuseStrategy.close();
}
/**
* This class encapsulates the outer components of a token stream. It provides
* access to the source ({@link Tokenizer}) and the outer end (sink), an
* instance of {@link TokenFilter} which also serves as the
* {@link TokenStream} returned by
* {@link Analyzer#tokenStream(String, Reader)} and
* {@link Analyzer#reusableTokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
protected final Tokenizer source;
protected final TokenStream sink;
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
* @param result
* the analyzer's resulting token stream
*/
public TokenStreamComponents(final Tokenizer source,
final TokenStream result) {
this.source = source;
this.sink = result;
}
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
*/
public TokenStreamComponents(final Tokenizer source) {
this.source = source;
this.sink = source;
}
/**
* Resets the encapsulated components with the given reader. If the components
* cannot be reset, an Exception should be thrown.
*
* @param reader
* a reader to reset the source component
* @throws IOException
* if the component's reset method throws an {@link IOException}
*/
protected void reset(final Reader reader) throws IOException {
source.reset(reader);
}
/**
* Returns the sink {@link TokenStream}
*
* @return the sink {@link TokenStream}
*/
protected TokenStream getTokenStream() {
return sink;
}
}
/**
* Strategy defining how TokenStreamComponents are reused per call to
* {@link ReusableAnalyzerBase#tokenStream(String, java.io.Reader)}.
*/
public static abstract class ReuseStrategy {
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
/**
* Gets the reusable TokenStreamComponents for the field with the given name
*
* @param fieldName Name of the field whose reusable TokenStreamComponents
* are to be retrieved
* @return Reusable TokenStreamComponents for the field, or {@code null}
* if there was no previous components for the field
*/
public abstract TokenStreamComponents getReusableComponents(String fieldName);
/**
* Stores the given TokenStreamComponents as the reusable components for the
* field with the give name
*
* @param fieldName Name of the field whose TokenStreamComponents are being set
* @param components TokenStreamComponents which are to be reused for the field
*/
public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
/**
* Returns the currently stored value
*
* @return Currently stored value or {@code null} if no value is stored
*/
protected final Object getStoredValue() {
try {
return storedValue.get();
} catch (NullPointerException npe) {
if (storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/**
* Sets the stored value
*
* @param storedValue Value to store
*/
protected final void setStoredValue(Object storedValue) {
try {
this.storedValue.set(storedValue);
} catch (NullPointerException npe) {
if (storedValue == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/**
* Closes the ReuseStrategy, freeing any resources
*/
public void close() {
storedValue.close();
storedValue = null;
}
}
/**
* Implementation of {@link ReuseStrategy} that reuses the same components for
* every field.
*/
public final static class GlobalReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
public TokenStreamComponents getReusableComponents(String fieldName) {
return (TokenStreamComponents) getStoredValue();
}
/**
* {@inheritDoc}
*/
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
setStoredValue(components);
}
}
/**
* Implementation of {@link ReuseStrategy} that reuses components per-field by
* maintaining a Map of TokenStreamComponent per field name.
*/
public static class PerFieldReuseStrategy extends ReuseStrategy {
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
public TokenStreamComponents getReusableComponents(String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
if (componentsPerField == null) {
componentsPerField = new HashMap<String, TokenStreamComponents>();
setStoredValue(componentsPerField);
}
componentsPerField.put(fieldName, components);
}
}
}

View File

@ -42,7 +42,7 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
* </ul>
* @see MockTokenizer
*/
public final class MockAnalyzer extends ReusableAnalyzerBase {
public final class MockAnalyzer extends Analyzer {
private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase;
private final CharacterRunAutomaton filter;

View File

@ -30,7 +30,7 @@ import java.io.Reader;
*
*
**/
public final class MockPayloadAnalyzer extends ReusableAnalyzerBase {
public final class MockPayloadAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -19,7 +19,6 @@ package org.apache.lucene;
import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -35,7 +34,7 @@ public class TestAssertions extends LuceneTestCase {
}
}
static class TestAnalyzer1 extends ReusableAnalyzerBase {
static class TestAnalyzer1 extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@ -43,7 +42,7 @@ public class TestAssertions extends LuceneTestCase {
}
}
static final class TestAnalyzer2 extends ReusableAnalyzerBase {
static final class TestAnalyzer2 extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@ -51,7 +50,7 @@ public class TestAssertions extends LuceneTestCase {
}
}
static class TestAnalyzer3 extends ReusableAnalyzerBase {
static class TestAnalyzer3 extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@ -59,7 +58,7 @@ public class TestAssertions extends LuceneTestCase {
}
}
static class TestAnalyzer4 extends ReusableAnalyzerBase {
static class TestAnalyzer4 extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {

View File

@ -103,7 +103,7 @@ public class TestDocumentWriter extends LuceneTestCase {
}
public void testPositionIncrementGap() throws IOException {
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
@ -138,7 +138,7 @@ public class TestDocumentWriter extends LuceneTestCase {
}
public void testTokenReuse() throws IOException {
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

View File

@ -1706,7 +1706,7 @@ public class TestIndexWriter extends LuceneTestCase {
dir.close();
}
static final class StringSplitAnalyzer extends ReusableAnalyzerBase {
static final class StringSplitAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new StringSplitTokenizer(reader));

View File

@ -175,7 +175,7 @@ public class TestIndexWriterCommit extends LuceneTestCase {
Analyzer analyzer;
if (random.nextBoolean()) {
// no payloads
analyzer = new ReusableAnalyzerBase() {
analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
@ -184,7 +184,7 @@ public class TestIndexWriterCommit extends LuceneTestCase {
} else {
// fixed length payloads
final int length = random.nextInt(200);
analyzer = new ReusableAnalyzerBase() {
analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

View File

@ -899,7 +899,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
final Random r = random;
Directory dir = newDirectory();
// note this test explicitly disables payloads
final Analyzer analyzer = new ReusableAnalyzerBase() {
final Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));

View File

@ -386,7 +386,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
doc.add(newField("field", "a field", TextField.TYPE_STORED));
w.addDocument(doc);
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@ -454,7 +454,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
// LUCENE-1072
public void testExceptionFromTokenStream() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@ -591,7 +591,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
}
public void testDocumentsWriterExceptions() throws IOException {
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@ -687,7 +687,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
}
public void testDocumentsWriterExceptionThreads() throws Exception {
Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

View File

@ -68,7 +68,7 @@ public class TestLazyProxSkipping extends LuceneTestCase {
private void createIndex(int numHits) throws IOException {
int numDocs = 500;
final Analyzer analyzer = new ReusableAnalyzerBase() {
final Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));

View File

@ -111,7 +111,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
}
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private static class PayloadAnalyzer extends Analyzer {
private final AtomicInteger payloadCount = new AtomicInteger(-1);
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -405,7 +405,7 @@ public class TestPayloads extends LuceneTestCase {
/**
* This Analyzer uses a WhitespaceTokenizer and PayloadFilter.
*/
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private static class PayloadAnalyzer extends Analyzer {
Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();
public PayloadAnalyzer() {

View File

@ -20,7 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -63,7 +63,7 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
}
}
final class BugReproAnalyzer extends ReusableAnalyzerBase {
final class BugReproAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String arg0, Reader arg1) {
return new TokenStreamComponents(new BugReproAnalyzerTokenizer());

View File

@ -175,7 +175,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
}
}
private class MyAnalyzer extends ReusableAnalyzerBase {
private class MyAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MyTokenStream());

View File

@ -22,7 +22,6 @@ import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
@ -77,7 +76,7 @@ public class TestTermdocPerf extends LuceneTestCase {
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(ts);

View File

@ -17,7 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -346,7 +345,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
}
}
private static class CannedAnalyzer extends ReusableAnalyzerBase {
private static class CannedAnalyzer extends Analyzer {
private final TokenAndPos[] tokens;
public CannedAnalyzer(TokenAndPos[] tokens) {

View File

@ -55,7 +55,7 @@ public class TestPhraseQuery extends LuceneTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
directory = newDirectory();
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));

View File

@ -56,7 +56,7 @@ public class TestPositionIncrement extends LuceneTestCase {
final static boolean VERBOSE = false;
public void testSetPosition() throws Exception {
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new Tokenizer() {

View File

@ -190,7 +190,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
assertFalse("queries with different inclusive are not equal", query.equals(other));
}
private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
private static class SingleCharAnalyzer extends Analyzer {
private static class SingleCharTokenizer extends Tokenizer {
char[] buffer = new char[1];

View File

@ -55,7 +55,7 @@ public class PayloadHelper {
public IndexReader reader;
public final class PayloadAnalyzer extends ReusableAnalyzerBase {
public final class PayloadAnalyzer extends Analyzer {
public PayloadAnalyzer() {
super(new PerFieldReuseStrategy());

View File

@ -55,7 +55,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
private static byte[] payload2 = new byte[]{2};
private static byte[] payload4 = new byte[]{4};
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private static class PayloadAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

View File

@ -64,7 +64,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
private static final byte[] payloadMultiField2 = new byte[]{4};
protected static Directory directory;
private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private static class PayloadAnalyzer extends Analyzer {
private PayloadAnalyzer() {
super(new PerFieldReuseStrategy());

View File

@ -96,7 +96,7 @@ public class TestBasics extends LuceneTestCase {
}
}
static final Analyzer simplePayloadAnalyzer = new ReusableAnalyzerBase() {
static final Analyzer simplePayloadAnalyzer = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -476,7 +476,7 @@ public class TestPayloadSpans extends LuceneTestCase {
assertEquals(numSpans, cnt);
}
final class PayloadAnalyzer extends ReusableAnalyzerBase {
final class PayloadAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@ -530,7 +530,7 @@ public class TestPayloadSpans extends LuceneTestCase {
}
}
public final class TestPayloadAnalyzer extends ReusableAnalyzerBase {
public final class TestPayloadAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -126,10 +126,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}

View File

@ -107,11 +107,11 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -117,10 +117,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
* , and {@link BrazilianStemFilter}.

View File

@ -105,11 +105,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.cn;
import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
@ -31,14 +30,14 @@ import org.apache.lucene.analysis.Tokenizer;
* This analyzer will be removed in Lucene 5.0
*/
@Deprecated
public final class ChineseAnalyzer extends ReusableAnalyzerBase {
public final class ChineseAnalyzer extends Analyzer {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link ChineseTokenizer} filtered with
* {@link ChineseFilter}
*/

View File

@ -19,13 +19,13 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
/**
* "Tokenizes" the entire stream as a single token. This is useful
* for data like zip codes, ids, and some product names.
*/
public final class KeywordAnalyzer extends ReusableAnalyzerBase {
public final class KeywordAnalyzer extends Analyzer {
public KeywordAnalyzer() {
}

View File

@ -21,7 +21,6 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
/** An {@link Analyzer} that filters {@link LetterTokenizer}
@ -36,7 +35,7 @@ import org.apache.lucene.util.Version;
* </ul>
* <p>
**/
public final class SimpleAnalyzer extends ReusableAnalyzerBase {
public final class SimpleAnalyzer extends Analyzer {
private final Version matchVersion;

View File

@ -95,10 +95,10 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link LowerCaseTokenizer} filtered with
* {@link StopFilter}
*/

View File

@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
/**
@ -35,7 +35,7 @@ import org.apache.lucene.util.Version;
* </ul>
* <p>
**/
public final class WhitespaceAnalyzer extends ReusableAnalyzerBase {
public final class WhitespaceAnalyzer extends Analyzer {
private final Version matchVersion;

View File

@ -26,7 +26,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
@ -122,10 +121,10 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
@ -135,7 +134,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
* {@link CzechStemFilter}.
*/
@Override
protected ReusableAnalyzerBase.TokenStreamComponents createComponents(String fieldName,
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(matchVersion, source);

View File

@ -106,11 +106,11 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -158,10 +158,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -101,10 +101,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link GreekLowerCaseFilter}, {@link StandardFilter},
* {@link StopFilter}, and {@link GreekStemFilter}

View File

@ -89,11 +89,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -106,11 +106,11 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -105,11 +105,11 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -107,10 +107,10 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
* {@link PersianNormalizationFilter} and Persian Stop words

View File

@ -106,11 +106,11 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -168,10 +168,10 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link ElisionFilter},
* {@link LowerCaseFilter}, {@link StopFilter},

View File

@ -104,11 +104,11 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -106,10 +106,10 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link IndicTokenizer} filtered with
* {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
* {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter}

View File

@ -106,11 +106,11 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -105,11 +105,11 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -106,10 +106,10 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerFilter}

View File

@ -123,11 +123,11 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -104,11 +104,11 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -18,17 +18,13 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableField;
import java.io.Reader;
import java.io.IOException;
import org.apache.lucene.analysis.AnalyzerWrapper;
/**
* This Analyzer limits the number of tokens while indexing. It is
* a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}.
*/
public final class LimitTokenCountAnalyzer extends Analyzer {
public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
private final Analyzer delegate;
private final int maxTokenCount;
@ -39,29 +35,16 @@ public final class LimitTokenCountAnalyzer extends Analyzer {
this.delegate = delegate;
this.maxTokenCount = maxTokenCount;
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new LimitTokenCountFilter(
delegate.tokenStream(fieldName, reader), maxTokenCount
);
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return new LimitTokenCountFilter(
delegate.reusableTokenStream(fieldName, reader), maxTokenCount
);
}
@Override
public int getPositionIncrementGap(String fieldName) {
return delegate.getPositionIncrementGap(fieldName);
protected Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
}
@Override
public int getOffsetGap(IndexableField field) {
return delegate.getOffsetGap(field);
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return new TokenStreamComponents(components.getTokenizer(),
new LimitTokenCountFilter(components.getTokenStream(), maxTokenCount));
}
@Override
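With the rewrite above, LimitTokenCountAnalyzer only supplies getWrappedAnalyzer() and wrapComponents(); the getPositionIncrementGap/getOffsetGap overrides removed here are presumably provided by AnalyzerWrapper delegating to the wrapped analyzer. A rough usage sketch follows; the wrapped WhitespaceAnalyzer, the limit of 100, and the class name LimitTokenCountExample are arbitrary choices for illustration.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.util.Version;

public class LimitTokenCountExample {
  public static Analyzer limitedAnalyzer() {
    // Whatever the delegate produces, only the first 100 tokens per field are kept.
    Analyzer delegate = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    return new LimitTokenCountAnalyzer(delegate, 100);
  }
}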

View File

@ -27,7 +27,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer;
@ -67,7 +66,7 @@ import org.apache.lucene.util.Version;
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
*/
@Deprecated
public final class PatternAnalyzer extends ReusableAnalyzerBase {
public final class PatternAnalyzer extends Analyzer {
/** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");

View File

@ -18,14 +18,10 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.analysis.AnalyzerWrapper;
import java.io.Reader;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
/**
* This analyzer is used to facilitate scenarios where different
@ -50,7 +46,7 @@ import java.util.HashMap;
* <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
* and query parsing.
*/
public final class PerFieldAnalyzerWrapper extends Analyzer {
public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
private final Analyzer defaultAnalyzer;
private final Map<String, Analyzer> fieldAnalyzers;
@ -74,47 +70,20 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
* used for those fields
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
Map<String,Analyzer> fieldAnalyzers) {
Map<String, Analyzer> fieldAnalyzers) {
this.defaultAnalyzer = defaultAnalyzer;
this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, Analyzer>emptyMap();
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
protected Analyzer getWrappedAnalyzer(String fieldName) {
Analyzer analyzer = fieldAnalyzers.get(fieldName);
if (analyzer == null) {
analyzer = defaultAnalyzer;
}
return analyzer.tokenStream(fieldName, reader);
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
Analyzer analyzer = fieldAnalyzers.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
return analyzer.reusableTokenStream(fieldName, reader);
}
/** Return the positionIncrementGap from the analyzer assigned to fieldName */
@Override
public int getPositionIncrementGap(String fieldName) {
Analyzer analyzer = fieldAnalyzers.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
return analyzer.getPositionIncrementGap(fieldName);
return (analyzer != null) ? analyzer : defaultAnalyzer;
}
/** Return the offsetGap from the analyzer assigned to field */
@Override
public int getOffsetGap(IndexableField field) {
Analyzer analyzer = fieldAnalyzers.get(field.name());
if (analyzer == null) {
analyzer = defaultAnalyzer;
}
return analyzer.getOffsetGap(field);
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}
@Override
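PerFieldAnalyzerWrapper keeps its two-argument constructor, so client code is unchanged; only the per-field dispatch moves into getWrappedAnalyzer(), while wrapComponents() passes the delegate's components through untouched. A minimal usage sketch (the field name "id", the chosen analyzers, and the class name PerFieldExample are illustrative):

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.util.Version;

public class PerFieldExample {
  public static Analyzer perFieldAnalyzer() {
    Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
    // The "id" field is analyzed as a single keyword token; all other fields use the default.
    fieldAnalyzers.put("id", new KeywordAnalyzer());
    return new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_CURRENT), fieldAnalyzers);
  }
}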

View File

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
@ -66,7 +65,7 @@ import java.util.Map;
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class DutchAnalyzer extends ReusableAnalyzerBase {
public final class DutchAnalyzer extends Analyzer {
/** File containing default Dutch stopwords. */
public final static String DEFAULT_STOPWORD_FILE = "dutch_stop.txt";

View File

@ -106,11 +106,11 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -106,11 +106,11 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -16,20 +16,19 @@ package org.apache.lucene.analysis.query;
* limitations under the License.
*/
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
/**
@ -42,7 +41,7 @@ import java.util.*;
* this term to take 2 seconds.
* </p>
*/
public final class QueryAutoStopWordAnalyzer extends Analyzer {
public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
private final Analyzer delegate;
private final Map<String, Set<String>> stopWordsPerField = new HashMap<String, Set<String>>();
@ -101,7 +100,7 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
*/
public QueryAutoStopWordAnalyzer(
Version matchVersion,
Analyzer delegate,
Analyzer delegate,
IndexReader indexReader,
float maxPercentDocs) throws IOException {
this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxPercentDocs);
@ -168,79 +167,18 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result;
try {
result = delegate.reusableTokenStream(fieldName, reader);
} catch (IOException e) {
result = delegate.tokenStream(fieldName, reader);
}
Set<String> stopWords = stopWordsPerField.get(fieldName);
if (stopWords != null) {
result = new StopFilter(matchVersion, result, stopWords);
}
return result;
}
private class SavedStreams {
/* the underlying stream */
TokenStream wrapped;
/*
* when there are no stopwords for the field, refers to wrapped.
* if there stopwords, it is a StopFilter around wrapped.
*/
TokenStream withStopFilter;
protected Analyzer getWrappedAnalyzer(String fieldName) {
return delegate;
}
@SuppressWarnings("unchecked")
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
/* map of SavedStreams for each field */
Map<String,SavedStreams> streamMap = (Map<String,SavedStreams>) getPreviousTokenStream();
if (streamMap == null) {
streamMap = new HashMap<String, SavedStreams>();
setPreviousTokenStream(streamMap);
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
Set<String> stopWords = stopWordsPerField.get(fieldName);
if (stopWords == null) {
return components;
}
SavedStreams streams = streamMap.get(fieldName);
if (streams == null) {
/* an entry for this field does not exist, create one */
streams = new SavedStreams();
streamMap.put(fieldName, streams);
streams.wrapped = delegate.reusableTokenStream(fieldName, reader);
/* if there are any stopwords for the field, save the stopfilter */
Set<String> stopWords = stopWordsPerField.get(fieldName);
if (stopWords != null) {
streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
} else {
streams.withStopFilter = streams.wrapped;
}
} else {
/*
* an entry for this field exists, verify the wrapped stream has not
* changed. if it has not, reuse it, otherwise wrap the new stream.
*/
TokenStream result = delegate.reusableTokenStream(fieldName, reader);
if (result == streams.wrapped) {
/* the wrapped analyzer reused the stream */
} else {
/*
* the wrapped analyzer did not. if there are any stopwords for the
* field, create a new StopFilter around the new stream
*/
streams.wrapped = result;
Set<String> stopWords = stopWordsPerField.get(fieldName);
if (stopWords != null) {
streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
} else {
streams.withStopFilter = streams.wrapped;
}
}
}
return streams.withStopFilter;
StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), stopWords);
return new TokenStreamComponents(components.getTokenizer(), stopFilter);
}
/**
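QueryAutoStopWordAnalyzer's per-field SavedStreams bookkeeping disappears entirely: the wrapper now just adds a StopFilter in wrapComponents() when stop words were collected for the field, and stream reuse is left to the Analyzer/AnalyzerWrapper machinery. A usage sketch based on the four-argument constructor shown above (the WhitespaceAnalyzer delegate, the 0.4 threshold, and the class name QueryAutoStopWordExample are arbitrary):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.Version;

public class QueryAutoStopWordExample {
  public static Analyzer queryAnalyzer(IndexReader reader) throws IOException {
    // Terms occurring in more than 40% of the documents of any indexed field are
    // treated as stop words when this analyzer is used at query time.
    Analyzer delegate = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    return new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, delegate, reader, 0.4f);
  }
}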

View File

@ -110,11 +110,11 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -139,10 +139,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -17,11 +17,8 @@ package org.apache.lucene.analysis.shingle;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
@ -31,7 +28,7 @@ import org.apache.lucene.util.Version;
* A shingle is another name for a token based n-gram.
* </p>
*/
public final class ShingleAnalyzerWrapper extends Analyzer {
public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
private final Analyzer defaultAnalyzer;
private final int maxShingleSize;
@ -140,48 +137,18 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream wrapped;
try {
wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
} catch (IOException e) {
wrapped = defaultAnalyzer.tokenStream(fieldName, reader);
}
ShingleFilter filter = new ShingleFilter(wrapped, minShingleSize, maxShingleSize);
protected Analyzer getWrappedAnalyzer(String fieldName) {
return defaultAnalyzer;
}
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
ShingleFilter filter = new ShingleFilter(components.getTokenStream(), minShingleSize, maxShingleSize);
filter.setMinShingleSize(minShingleSize);
filter.setMaxShingleSize(maxShingleSize);
filter.setTokenSeparator(tokenSeparator);
filter.setOutputUnigrams(outputUnigrams);
filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
return filter;
}
private class SavedStreams {
TokenStream wrapped;
ShingleFilter shingle;
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
streams.shingle = new ShingleFilter(streams.wrapped);
setPreviousTokenStream(streams);
} else {
TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
if (result != streams.wrapped) {
/* the wrapped analyzer did not, create a new shingle around the new one */
streams.wrapped = result;
streams.shingle = new ShingleFilter(streams.wrapped);
}
}
streams.shingle.setMaxShingleSize(maxShingleSize);
streams.shingle.setMinShingleSize(minShingleSize);
streams.shingle.setTokenSeparator(tokenSeparator);
streams.shingle.setOutputUnigrams(outputUnigrams);
streams.shingle.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
return streams.shingle;
return new TokenStreamComponents(components.getTokenizer(), filter);
}
}
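The same two overrides are all a custom wrapper needs. As a sketch of the general pattern (the class name LowerCasingWrapper and the appended filter are illustrative and not part of this commit):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.util.Version;

public final class LowerCasingWrapper extends AnalyzerWrapper {
  private final Analyzer delegate;

  public LowerCasingWrapper(Analyzer delegate) {
    this.delegate = delegate;
  }

  @Override
  protected Analyzer getWrappedAnalyzer(String fieldName) {
    return delegate;
  }

  @Override
  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    // Keep the delegate's Tokenizer and append one more filter to the end of its chain.
    return new TokenStreamComponents(components.getTokenizer(),
        new LowerCaseFilter(Version.LUCENE_CURRENT, components.getTokenStream()));
  }
}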

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
import java.io.Reader;
@ -47,7 +46,7 @@ import java.util.Set;
* This analyzer will be removed in Lucene 5.0
*/
@Deprecated
public final class SnowballAnalyzer extends ReusableAnalyzerBase {
public final class SnowballAnalyzer extends Analyzer {
private String name;
private Set<?> stopSet;
private final Version matchVersion;

View File

@ -106,11 +106,11 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@ -27,7 +27,6 @@ import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
/**
@ -36,7 +35,7 @@ import org.apache.lucene.util.Version;
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class ThaiAnalyzer extends ReusableAnalyzerBase {
public final class ThaiAnalyzer extends Analyzer {
private final Version matchVersion;
public ThaiAnalyzer(Version matchVersion) {
@ -45,10 +44,10 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from a {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link ThaiWordFilter}, and
* {@link StopFilter}

View File

@ -109,11 +109,11 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
/**
* Creates a
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link TurkishLowerCaseFilter},
* {@link StopFilter}, {@link KeywordMarkerFilter} if a stem

View File

@ -20,15 +20,14 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.Version;
/**
* Base class for Analyzers that need to make use of stopword sets.
*
*/
public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
public abstract class StopwordAnalyzerBase extends Analyzer {
/**
* An immutable stopword set
@ -92,7 +91,7 @@ public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
* if loading the stopwords throws an {@link IOException}
*/
protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
final Class<? extends Analyzer> aClass, final String resource,
final String comment) throws IOException {
final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
comment);
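The only change subclasses see is the Class<? extends Analyzer> parameter; a typical use of loadStopwordSet() looks roughly like the sketch below. The resource name my_stop.txt, the class MyStopwordAnalyzer, and the assumption that StopwordAnalyzerBase exposes a (Version, Set) constructor plus protected matchVersion/stopwords fields are mine, not part of this hunk.

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;

public final class MyStopwordAnalyzer extends StopwordAnalyzerBase {
  private static final String STOPWORD_FILE = "my_stop.txt"; // hypothetical classpath resource

  public MyStopwordAnalyzer(Version matchVersion) throws IOException {
    // loadStopwordSet(ignoreCase, clazz, resource, comment) as declared above.
    super(matchVersion, loadStopwordSet(false, MyStopwordAnalyzer.class, STOPWORD_FILE, "#"));
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StopFilter(matchVersion, source, stopwords);
    return new TokenStreamComponents(source, result);
  }
}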

View File

@ -18,8 +18,8 @@ package org.apache.lucene.collation;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
import org.apache.lucene.util.Version;
@ -82,7 +82,7 @@ import java.io.Reader;
* versions will encode the bytes with {@link IndexableBinaryStringTools}.
* </ul>
*/
public final class CollationKeyAnalyzer extends ReusableAnalyzerBase {
public final class CollationKeyAnalyzer extends Analyzer {
private final Collator collator;
private final CollationAttributeFactory factory;
private final Version matchVersion;

View File

@ -64,7 +64,7 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
* Analyzer that just uses ChineseTokenizer, not ChineseFilter.
* convenience to show the behavior of the tokenizer
*/
private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
private class JustChineseTokenizerAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
return new TokenStreamComponents(new ChineseTokenizer(reader));
@ -75,7 +75,7 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
* Analyzer that just uses ChineseFilter, not ChineseTokenizer.
* convenience to show the behavior of the filter.
*/
private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
private class JustChineseFilterAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);

View File

@ -84,7 +84,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
* @return Map<String,String>
*/
public void testCommonGramsQueryFilter() throws Exception {
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field, Reader in) {
Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
@ -154,7 +154,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
}
public void testCommonGramsFilter() throws Exception {
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field, Reader in) {
Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);

View File

@ -117,7 +117,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
String[] y = StandardTokenizer.TOKEN_TYPES;
}
private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {

View File

@ -5,7 +5,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
import java.io.IOException;
@ -43,7 +42,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
private Analyzer a = new ReusableAnalyzerBase() {
private Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents
(String fieldName, Reader reader) {

View File

@ -8,7 +8,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
import java.io.BufferedReader;
@ -50,7 +49,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
private Analyzer a = new ReusableAnalyzerBase() {
private Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents
(String fieldName, Reader reader) {
@ -99,7 +98,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
}
}
private Analyzer urlAnalyzer = new ReusableAnalyzerBase() {
private Analyzer urlAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
@ -109,7 +108,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
}
};
private Analyzer emailAnalyzer = new ReusableAnalyzerBase() {
private Analyzer emailAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
@ -431,7 +430,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
/** @deprecated remove this and sophisticated backwards layer in 5.0 */
@Deprecated
public void testCombiningMarksBackwards() throws Exception {
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents
(String fieldName, Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link GermanLightStemFilter}
*/
public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link GermanMinimalStemFilter}
*/
public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -25,7 +25,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -36,7 +35,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
*
*/
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
Analyzer analyzer = new ReusableAnalyzerBase() {
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,13 +24,12 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
* Simple tests for {@link EnglishMinimalStemFilter}
*/
public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -25,13 +25,12 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
* Tests for {@link KStemmer}
*/
public class TestKStemmer extends BaseTokenStreamTestCase {
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);

View File

@ -24,7 +24,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
@ -36,7 +35,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Test the PorterStemFilter with Martin Porter's test data.
*/
public class TestPorterStemFilter extends BaseTokenStreamTestCase {
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link SpanishLightStemFilter}
*/
public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link FinnishLightStemFilter}
*/
public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link FrenchLightStemFilter}
*/
public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link FrenchMinimalStemFilter}
*/
public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -28,13 +28,12 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
* Simple tests for {@link GalicianStemFilter}
*/
public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link HungarianLightStemFilter}
*/
public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,14 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
* Tests {@link IndonesianStemmer}
*/
public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
/* full stemming, no stopwords */
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);
@ -112,7 +111,7 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
}
/* inflectional-only stemming */
Analyzer b = new ReusableAnalyzerBase() {
Analyzer b = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link ItalianLightStemFilter}
*/
public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -24,13 +24,12 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
* Basic tests for {@link LatvianStemmer}
*/
public class TestLatvianStemmer extends BaseTokenStreamTestCase {
private Analyzer a = new ReusableAnalyzerBase() {
private Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

View File

@ -213,7 +213,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
/* analyzer that uses whitespace + wdf */
Analyzer a = new ReusableAnalyzerBase() {
Analyzer a = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@ -241,7 +241,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 1, 1, 1 });
/* analyzer that will consume tokens with large position increments */
Analyzer a2 = new ReusableAnalyzerBase() {
Analyzer a2 = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@ -274,7 +274,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
new int[] { 6, 14, 19 },
new int[] { 1, 11, 1 });
Analyzer a3 = new ReusableAnalyzerBase() {
Analyzer a3 = new Analyzer() {
@Override
public TokenStreamComponents createComponents(String field, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

View File

@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -34,7 +33,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link PortugueseLightStemFilter}
*/
public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.VocabularyAssert.*;
@ -34,7 +33,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
* Simple tests for {@link PortugueseMinimalStemFilter}
*/
public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

View File

@ -28,13 +28,12 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
/**
* Simple tests for {@link PortugueseStemFilter}
*/
public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer = new ReusableAnalyzerBase() {
private Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {

Some files were not shown because too many files have changed in this diff.