mirror of https://github.com/apache/lucene.git
LUCENE-3396: Collapsing Analyzer and ReusableAnalyzerBase together, mandating use of TokenStreamComponents
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1175297 13f79535-47bb-0310-9956-ffa450edef68
parent 48e53be99a
commit 4ff0b2f82c
@@ -174,6 +174,10 @@ Changes in backwards compatibility policy
 * LUCENE-3396: ReusableAnalyzerBase.TokenStreamComponents.reset(Reader) now returns void instead
   of boolean. If a Component cannot be reset, it should throw an Exception. (Chris Male)
 
+* LUCENE-3396: ReusableAnalyzerBase has been renamed to Analyzer. All Analyzer implementations
+  must now use Analyzer.TokenStreamComponents, rather than overriding .tokenStream() and
+  .reusableTokenStream() (which are now final). (Chris Male)
+
 Changes in Runtime Behavior
 
 * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
@@ -517,3 +517,8 @@ If you did this before (bytes is a byte[]):
 you can now do this:
 
   new BinaryField("field", bytes)
+
+* LUCENE-3396: Analyzer.tokenStream() and .reusableTokenStream() have been made final.
+  It is now necessary to use Analyzer.TokenStreamComponents to define an analysis process.
+  Analyzer also has its own way of managing the reuse of TokenStreamComponents (either
+  globally, or per-field). To define another Strategy, implement Analyzer.ReuseStrategy.
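Concretely, the migration these entries describe looks like the following sketch, patterned on the test changes later in this commit (MockTokenizer is Lucene's test-framework tokenizer; any real Tokenizer works the same way):

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.MockTokenizer;

    // Before LUCENE-3396 an Analyzer overrode tokenStream() and
    // reusableTokenStream(); both are final now, so the chain is declared
    // once in createComponents() and the base class manages reuse.
    final class WhitespaceLowerCaseAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // MockTokenizer(reader, pattern, lowerCase) is the test-framework tokenizer.
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
      }
    }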
@@ -1802,7 +1802,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   // behaviour to synonyms
   // ===================================================================
 
-final class SynonymAnalyzer extends ReusableAnalyzerBase {
+final class SynonymAnalyzer extends Analyzer {
   private Map<String,String> synonyms;
 
   public SynonymAnalyzer(Map<String,String> synonyms) {
@@ -49,7 +49,7 @@ public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
     assertTokenStreamContents(filter, new String[] {"short", "toolong",
         "evenmuchlongertext"});
 
-    checkOneTermReuse(new ReusableAnalyzerBase() {
+    checkOneTermReuse(new Analyzer() {
 
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -48,7 +48,7 @@ import org.apache.lucene.util.LuceneTestCase;
 public class TokenSourcesTest extends LuceneTestCase {
   private static final String FIELD = "text";
 
-  private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
+  private static final class OverlapAnalyzer extends Analyzer {
 
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -194,7 +194,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
     return phraseQuery;
   }
 
-  static final class BigramAnalyzer extends ReusableAnalyzerBase {
+  static final class BigramAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       return new TokenStreamComponents(new BasicNGramTokenizer(reader));
@@ -290,7 +290,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
     return token;
   }
 
-  public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
+  public static final class TokenArrayAnalyzer extends Analyzer {
    final Token[] tokens;
    public TokenArrayAnalyzer(Token... tokens) {
      this.tokens = tokens;
@@ -1,6 +1,6 @@
 package org.apache.lucene.analysis;
 
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -17,98 +17,106 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import java.io.Reader;
-import java.io.IOException;
-import java.io.Closeable;
-import java.lang.reflect.Modifier;
-
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.CloseableThreadLocal;
 
-/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
- * policy for extracting index terms from text.
- * <p>
- * Typical implementations first build a Tokenizer, which breaks the stream of
- * characters from the Reader into raw Tokens. One or more TokenFilters may
- * then be applied to the output of the Tokenizer.
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * An Analyzer builds TokenStreams, which analyze text. It thus represents a
+ * policy for extracting index terms from text.
+ * <p>
+ * To prevent consistency problems, this class does not allow subclasses to
+ * extend {@link #reusableTokenStream(String, Reader)} or
+ * {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
+ * implement {@link #createComponents(String, Reader)}.
+ * </p>
 * <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
- * Therefore all non-abstract subclasses must be final or their {@link #tokenStream}
- * and {@link #reusableTokenStream} implementations must be final! This is checked
+ * Therefore all non-abstract subclasses must be final! This is checked
  * when Java assertions are enabled.
  */
-public abstract class Analyzer implements Closeable {
+public abstract class Analyzer {
 
-  protected Analyzer() {
-    super();
-    assert assertFinal();
+  private final ReuseStrategy reuseStrategy;
+
+  public Analyzer() {
+    this(new GlobalReuseStrategy());
   }
 
-  private boolean assertFinal() {
-    try {
-      final Class<?> clazz = getClass();
-      if (!clazz.desiredAssertionStatus())
-        return true;
-      assert clazz.isAnonymousClass() ||
-        (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
-        (
-          Modifier.isFinal(clazz.getMethod("tokenStream", String.class, Reader.class).getModifiers()) &&
-          Modifier.isFinal(clazz.getMethod("reusableTokenStream", String.class, Reader.class).getModifiers())
-        ) :
-        "Analyzer implementation classes or at least their tokenStream() and reusableTokenStream() implementations must be final";
-      return true;
-    } catch (NoSuchMethodException nsme) {
-      return false;
-    }
+  public Analyzer(ReuseStrategy reuseStrategy) {
+    this.reuseStrategy = reuseStrategy;
   }
 
-  /** Creates a TokenStream which tokenizes all the text in the provided
-   * Reader. Must be able to handle null field name for
-   * backward compatibility.
+  /**
+   * Creates a new {@link TokenStreamComponents} instance for this analyzer.
+   *
+   * @param fieldName
+   *          the name of the fields content passed to the
+   *          {@link TokenStreamComponents} sink as a reader
+   * @param aReader
+   *          the reader passed to the {@link Tokenizer} constructor
+   * @return the {@link TokenStreamComponents} for this analyzer.
    */
-  public abstract TokenStream tokenStream(String fieldName, Reader reader);
+  protected abstract TokenStreamComponents createComponents(String fieldName,
+      Reader aReader);
 
-  /** Creates a TokenStream that is allowed to be re-used
-   * from the previous time that the same thread called
-   * this method. Callers that do not need to use more
-   * than one TokenStream at the same time from this
-   * analyzer should use this method for better
-   * performance.
+  /**
+   * Creates a TokenStream that is allowed to be re-use from the previous time
+   * that the same thread called this method. Callers that do not need to use
+   * more than one TokenStream at the same time from this analyzer should use
+   * this method for better performance.
+   * <p>
+   * This method uses {@link #createComponents(String, Reader)} to obtain an
+   * instance of {@link TokenStreamComponents}. It returns the sink of the
+   * components and stores the components internally. Subsequent calls to this
+   * method will reuse the previously stored components after resetting them
+   * through {@link TokenStreamComponents#reset(Reader)}.
+   * </p>
+   *
+   * @param fieldName the name of the field the created TokenStream is used for
+   * @param reader the reader the streams source reads from
    */
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    return tokenStream(fieldName, reader);
+  public final TokenStream reusableTokenStream(final String fieldName,
+      final Reader reader) throws IOException {
+    TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
+    final Reader r = initReader(reader);
+    if (components == null) {
+      components = createComponents(fieldName, r);
+      reuseStrategy.setReusableComponents(fieldName, components);
+    } else {
+      components.reset(r);
+    }
+    return components.getTokenStream();
   }
 
-  private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
-
-  /** Used by Analyzers that implement reusableTokenStream
-   * to retrieve previously saved TokenStreams for re-use
-   * by the same thread. */
-  protected Object getPreviousTokenStream() {
-    try {
-      return tokenStreams.get();
-    } catch (NullPointerException npe) {
-      if (tokenStreams == null) {
-        throw new AlreadyClosedException("this Analyzer is closed");
-      } else {
-        throw npe;
-      }
-    }
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided
+   * Reader.
+   * <p>
+   * This method uses {@link #createComponents(String, Reader)} to obtain an
+   * instance of {@link TokenStreamComponents} and returns the sink of the
+   * components. Each calls to this method will create a new instance of
+   * {@link TokenStreamComponents}. Created {@link TokenStream} instances are
+   * never reused.
+   * </p>
+   *
+   * @param fieldName the name of the field the created TokenStream is used for
+   * @param reader the reader the streams source reads from
+   */
+  public final TokenStream tokenStream(final String fieldName,
+      final Reader reader) {
+    return createComponents(fieldName, initReader(reader)).getTokenStream();
   }
 
-  /** Used by Analyzers that implement reusableTokenStream
-   * to save a TokenStream for later re-use by the same
-   * thread. */
-  protected void setPreviousTokenStream(Object obj) {
-    try {
-      tokenStreams.set(obj);
-    } catch (NullPointerException npe) {
-      if (tokenStreams == null) {
-        throw new AlreadyClosedException("this Analyzer is closed");
-      } else {
-        throw npe;
-      }
-    }
+  /**
+   * Override this if you want to add a CharFilter chain.
+   */
+  protected Reader initReader(Reader reader) {
+    return reader;
   }
 
   /**
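The initReader() hook introduced above is the seam for pre-tokenization character filtering. A minimal sketch of overriding it (BufferedReader is only a stand-in decoration; a real implementation would return a CharFilter chain):

    import java.io.BufferedReader;
    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.MockTokenizer;

    final class CharFilteringAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
      }

      @Override
      protected Reader initReader(Reader reader) {
        // Decorate the incoming Reader before the Tokenizer consumes it.
        return new BufferedReader(reader);
      }
    }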
@@ -149,7 +157,196 @@ public abstract class Analyzer implements Closeable {
 
   /** Frees persistent resources used by this Analyzer */
   public void close() {
-    tokenStreams.close();
-    tokenStreams = null;
+    reuseStrategy.close();
   }
+
+  /**
+   * This class encapsulates the outer components of a token stream. It provides
+   * access to the source ({@link Tokenizer}) and the outer end (sink), an
+   * instance of {@link TokenFilter} which also serves as the
+   * {@link TokenStream} returned by
+   * {@link Analyzer#tokenStream(String, Reader)} and
+   * {@link Analyzer#reusableTokenStream(String, Reader)}.
+   */
+  public static class TokenStreamComponents {
+    protected final Tokenizer source;
+    protected final TokenStream sink;
+
+    /**
+     * Creates a new {@link TokenStreamComponents} instance.
+     *
+     * @param source
+     *          the analyzer's tokenizer
+     * @param result
+     *          the analyzer's resulting token stream
+     */
+    public TokenStreamComponents(final Tokenizer source,
+        final TokenStream result) {
+      this.source = source;
+      this.sink = result;
+    }
+
+    /**
+     * Creates a new {@link TokenStreamComponents} instance.
+     *
+     * @param source
+     *          the analyzer's tokenizer
+     */
+    public TokenStreamComponents(final Tokenizer source) {
+      this.source = source;
+      this.sink = source;
+    }
+
+    /**
+     * Resets the encapsulated components with the given reader. If the components
+     * cannot be reset, an Exception should be thrown.
+     *
+     * @param reader
+     *          a reader to reset the source component
+     * @throws IOException
+     *           if the component's reset method throws an {@link IOException}
+     */
+    protected void reset(final Reader reader) throws IOException {
+      source.reset(reader);
+    }
+
+    /**
+     * Returns the sink {@link TokenStream}
+     *
+     * @return the sink {@link TokenStream}
+     */
+    public TokenStream getTokenStream() {
+      return sink;
+    }
+
+    /**
+     * Returns the component's {@link Tokenizer}
+     *
+     * @return Component's {@link Tokenizer}
+     */
+    public Tokenizer getTokenizer() {
+      return source;
+    }
+  }
+
+  /**
+   * Strategy defining how TokenStreamComponents are reused per call to
+   * {@link Analyzer#tokenStream(String, java.io.Reader)}.
+   */
+  public static abstract class ReuseStrategy {
+
+    private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
+
+    /**
+     * Gets the reusable TokenStreamComponents for the field with the given name
+     *
+     * @param fieldName Name of the field whose reusable TokenStreamComponents
+     *        are to be retrieved
+     * @return Reusable TokenStreamComponents for the field, or {@code null}
+     *         if there was no previous components for the field
+     */
+    public abstract TokenStreamComponents getReusableComponents(String fieldName);
+
+    /**
+     * Stores the given TokenStreamComponents as the reusable components for the
+     * field with the give name
+     *
+     * @param fieldName Name of the field whose TokenStreamComponents are being set
+     * @param components TokenStreamComponents which are to be reused for the field
+     */
+    public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
+
+    /**
+     * Returns the currently stored value
+     *
+     * @return Currently stored value or {@code null} if no value is stored
+     */
+    protected final Object getStoredValue() {
+      try {
+        return storedValue.get();
+      } catch (NullPointerException npe) {
+        if (storedValue == null) {
+          throw new AlreadyClosedException("this Analyzer is closed");
+        } else {
+          throw npe;
+        }
+      }
+    }
+
+    /**
+     * Sets the stored value
+     *
+     * @param storedValue Value to store
+     */
+    protected final void setStoredValue(Object storedValue) {
+      try {
+        this.storedValue.set(storedValue);
+      } catch (NullPointerException npe) {
+        if (storedValue == null) {
+          throw new AlreadyClosedException("this Analyzer is closed");
+        } else {
+          throw npe;
+        }
+      }
+    }
+
+    /**
+     * Closes the ReuseStrategy, freeing any resources
+     */
+    public void close() {
+      storedValue.close();
+      storedValue = null;
+    }
+  }
+
+  /**
+   * Implementation of {@link ReuseStrategy} that reuses the same components for
+   * every field.
+   */
+  public final static class GlobalReuseStrategy extends ReuseStrategy {
+
+    /**
+     * {@inheritDoc}
+     */
+    public TokenStreamComponents getReusableComponents(String fieldName) {
+      return (TokenStreamComponents) getStoredValue();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void setReusableComponents(String fieldName, TokenStreamComponents components) {
+      setStoredValue(components);
+    }
+  }
+
+  /**
+   * Implementation of {@link ReuseStrategy} that reuses components per-field by
+   * maintaining a Map of TokenStreamComponent per field name.
+   */
+  public static class PerFieldReuseStrategy extends ReuseStrategy {
+
+    /**
+     * {@inheritDoc}
+     */
+    @SuppressWarnings("unchecked")
+    public TokenStreamComponents getReusableComponents(String fieldName) {
+      Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
+      return componentsPerField != null ? componentsPerField.get(fieldName) : null;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @SuppressWarnings("unchecked")
+    public void setReusableComponents(String fieldName, TokenStreamComponents components) {
+      Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
+      if (componentsPerField == null) {
+        componentsPerField = new HashMap<String, TokenStreamComponents>();
+        setStoredValue(componentsPerField);
+      }
+      componentsPerField.put(fieldName, components);
+    }
+  }
+
 }
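The strategy is chosen at construction time: the no-arg constructor installs GlobalReuseStrategy (one cached chain per thread), while PerFieldReuseStrategy caches one chain per field name and is needed whenever createComponents() branches on fieldName. A sketch patterned on the TestIndexWriterExceptions changes later in this commit (the branching on "body" is hypothetical; MockTokenizer is from the test framework):

    // e.g. inside a test method:
    Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // Lower-case only the hypothetical "body" field; other fields keep case.
        boolean lowerCase = "body".equals(fieldName);
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, lowerCase));
      }
    };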
@@ -0,0 +1,155 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.IOException;
+import java.io.Closeable;
+import java.lang.reflect.Modifier;
+
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.store.AlreadyClosedException;
+
+/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
+ * policy for extracting index terms from text.
+ * <p>
+ * Typical implementations first build a Tokenizer, which breaks the stream of
+ * characters from the Reader into raw Tokens. One or more TokenFilters may
+ * then be applied to the output of the Tokenizer.
+ * <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
+ * Therefore all non-abstract subclasses must be final or their {@link #tokenStream}
+ * and {@link #reusableTokenStream} implementations must be final! This is checked
+ * when Java assertions are enabled.
+ */
+public abstract class Analyzer implements Closeable {
+
+  protected Analyzer() {
+    super();
+    assert assertFinal();
+  }
+
+  private boolean assertFinal() {
+    try {
+      final Class<?> clazz = getClass();
+      if (!clazz.desiredAssertionStatus())
+        return true;
+      assert clazz.isAnonymousClass() ||
+        (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
+        (
+          Modifier.isFinal(clazz.getMethod("tokenStream", String.class, Reader.class).getModifiers()) &&
+          Modifier.isFinal(clazz.getMethod("reusableTokenStream", String.class, Reader.class).getModifiers())
+        ) :
+        "Analyzer implementation classes or at least their tokenStream() and reusableTokenStream() implementations must be final";
+      return true;
+    } catch (NoSuchMethodException nsme) {
+      return false;
+    }
+  }
+
+  /** Creates a TokenStream which tokenizes all the text in the provided
+   * Reader. Must be able to handle null field name for
+   * backward compatibility.
+   */
+  public abstract TokenStream tokenStream(String fieldName, Reader reader);
+
+  /** Creates a TokenStream that is allowed to be re-used
+   * from the previous time that the same thread called
+   * this method. Callers that do not need to use more
+   * than one TokenStream at the same time from this
+   * analyzer should use this method for better
+   * performance.
+   */
+  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    return tokenStream(fieldName, reader);
+  }
+
+  private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
+
+  /** Used by Analyzers that implement reusableTokenStream
+   * to retrieve previously saved TokenStreams for re-use
+   * by the same thread. */
+  protected Object getPreviousTokenStream() {
+    try {
+      return tokenStreams.get();
+    } catch (NullPointerException npe) {
+      if (tokenStreams == null) {
+        throw new AlreadyClosedException("this Analyzer is closed");
+      } else {
+        throw npe;
+      }
+    }
+  }
+
+  /** Used by Analyzers that implement reusableTokenStream
+   * to save a TokenStream for later re-use by the same
+   * thread. */
+  protected void setPreviousTokenStream(Object obj) {
+    try {
+      tokenStreams.set(obj);
+    } catch (NullPointerException npe) {
+      if (tokenStreams == null) {
+        throw new AlreadyClosedException("this Analyzer is closed");
+      } else {
+        throw npe;
+      }
+    }
+  }
+
+  /**
+   * Invoked before indexing a IndexableField instance if
+   * terms have already been added to that field. This allows custom
+   * analyzers to place an automatic position increment gap between
+   * IndexbleField instances using the same field name. The default value
+   * position increment gap is 0. With a 0 position increment gap and
+   * the typical default token position increment of 1, all terms in a field,
+   * including across IndexableField instances, are in successive positions, allowing
+   * exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
+   *
+   * @param fieldName IndexableField name being indexed.
+   * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+   */
+  public int getPositionIncrementGap(String fieldName) {
+    return 0;
+  }
+
+  /**
+   * Just like {@link #getPositionIncrementGap}, except for
+   * Token offsets instead. By default this returns 1 for
+   * tokenized fields and, as if the fields were joined
+   * with an extra space character, and 0 for un-tokenized
+   * fields. This method is only called if the field
+   * produced at least one token for indexing.
+   *
+   * @param field the field just indexed
+   * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+   */
+  public int getOffsetGap(IndexableField field) {
+    if (field.fieldType().tokenized()) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+  /** Frees persistent resources used by this Analyzer */
+  public void close() {
+    tokenStreams.close();
+    tokenStreams = null;
+  }
+}
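A sketch of overriding the position-increment-gap policy documented above, so that PhraseQuery matches cannot cross the boundary between two IndexableField instances sharing a field name (the gap value 100 is arbitrary; MockTokenizer is from the test framework):

    Analyzer gapAnalyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
      }

      @Override
      public int getPositionIncrementGap(String fieldName) {
        return 100; // default is 0, i.e. successive positions across instances
      }
    };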
@@ -0,0 +1,89 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexableField;
+
+import java.io.Reader;
+
+/**
+ * Extension to {@link Analyzer} suitable for Analyzers which wrap
+ * other Analyzers.
+ * <p/>
+ * {@link #getWrappedAnalyzer(String)} allows the Analyzer
+ * to wrap multiple Analyzers which are selected on a per field basis.
+ * <p/>
+ * {@link #wrapComponents(String, Analyzer.TokenStreamComponents)} allows the
+ * TokenStreamComponents of the wrapped Analyzer to then be wrapped
+ * (such as adding a new {@link TokenFilter} to form new TokenStreamComponents.
+ */
+public abstract class AnalyzerWrapper extends Analyzer {
+
+  /**
+   * Creates a new AnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
+   * the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed
+   */
+  protected AnalyzerWrapper() {
+    super(new PerFieldReuseStrategy());
+  }
+
+  /**
+   * Retrieves the wrapped Analyzer appropriate for analyzing the field with
+   * the given name
+   *
+   * @param fieldName Name of the field which is to be analyzed
+   * @return Analyzer for the field with the given name. Assumed to be non-null
+   */
+  protected abstract Analyzer getWrappedAnalyzer(String fieldName);
+
+  /**
+   * Wraps / alters the given TokenStreamComponents, taken from the wrapped
+   * Analyzer, to form new components. It is through this method that new
+   * TokenFilters can be added by AnalyzerWrappers.
+   *
+   *
+   * @param fieldName Name of the field which is to be analyzed
+   * @param components TokenStreamComponents taken from the wrapped Analyzer
+   * @return Wrapped / altered TokenStreamComponents.
+   */
+  protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+    return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public final int getPositionIncrementGap(String fieldName) {
+    return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public final int getOffsetGap(IndexableField field) {
+    return getWrappedAnalyzer(field.name()).getOffsetGap(field);
+  }
+}
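A sketch of a concrete AnalyzerWrapper under the contract above: keep the wrapped Tokenizer as the source and splice one more TokenFilter onto the sink. LowerCaseFilter, its (Version, TokenStream) constructor, and its org.apache.lucene.analysis.core package are assumptions about the analysis module at this point on trunk; any TokenFilter would do:

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.AnalyzerWrapper;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.LowerCaseFilter; // assumed location
    import org.apache.lucene.util.Version;

    final class LowerCasingWrapper extends AnalyzerWrapper {
      private final Analyzer delegate;

      LowerCasingWrapper(Analyzer delegate) {
        this.delegate = delegate;
      }

      @Override
      protected Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate; // one delegate for every field; could branch per field
      }

      @Override
      protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        // Reuse the wrapped source, add one more filter to the sink.
        TokenStream sink = new LowerCaseFilter(Version.LUCENE_40, components.getTokenStream());
        return new TokenStreamComponents(components.getTokenizer(), sink);
      }
    }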
@@ -1,308 +0,0 @@
-package org.apache.lucene.analysis;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.util.CloseableThreadLocal;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * An convenience subclass of Analyzer that makes it easy to implement
- * {@link TokenStream} reuse.
- * <p>
- * ReusableAnalyzerBase is a simplification of Analyzer that supports easy reuse
- * for the most common use-cases. Analyzers such as
- * PerFieldAnalyzerWrapper that behave differently depending upon the
- * field name need to subclass Analyzer directly instead.
- * </p>
- * <p>
- * To prevent consistency problems, this class does not allow subclasses to
- * extend {@link #reusableTokenStream(String, Reader)} or
- * {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
- * implement {@link #createComponents(String, Reader)}.
- * </p>
- */
-public abstract class ReusableAnalyzerBase extends Analyzer {
-
-  private final ReuseStrategy reuseStrategy;
-
-  public ReusableAnalyzerBase() {
-    this(new GlobalReuseStrategy());
-  }
-
-  public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) {
-    this.reuseStrategy = reuseStrategy;
-  }
-
-  /**
-   * Creates a new {@link TokenStreamComponents} instance for this analyzer.
-   *
-   * @param fieldName
-   *          the name of the fields content passed to the
-   *          {@link TokenStreamComponents} sink as a reader
-   * @param aReader
-   *          the reader passed to the {@link Tokenizer} constructor
-   * @return the {@link TokenStreamComponents} for this analyzer.
-   */
-  protected abstract TokenStreamComponents createComponents(String fieldName,
-      Reader aReader);
-
-  /**
-   * This method uses {@link #createComponents(String, Reader)} to obtain an
-   * instance of {@link TokenStreamComponents}. It returns the sink of the
-   * components and stores the components internally. Subsequent calls to this
-   * method will reuse the previously stored components if and only if the
-   * {@link TokenStreamComponents#reset(Reader)} method returned
-   * <code>true</code>. Otherwise a new instance of
-   * {@link TokenStreamComponents} is created.
-   *
-   * @param fieldName the name of the field the created TokenStream is used for
-   * @param reader the reader the streams source reads from
-   */
-  @Override
-  public final TokenStream reusableTokenStream(final String fieldName,
-      final Reader reader) throws IOException {
-    TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
-    final Reader r = initReader(reader);
-    if (components == null) {
-      components = createComponents(fieldName, r);
-      reuseStrategy.setReusableComponents(fieldName, components);
-    } else {
-      components.reset(r);
-    }
-    return components.getTokenStream();
-  }
-
-  /**
-   * This method uses {@link #createComponents(String, Reader)} to obtain an
-   * instance of {@link TokenStreamComponents} and returns the sink of the
-   * components. Each calls to this method will create a new instance of
-   * {@link TokenStreamComponents}. Created {@link TokenStream} instances are
-   * never reused.
-   *
-   * @param fieldName the name of the field the created TokenStream is used for
-   * @param reader the reader the streams source reads from
-   */
-  @Override
-  public final TokenStream tokenStream(final String fieldName,
-      final Reader reader) {
-    return createComponents(fieldName, initReader(reader)).getTokenStream();
-  }
-
-  /**
-   * Override this if you want to add a CharFilter chain.
-   */
-  protected Reader initReader(Reader reader) {
-    return reader;
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  @Override
-  public void close() {
-    super.close();
-    reuseStrategy.close();
-  }
-
-  /**
-   * This class encapsulates the outer components of a token stream. It provides
-   * access to the source ({@link Tokenizer}) and the outer end (sink), an
-   * instance of {@link TokenFilter} which also serves as the
-   * {@link TokenStream} returned by
-   * {@link Analyzer#tokenStream(String, Reader)} and
-   * {@link Analyzer#reusableTokenStream(String, Reader)}.
-   */
-  public static class TokenStreamComponents {
-    protected final Tokenizer source;
-    protected final TokenStream sink;
-
-    /**
-     * Creates a new {@link TokenStreamComponents} instance.
-     *
-     * @param source
-     *          the analyzer's tokenizer
-     * @param result
-     *          the analyzer's resulting token stream
-     */
-    public TokenStreamComponents(final Tokenizer source,
-        final TokenStream result) {
-      this.source = source;
-      this.sink = result;
-    }
-
-    /**
-     * Creates a new {@link TokenStreamComponents} instance.
-     *
-     * @param source
-     *          the analyzer's tokenizer
-     */
-    public TokenStreamComponents(final Tokenizer source) {
-      this.source = source;
-      this.sink = source;
-    }
-
-    /**
-     * Resets the encapsulated components with the given reader. If the components
-     * cannot be reset, an Exception should be thrown.
-     *
-     * @param reader
-     *          a reader to reset the source component
-     * @throws IOException
-     *           if the component's reset method throws an {@link IOException}
-     */
-    protected void reset(final Reader reader) throws IOException {
-      source.reset(reader);
-    }
-
-    /**
-     * Returns the sink {@link TokenStream}
-     *
-     * @return the sink {@link TokenStream}
-     */
-    protected TokenStream getTokenStream() {
-      return sink;
-    }
-
-  }
-
-  /**
-   * Strategy defining how TokenStreamComponents are reused per call to
-   * {@link ReusableAnalyzerBase#tokenStream(String, java.io.Reader)}.
-   */
-  public static abstract class ReuseStrategy {
-
-    private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
-
-    /**
-     * Gets the reusable TokenStreamComponents for the field with the given name
-     *
-     * @param fieldName Name of the field whose reusable TokenStreamComponents
-     *        are to be retrieved
-     * @return Reusable TokenStreamComponents for the field, or {@code null}
-     *         if there was no previous components for the field
-     */
-    public abstract TokenStreamComponents getReusableComponents(String fieldName);
-
-    /**
-     * Stores the given TokenStreamComponents as the reusable components for the
-     * field with the give name
-     *
-     * @param fieldName Name of the field whose TokenStreamComponents are being set
-     * @param components TokenStreamComponents which are to be reused for the field
-     */
-    public abstract void setReusableComponents(String fieldName, TokenStreamComponents components);
-
-    /**
-     * Returns the currently stored value
-     *
-     * @return Currently stored value or {@code null} if no value is stored
-     */
-    protected final Object getStoredValue() {
-      try {
-        return storedValue.get();
-      } catch (NullPointerException npe) {
-        if (storedValue == null) {
-          throw new AlreadyClosedException("this Analyzer is closed");
-        } else {
-          throw npe;
-        }
-      }
-    }
-
-    /**
-     * Sets the stored value
-     *
-     * @param storedValue Value to store
-     */
-    protected final void setStoredValue(Object storedValue) {
-      try {
-        this.storedValue.set(storedValue);
-      } catch (NullPointerException npe) {
-        if (storedValue == null) {
-          throw new AlreadyClosedException("this Analyzer is closed");
-        } else {
-          throw npe;
-        }
-      }
-    }
-
-    /**
-     * Closes the ReuseStrategy, freeing any resources
-     */
-    public void close() {
-      storedValue.close();
-      storedValue = null;
-    }
-  }
-
-  /**
-   * Implementation of {@link ReuseStrategy} that reuses the same components for
-   * every field.
-   */
-  public final static class GlobalReuseStrategy extends ReuseStrategy {
-
-    /**
-     * {@inheritDoc}
-     */
-    public TokenStreamComponents getReusableComponents(String fieldName) {
-      return (TokenStreamComponents) getStoredValue();
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    public void setReusableComponents(String fieldName, TokenStreamComponents components) {
-      setStoredValue(components);
-    }
-  }
-
-  /**
-   * Implementation of {@link ReuseStrategy} that reuses components per-field by
-   * maintaining a Map of TokenStreamComponent per field name.
-   */
-  public static class PerFieldReuseStrategy extends ReuseStrategy {
-
-    /**
-     * {@inheritDoc}
-     */
-    @SuppressWarnings("unchecked")
-    public TokenStreamComponents getReusableComponents(String fieldName) {
-      Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
-      return componentsPerField != null ? componentsPerField.get(fieldName) : null;
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @SuppressWarnings("unchecked")
-    public void setReusableComponents(String fieldName, TokenStreamComponents components) {
-      Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
-      if (componentsPerField == null) {
-        componentsPerField = new HashMap<String, TokenStreamComponents>();
-        setStoredValue(componentsPerField);
-      }
-      componentsPerField.put(fieldName, components);
-    }
-  }
-
-}
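The reuse bookkeeping that ReusableAnalyzerBase owned now lives in Analyzer itself. A sketch of the observable contract on a single thread (MockTokenizer is from the test framework):

    // e.g. inside a test method:
    Analyzer analyzer = new Analyzer() { // no-arg constructor = GlobalReuseStrategy
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
      }
    };
    TokenStream first = analyzer.reusableTokenStream("f", new StringReader("one two"));
    TokenStream second = analyzer.reusableTokenStream("f", new StringReader("three"));
    // The second call reuses and resets the cached components, so the same
    // TokenStream instance comes back; tokenStream() would build a fresh chain.
    assert first == second;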
@@ -42,7 +42,7 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
  * </ul>
  * @see MockTokenizer
  */
-public final class MockAnalyzer extends ReusableAnalyzerBase {
+public final class MockAnalyzer extends Analyzer {
   private final CharacterRunAutomaton runAutomaton;
   private final boolean lowerCase;
   private final CharacterRunAutomaton filter;
@@ -30,7 +30,7 @@ import java.io.Reader;
  *
  *
  **/
-public final class MockPayloadAnalyzer extends ReusableAnalyzerBase {
+public final class MockPayloadAnalyzer extends Analyzer {
 
   @Override
   public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -19,7 +19,6 @@ package org.apache.lucene;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -35,7 +34,7 @@ public class TestAssertions extends LuceneTestCase {
     }
   }
 
-  static class TestAnalyzer1 extends ReusableAnalyzerBase {
+  static class TestAnalyzer1 extends Analyzer {
 
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@@ -43,7 +42,7 @@ public class TestAssertions extends LuceneTestCase {
     }
   }
 
-  static final class TestAnalyzer2 extends ReusableAnalyzerBase {
+  static final class TestAnalyzer2 extends Analyzer {
 
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@@ -51,7 +50,7 @@ public class TestAssertions extends LuceneTestCase {
     }
   }
 
-  static class TestAnalyzer3 extends ReusableAnalyzerBase {
+  static class TestAnalyzer3 extends Analyzer {
 
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@@ -59,7 +58,7 @@ public class TestAssertions extends LuceneTestCase {
     }
   }
 
-  static class TestAnalyzer4 extends ReusableAnalyzerBase {
+  static class TestAnalyzer4 extends Analyzer {
 
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
@@ -103,7 +103,7 @@ public class TestDocumentWriter extends LuceneTestCase {
   }
 
   public void testPositionIncrementGap() throws IOException {
-    Analyzer analyzer = new ReusableAnalyzerBase() {
+    Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
@@ -138,7 +138,7 @@ public class TestDocumentWriter extends LuceneTestCase {
   }
 
   public void testTokenReuse() throws IOException {
-    Analyzer analyzer = new ReusableAnalyzerBase() {
+    Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -1706,7 +1706,7 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }
 
-  static final class StringSplitAnalyzer extends ReusableAnalyzerBase {
+  static final class StringSplitAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       return new TokenStreamComponents(new StringSplitTokenizer(reader));
@@ -175,7 +175,7 @@ public class TestIndexWriterCommit extends LuceneTestCase {
     Analyzer analyzer;
     if (random.nextBoolean()) {
       // no payloads
-      analyzer = new ReusableAnalyzerBase() {
+      analyzer = new Analyzer() {
         @Override
         public TokenStreamComponents createComponents(String fieldName, Reader reader) {
           return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
@@ -184,7 +184,7 @@ public class TestIndexWriterCommit extends LuceneTestCase {
     } else {
       // fixed length payloads
       final int length = random.nextInt(200);
-      analyzer = new ReusableAnalyzerBase() {
+      analyzer = new Analyzer() {
         @Override
         public TokenStreamComponents createComponents(String fieldName, Reader reader) {
           Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
@@ -899,7 +899,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
     final Random r = random;
     Directory dir = newDirectory();
     // note this test explicitly disables payloads
-    final Analyzer analyzer = new ReusableAnalyzerBase() {
+    final Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
@@ -386,7 +386,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     doc.add(newField("field", "a field", TextField.TYPE_STORED));
     w.addDocument(doc);
 
-    Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
+    Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -454,7 +454,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
   // LUCENE-1072
   public void testExceptionFromTokenStream() throws IOException {
     Directory dir = newDirectory();
-    IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
+    IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {
 
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -591,7 +591,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
   }
 
   public void testDocumentsWriterExceptions() throws IOException {
-    Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
+    Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -687,7 +687,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
   }
 
   public void testDocumentsWriterExceptionThreads() throws Exception {
-    Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
+    Analyzer analyzer = new Analyzer(new Analyzer.PerFieldReuseStrategy()) {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -68,7 +68,7 @@ public class TestLazyProxSkipping extends LuceneTestCase {
   private void createIndex(int numHits) throws IOException {
     int numDocs = 500;
 
-    final Analyzer analyzer = new ReusableAnalyzerBase() {
+    final Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
@@ -111,7 +111,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
     assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
   }
 
-  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
+  private static class PayloadAnalyzer extends Analyzer {
     private final AtomicInteger payloadCount = new AtomicInteger(-1);
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -405,7 +405,7 @@ public class TestPayloads extends LuceneTestCase {
   /**
    * This Analyzer uses an WhitespaceTokenizer and PayloadFilter.
    */
-  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
+  private static class PayloadAnalyzer extends Analyzer {
     Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();
 
     public PayloadAnalyzer() {
@@ -20,7 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -63,7 +63,7 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   }
 }
 
-final class BugReproAnalyzer extends ReusableAnalyzerBase {
+final class BugReproAnalyzer extends Analyzer {
   @Override
   public TokenStreamComponents createComponents(String arg0, Reader arg1) {
     return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
@@ -175,7 +175,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
     }
   }
 
-  private class MyAnalyzer extends ReusableAnalyzerBase {
+  private class MyAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       return new TokenStreamComponents(new MyTokenStream());
@@ -22,7 +22,6 @@ import java.io.Reader;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
@@ -77,7 +76,7 @@ public class TestTermdocPerf extends LuceneTestCase {
   void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
     final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
 
-    Analyzer analyzer = new ReusableAnalyzerBase() {
+    Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         return new TokenStreamComponents(ts);
@@ -17,7 +17,6 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@@ -346,7 +345,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
     }
   }
 
-  private static class CannedAnalyzer extends ReusableAnalyzerBase {
+  private static class CannedAnalyzer extends Analyzer {
     private final TokenAndPos[] tokens;
 
     public CannedAnalyzer(TokenAndPos[] tokens) {
@@ -55,7 +55,7 @@ public class TestPhraseQuery extends LuceneTestCase {
   @BeforeClass
   public static void beforeClass() throws Exception {
     directory = newDirectory();
-    Analyzer analyzer = new ReusableAnalyzerBase() {
+    Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
@@ -56,7 +56,7 @@ public class TestPositionIncrement extends LuceneTestCase {
   final static boolean VERBOSE = false;
 
   public void testSetPosition() throws Exception {
-    Analyzer analyzer = new ReusableAnalyzerBase() {
+    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new Tokenizer() {
@@ -190,7 +190,7 @@ public class TestTermRangeQuery extends LuceneTestCase {
     assertFalse("queries with different inclusive are not equal", query.equals(other));
   }
 
-  private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
+  private static class SingleCharAnalyzer extends Analyzer {
 
     private static class SingleCharTokenizer extends Tokenizer {
       char[] buffer = new char[1];
@@ -55,7 +55,7 @@ public class PayloadHelper {
 
   public IndexReader reader;
 
-  public final class PayloadAnalyzer extends ReusableAnalyzerBase {
+  public final class PayloadAnalyzer extends Analyzer {
 
     public PayloadAnalyzer() {
       super(new PerFieldReuseStrategy());
@@ -55,7 +55,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
   private static byte[] payload2 = new byte[]{2};
   private static byte[] payload4 = new byte[]{4};
 
-  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
+  private static class PayloadAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
@@ -64,7 +64,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
   private static final byte[] payloadMultiField2 = new byte[]{4};
   protected static Directory directory;
 
-  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
+  private static class PayloadAnalyzer extends Analyzer {
 
     private PayloadAnalyzer() {
       super(new PerFieldReuseStrategy());
@@ -96,7 +96,7 @@ public class TestBasics extends LuceneTestCase {
     }
   }
 
-  static final Analyzer simplePayloadAnalyzer = new ReusableAnalyzerBase() {
+  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
 
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -476,7 +476,7 @@ public class TestPayloadSpans extends LuceneTestCase {
     assertEquals(numSpans, cnt);
   }
 
-  final class PayloadAnalyzer extends ReusableAnalyzerBase {
+  final class PayloadAnalyzer extends Analyzer {
 
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -530,7 +530,7 @@ public class TestPayloadSpans extends LuceneTestCase {
   }
 }
 
-public final class TestPayloadAnalyzer extends ReusableAnalyzerBase {
+public final class TestPayloadAnalyzer extends Analyzer {
 
   @Override
   public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -126,10 +126,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link StopFilter},
    *         {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}
@@ -107,11 +107,11 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -117,10 +117,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
    *         , and {@link BrazilianStemFilter}.
@@ -105,11 +105,11 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.cn;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 
@@ -31,14 +30,14 @@ import org.apache.lucene.analysis.Tokenizer;
  * This analyzer will be removed in Lucene 5.0
  */
 @Deprecated
-public final class ChineseAnalyzer extends ReusableAnalyzerBase {
+public final class ChineseAnalyzer extends Analyzer {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link ChineseTokenizer} filtered with
    *         {@link ChineseFilter}
    */
@@ -19,13 +19,13 @@ package org.apache.lucene.analysis.core;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Analyzer;
 
 /**
  * "Tokenizes" the entire stream as a single token. This is useful
  * for data like zip codes, ids, and some product names.
  */
-public final class KeywordAnalyzer extends ReusableAnalyzerBase {
+public final class KeywordAnalyzer extends Analyzer {
   public KeywordAnalyzer() {
   }
 
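For comparison, the same single-token behavior expressed through the new extension point as an anonymous subclass (a sketch; the hosting class and field name are invented):

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.KeywordTokenizer;

    public class KeywordLikeExample {
      // The whole input Reader becomes one token, as in KeywordAnalyzer.
      static final Analyzer KEYWORD_LIKE = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          return new TokenStreamComponents(new KeywordTokenizer(reader));
        }
      };
    }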
@@ -21,7 +21,6 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 /** An {@link Analyzer} that filters {@link LetterTokenizer}
@@ -36,7 +35,7 @@ import org.apache.lucene.util.Version;
  * </ul>
  * <p>
  **/
-public final class SimpleAnalyzer extends ReusableAnalyzerBase {
+public final class SimpleAnalyzer extends Analyzer {
 
   private final Version matchVersion;
 
@@ -95,10 +95,10 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link LowerCaseTokenizer} filtered with
    *         {@link StopFilter}
    */
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
 
 import java.io.Reader;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 /**
@@ -35,7 +35,7 @@ import org.apache.lucene.util.Version;
  * </ul>
  * <p>
  **/
-public final class WhitespaceAnalyzer extends ReusableAnalyzerBase {
+public final class WhitespaceAnalyzer extends Analyzer {
 
   private final Version matchVersion;
 
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
@@ -122,10 +121,10 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
@@ -135,7 +134,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
    *         {@link CzechStemFilter}.
    */
   @Override
-  protected ReusableAnalyzerBase.TokenStreamComponents createComponents(String fieldName,
+  protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(matchVersion, source);
@@ -106,11 +106,11 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -158,10 +158,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -101,10 +101,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
    *         {@link StopFilter}, and {@link GreekStemFilter}
@@ -89,11 +89,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -106,11 +106,11 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -105,11 +105,11 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -107,10 +107,10 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
    *         {@link PersianNormalizationFilter} and Persian Stop words
@@ -106,11 +106,11 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -168,10 +168,10 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link ElisionFilter},
    *         {@link LowerCaseFilter}, {@link StopFilter},
@@ -104,11 +104,11 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -106,10 +106,10 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link IndicTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
    *         {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter}
@@ -106,11 +106,11 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -105,11 +105,11 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -106,10 +106,10 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter},
    *         {@link StopFilter}, {@link KeywordMarkerFilter}
@@ -123,11 +123,11 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -104,11 +104,11 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -18,17 +18,13 @@ package org.apache.lucene.analysis.miscellaneous;
  */
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.IndexableField;
-
-import java.io.Reader;
-import java.io.IOException;
+import org.apache.lucene.analysis.AnalyzerWrapper;
 
 /**
  * This Analyzer limits the number of tokens while indexing. It is
 * a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}.
  */
-public final class LimitTokenCountAnalyzer extends Analyzer {
+public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
   private final Analyzer delegate;
   private final int maxTokenCount;
 
@@ -41,27 +37,14 @@ public final class LimitTokenCountAnalyzer extends Analyzer {
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new LimitTokenCountFilter(
-      delegate.tokenStream(fieldName, reader), maxTokenCount
-    );
+  protected Analyzer getWrappedAnalyzer(String fieldName) {
+    return delegate;
   }
 
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    return new LimitTokenCountFilter(
-      delegate.reusableTokenStream(fieldName, reader), maxTokenCount
-    );
-  }
-
-  @Override
-  public int getPositionIncrementGap(String fieldName) {
-    return delegate.getPositionIncrementGap(fieldName);
-  }
-
-  @Override
-  public int getOffsetGap(IndexableField field) {
-    return delegate.getOffsetGap(field);
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    return new TokenStreamComponents(components.getTokenizer(),
      new LimitTokenCountFilter(components.getTokenStream(), maxTokenCount));
   }
 
   @Override
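Usage is unchanged from the caller's perspective; a hedged sketch (the 1000-token cap is illustrative, and the (delegate, maxTokenCount) constructor is assumed from the fields shown above):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
    import org.apache.lucene.util.Version;

    public class LimitTokenCountExample {
      public static void main(String[] args) {
        // Wrap any analyzer; token output per field is capped at 1000.
        Analyzer base = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
        Analyzer limited = new LimitTokenCountAnalyzer(base, 1000);
      }
    }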
@@ -27,7 +27,6 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -67,7 +66,7 @@ import org.apache.lucene.util.Version;
  * @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
  */
 @Deprecated
-public final class PatternAnalyzer extends ReusableAnalyzerBase {
+public final class PatternAnalyzer extends Analyzer {
 
   /** <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) */
   public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
@@ -18,14 +18,10 @@ package org.apache.lucene.analysis.miscellaneous;
  */
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.analysis.AnalyzerWrapper;
 
-import java.io.Reader;
-import java.io.IOException;
 import java.util.Collections;
 import java.util.Map;
 import java.util.HashMap;
 
 /**
  * This analyzer is used to facilitate scenarios where different
@@ -50,7 +46,7 @@ import java.util.HashMap;
  * <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
  * and query parsing.
  */
-public final class PerFieldAnalyzerWrapper extends Analyzer {
+public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
   private final Analyzer defaultAnalyzer;
   private final Map<String, Analyzer> fieldAnalyzers;
 
@@ -74,47 +70,20 @@ public final class PerFieldAnalyzerWrapper extends Analyzer {
    *          used for those fields
    */
   public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
-      Map<String,Analyzer> fieldAnalyzers) {
+      Map<String, Analyzer> fieldAnalyzers) {
     this.defaultAnalyzer = defaultAnalyzer;
     this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, Analyzer>emptyMap();
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
+  protected Analyzer getWrappedAnalyzer(String fieldName) {
     Analyzer analyzer = fieldAnalyzers.get(fieldName);
-    if (analyzer == null) {
-      analyzer = defaultAnalyzer;
-    }
-
-    return analyzer.tokenStream(fieldName, reader);
+    return (analyzer != null) ? analyzer : defaultAnalyzer;
   }
 
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    Analyzer analyzer = fieldAnalyzers.get(fieldName);
-    if (analyzer == null)
-      analyzer = defaultAnalyzer;
-
-    return analyzer.reusableTokenStream(fieldName, reader);
-  }
-
-  /** Return the positionIncrementGap from the analyzer assigned to fieldName */
-  @Override
-  public int getPositionIncrementGap(String fieldName) {
-    Analyzer analyzer = fieldAnalyzers.get(fieldName);
-    if (analyzer == null)
-      analyzer = defaultAnalyzer;
-    return analyzer.getPositionIncrementGap(fieldName);
-  }
-
-  /** Return the offsetGap from the analyzer assigned to field */
-  @Override
-  public int getOffsetGap(IndexableField field) {
-    Analyzer analyzer = fieldAnalyzers.get(field.name());
-    if (analyzer == null) {
-      analyzer = defaultAnalyzer;
-    }
-    return analyzer.getOffsetGap(field);
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    return components;
   }
 
   @Override
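The constructor surviving above keeps the old calling convention; a short usage sketch (the field name "id" is invented for illustration):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
    import org.apache.lucene.util.Version;

    public class PerFieldWrapperExample {
      public static void main(String[] args) {
        // "id" is analyzed as a single keyword; all other fields fall
        // back to the whitespace analyzer.
        Map<String, Analyzer> perField = new HashMap<String, Analyzer>();
        perField.put("id", new KeywordAnalyzer());
        Analyzer wrapper = new PerFieldAnalyzerWrapper(
            new WhitespaceAnalyzer(Version.LUCENE_CURRENT), perField);
      }
    }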
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 
@@ -66,7 +65,7 @@ import java.util.Map;
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
-public final class DutchAnalyzer extends ReusableAnalyzerBase {
+public final class DutchAnalyzer extends Analyzer {
 
  /** File containing default Dutch stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "dutch_stop.txt";
@@ -106,11 +106,11 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -106,11 +106,11 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -16,20 +16,19 @@ package org.apache.lucene.analysis.query;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.util.*;
 
 /**
@@ -42,7 +41,7 @@ import java.util.*;
 * this term to take 2 seconds.
 * </p>
 */
-public final class QueryAutoStopWordAnalyzer extends Analyzer {
+public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
 
   private final Analyzer delegate;
   private final Map<String, Set<String>> stopWordsPerField = new HashMap<String, Set<String>>();
@@ -168,79 +167,18 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result;
-    try {
-      result = delegate.reusableTokenStream(fieldName, reader);
-    } catch (IOException e) {
-      result = delegate.tokenStream(fieldName, reader);
-    }
-    Set<String> stopWords = stopWordsPerField.get(fieldName);
-    if (stopWords != null) {
-      result = new StopFilter(matchVersion, result, stopWords);
-    }
-    return result;
+  protected Analyzer getWrappedAnalyzer(String fieldName) {
+    return delegate;
   }
 
-  private class SavedStreams {
-    /* the underlying stream */
-    TokenStream wrapped;
-
-    /*
-     * when there are no stopwords for the field, refers to wrapped.
-     * if there stopwords, it is a StopFilter around wrapped.
-     */
-    TokenStream withStopFilter;
-  }
-
-  @SuppressWarnings("unchecked")
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    /* map of SavedStreams for each field */
-    Map<String,SavedStreams> streamMap = (Map<String,SavedStreams>) getPreviousTokenStream();
-    if (streamMap == null) {
-      streamMap = new HashMap<String, SavedStreams>();
-      setPreviousTokenStream(streamMap);
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    Set<String> stopWords = stopWordsPerField.get(fieldName);
+    if (stopWords == null) {
+      return components;
     }
 
-    SavedStreams streams = streamMap.get(fieldName);
-    if (streams == null) {
-      /* an entry for this field does not exist, create one */
-      streams = new SavedStreams();
-      streamMap.put(fieldName, streams);
-      streams.wrapped = delegate.reusableTokenStream(fieldName, reader);
-
-      /* if there are any stopwords for the field, save the stopfilter */
-      Set<String> stopWords = stopWordsPerField.get(fieldName);
-      if (stopWords != null) {
-        streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
-      } else {
-        streams.withStopFilter = streams.wrapped;
-      }
-    } else {
-      /*
-       * an entry for this field exists, verify the wrapped stream has not
-       * changed. if it has not, reuse it, otherwise wrap the new stream.
-       */
-      TokenStream result = delegate.reusableTokenStream(fieldName, reader);
-      if (result == streams.wrapped) {
-        /* the wrapped analyzer reused the stream */
-      } else {
-        /*
-         * the wrapped analyzer did not. if there are any stopwords for the
-         * field, create a new StopFilter around the new stream
-         */
-        streams.wrapped = result;
-        Set<String> stopWords = stopWordsPerField.get(fieldName);
-        if (stopWords != null) {
-          streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
-        } else {
-          streams.withStopFilter = streams.wrapped;
-        }
-      }
-    }
-
-    return streams.withStopFilter;
+    StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), stopWords);
+    return new TokenStreamComponents(components.getTokenizer(), stopFilter);
   }
 
   /**
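The wrapComponents() shape above generalizes to any filter-injecting wrapper; a standalone sketch under the same API (the class name and constructor here are hypothetical):

    import java.util.Set;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.AnalyzerWrapper;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.util.Version;

    // Re-wraps only the outgoing TokenStream; the wrapped tokenizer is reused.
    final class StopWrappingAnalyzer extends AnalyzerWrapper {
      private final Analyzer delegate;
      private final Set<String> stopWords;
      private final Version matchVersion;

      StopWrappingAnalyzer(Version matchVersion, Analyzer delegate, Set<String> stopWords) {
        this.matchVersion = matchVersion;
        this.delegate = delegate;
        this.stopWords = stopWords;
      }

      @Override
      protected Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate;
      }

      @Override
      protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), stopWords);
        return new TokenStreamComponents(components.getTokenizer(), stopFilter);
      }
    }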
@@ -110,11 +110,11 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -139,10 +139,10 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -17,11 +17,8 @@ package org.apache.lucene.analysis.shingle;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.Reader;
-
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.util.Version;
 
@@ -31,7 +28,7 @@ import org.apache.lucene.util.Version;
 * A shingle is another name for a token based n-gram.
 * </p>
 */
-public final class ShingleAnalyzerWrapper extends Analyzer {
+public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
 
   private final Analyzer defaultAnalyzer;
   private final int maxShingleSize;
@@ -140,48 +137,18 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream wrapped;
-    try {
-      wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-    } catch (IOException e) {
-      wrapped = defaultAnalyzer.tokenStream(fieldName, reader);
-    }
-    ShingleFilter filter = new ShingleFilter(wrapped, minShingleSize, maxShingleSize);
+  protected Analyzer getWrappedAnalyzer(String fieldName) {
+    return defaultAnalyzer;
+  }
+
+  @Override
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    ShingleFilter filter = new ShingleFilter(components.getTokenStream(), minShingleSize, maxShingleSize);
     filter.setMinShingleSize(minShingleSize);
     filter.setMaxShingleSize(maxShingleSize);
     filter.setTokenSeparator(tokenSeparator);
     filter.setOutputUnigrams(outputUnigrams);
    filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
-    return filter;
-  }
-
-  private class SavedStreams {
-    TokenStream wrapped;
-    ShingleFilter shingle;
-  }
-
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-      streams.shingle = new ShingleFilter(streams.wrapped);
-      setPreviousTokenStream(streams);
-    } else {
-      TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-      if (result != streams.wrapped) {
-        /* the wrapped analyzer did not, create a new shingle around the new one */
-        streams.wrapped = result;
-        streams.shingle = new ShingleFilter(streams.wrapped);
-      }
-    }
-    streams.shingle.setMaxShingleSize(maxShingleSize);
-    streams.shingle.setMinShingleSize(minShingleSize);
-    streams.shingle.setTokenSeparator(tokenSeparator);
-    streams.shingle.setOutputUnigrams(outputUnigrams);
-    streams.shingle.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
-    return streams.shingle;
+    return new TokenStreamComponents(components.getTokenizer(), filter);
   }
 }
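A hedged usage sketch; the (defaultAnalyzer, minShingleSize, maxShingleSize) constructor is assumed from the fields visible above:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.Version;

    public class ShingleWrapperExample {
      public static void main(String[] args) {
        // Emits 2- and 3-token shingles over StandardAnalyzer output.
        Analyzer shingles = new ShingleAnalyzerWrapper(
            new StandardAnalyzer(Version.LUCENE_CURRENT), 2, 3);
      }
    }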
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
 import org.apache.lucene.analysis.standard.*;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 import java.io.Reader;
@@ -47,7 +46,7 @@ import java.util.Set;
 * This analyzer will be removed in Lucene 5.0
 */
 @Deprecated
-public final class SnowballAnalyzer extends ReusableAnalyzerBase {
+public final class SnowballAnalyzer extends Analyzer {
   private String name;
   private Set<?> stopSet;
   private final Version matchVersion;
@@ -106,11 +106,11 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 /**
@@ -36,7 +35,7 @@ import org.apache.lucene.util.Version;
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
-public final class ThaiAnalyzer extends ReusableAnalyzerBase {
+public final class ThaiAnalyzer extends Analyzer {
   private final Version matchVersion;
 
   public ThaiAnalyzer(Version matchVersion) {
@@ -45,10 +44,10 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
 
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link ThaiWordFilter}, and
    *         {@link StopFilter}
@@ -109,11 +109,11 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link TurkishLowerCaseFilter},
    *         {@link StopFilter}, {@link KeywordMarkerFilter} if a stem
@@ -20,15 +20,14 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.util.Set;
 
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.util.Version;
 
 /**
 * Base class for Analyzers that need to make use of stopword sets.
 *
 */
-public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
+public abstract class StopwordAnalyzerBase extends Analyzer {
 
   /**
    * An immutable stopword set
@@ -92,7 +91,7 @@ public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
    *           if loading the stopwords throws an {@link IOException}
    */
   protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
-      final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
+      final Class<? extends Analyzer> aClass, final String resource,
       final String comment) throws IOException {
     final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
         comment);
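A hypothetical subclass sketch using the loadStopwordSet() signature above ("mystop.txt" and the class name are invented; the protected stopwords and matchVersion fields are assumed from this base class):

    import java.io.IOException;
    import java.io.Reader;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseTokenizer;
    import org.apache.lucene.analysis.core.StopFilter;
    import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
    import org.apache.lucene.util.Version;

    public final class MyStopAnalyzer extends StopwordAnalyzerBase {
      public MyStopAnalyzer(Version version) throws IOException {
        // Load a classpath stopword file, ignoring case; '#' starts a comment.
        super(version, loadStopwordSet(true, MyStopAnalyzer.class, "mystop.txt", "#"));
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new LowerCaseTokenizer(matchVersion, reader);
        return new TokenStreamComponents(source,
            new StopFilter(matchVersion, source, stopwords));
      }
    }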
@@ -18,8 +18,8 @@ package org.apache.lucene.collation;
  */
 
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
 import org.apache.lucene.util.Version;
 
@@ -82,7 +82,7 @@ import java.io.Reader;
 *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
 * </ul>
 */
-public final class CollationKeyAnalyzer extends ReusableAnalyzerBase {
+public final class CollationKeyAnalyzer extends Analyzer {
   private final Collator collator;
   private final CollationAttributeFactory factory;
   private final Version matchVersion;
@@ -64,7 +64,7 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
   * Analyzer that just uses ChineseTokenizer, not ChineseFilter.
   * convenience to show the behavior of the tokenizer
   */
-  private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
+  private class JustChineseTokenizerAnalyzer extends Analyzer {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      return new TokenStreamComponents(new ChineseTokenizer(reader));
@@ -75,7 +75,7 @@ public class TestChineseTokenizer extends BaseTokenStreamTestCase
   * Analyzer that just uses ChineseFilter, not ChineseTokenizer.
   * convenience to show the behavior of the filter.
   */
-  private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
+  private class JustChineseFilterAnalyzer extends Analyzer {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
@@ -84,7 +84,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
   * @return Map<String,String>
   */
  public void testCommonGramsQueryFilter() throws Exception {
-    Analyzer a = new ReusableAnalyzerBase() {
+    Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String field, Reader in) {
        Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
@@ -154,7 +154,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
   }
 
  public void testCommonGramsFilter() throws Exception {
-    Analyzer a = new ReusableAnalyzerBase() {
+    Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String field, Reader in) {
        Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
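The anonymous-Analyzer idiom recurring through these test hunks, as one self-contained sketch (the test name and token expectations are illustrative):

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.Tokenizer;

    public class AnonymousAnalyzerIdiomTest extends BaseTokenStreamTestCase {
      public void testWhitespaceSplit() throws Exception {
        Analyzer a = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String field, Reader in) {
            // Split on whitespace without lowercasing.
            Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer);
          }
        };
        assertAnalyzesTo(a, "foo bar", new String[] { "foo", "bar" });
      }
    }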
@@ -117,7 +117,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
 
-  private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
+  private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
 
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
@@ -5,7 +5,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 import java.io.IOException;
@@ -43,7 +42,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
   }
 
-  private Analyzer a = new ReusableAnalyzerBase() {
+  private Analyzer a = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents
       (String fieldName, Reader reader) {
@@ -8,7 +8,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 import java.io.BufferedReader;
@@ -50,7 +49,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
     BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
   }
 
-  private Analyzer a = new ReusableAnalyzerBase() {
+  private Analyzer a = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents
       (String fieldName, Reader reader) {
@@ -99,7 +98,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
     }
   }
 
-  private Analyzer urlAnalyzer = new ReusableAnalyzerBase() {
+  private Analyzer urlAnalyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
       UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
@@ -109,7 +108,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
     }
   };
 
-  private Analyzer emailAnalyzer = new ReusableAnalyzerBase() {
+  private Analyzer emailAnalyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
       UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
@@ -431,7 +430,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   /** @deprecated remove this and sophisticated backwards layer in 5.0 */
   @Deprecated
   public void testCombiningMarksBackwards() throws Exception {
-    Analyzer a = new ReusableAnalyzerBase() {
+    Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents
         (String fieldName, Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Simple tests for {@link GermanLightStemFilter}
 */
 public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Simple tests for {@link GermanMinimalStemFilter}
 */
 public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -36,7 +35,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 *
 */
 public class TestGermanStemFilter extends BaseTokenStreamTestCase {
-  Analyzer analyzer = new ReusableAnalyzerBase() {
+  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,13 +24,12 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 /**
 * Simple tests for {@link EnglishMinimalStemFilter}
 */
 public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -25,13 +25,12 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 /**
 * Tests for {@link KStemmer}
 */
 public class TestKStemmer extends BaseTokenStreamTestCase {
-  Analyzer a = new ReusableAnalyzerBase() {
+  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
@@ -24,7 +24,6 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
@@ -36,7 +35,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Test the PorterStemFilter with Martin Porter's test data.
 */
 public class TestPorterStemFilter extends BaseTokenStreamTestCase {
-  Analyzer a = new ReusableAnalyzerBase() {
+  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Simple tests for {@link SpanishLightStemFilter}
 */
 public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Simple tests for {@link FinnishLightStemFilter}
 */
 public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Simple tests for {@link FrenchLightStemFilter}
 */
 public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
 * Simple tests for {@link FrenchMinimalStemFilter}
 */
 public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -28,13 +28,12 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 /**
 * Simple tests for {@link GalicianStemFilter}
 */
 public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link HungarianLightStemFilter}
  */
 public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {

@@ -24,14 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 /**
  * Tests {@link IndonesianStemmer}
  */
 public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
   /* full stemming, no stopwords */
-  Analyzer a = new ReusableAnalyzerBase() {
+  Analyzer a = new Analyzer() {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       Tokenizer tokenizer = new KeywordTokenizer(reader);
@@ -112,7 +111,7 @@ public class TestIndonesianStemmer extends BaseTokenStreamTestCase {
   }
 
   /* inflectional-only stemming */
-  Analyzer b = new ReusableAnalyzerBase() {
+  Analyzer b = new Analyzer() {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       Tokenizer tokenizer = new KeywordTokenizer(reader);

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -32,7 +31,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link ItalianLightStemFilter}
  */
 public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {

@@ -24,13 +24,12 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 /**
  * Basic tests for {@link LatvianStemmer}
  */
 public class TestLatvianStemmer extends BaseTokenStreamTestCase {
-  private Analyzer a = new ReusableAnalyzerBase() {
+  private Analyzer a = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
       Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

@@ -213,7 +213,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
     final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
 
     /* analyzer that uses whitespace + wdf */
-    Analyzer a = new ReusableAnalyzerBase() {
+    Analyzer a = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String field, Reader reader) {
         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -241,7 +241,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 1, 1, 1 });
 
     /* analyzer that will consume tokens with large position increments */
-    Analyzer a2 = new ReusableAnalyzerBase() {
+    Analyzer a2 = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String field, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
@@ -274,7 +274,7 @@ public class TestWordDelimiterFilter extends BaseTokenStreamTestCase {
         new int[] { 6, 14, 19 },
         new int[] { 1, 11, 1 });
 
-    Analyzer a3 = new ReusableAnalyzerBase() {
+    Analyzer a3 = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String field, Reader reader) {
         Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -34,7 +33,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link PortugueseLightStemFilter}
  */
 public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {

@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 
@@ -34,7 +33,7 @@ import static org.apache.lucene.analysis.VocabularyAssert.*;
  * Simple tests for {@link PortugueseMinimalStemFilter}
  */
 public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {

@@ -28,13 +28,12 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 
 /**
  * Simple tests for {@link PortugueseStemFilter}
  */
 public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
-  private Analyzer analyzer = new ReusableAnalyzerBase() {
+  private Analyzer analyzer = new Analyzer() {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {

Some files were not shown because too many files have changed in this diff.
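Every hunk above applies the same mechanical migration: the org.apache.lucene.analysis.ReusableAnalyzerBase import is deleted, and each anonymous "new ReusableAnalyzerBase() {" becomes "new Analyzer() {", with the createComponents() body left untouched. Below is a minimal, self-contained sketch of the resulting pattern; the class name MigrationExample and the choice of MockTokenizer are illustrative, not taken from any single file in this diff.

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;

class MigrationExample {
  // Formerly "new ReusableAnalyzerBase() { ... }"; after LUCENE-3396 the
  // anonymous subclass extends Analyzer directly, and createComponents()
  // is the only method an implementation has to supply.
  static final Analyzer ANALYZER = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      // The tests above each build a single Tokenizer (MockTokenizer,
      // KeywordTokenizer, StandardTokenizer, ...), optionally wrap it in the
      // filter under test, and hand the result to TokenStreamComponents.
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(source);
    }
  };
}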