LUCENE-5640: Refactor Token, add new PackedTokenAttributeImpl, make use of Java 7 MethodHandles in DEFAULT_ATTRIBUTE_FACTORY

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1592914 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2014-05-06 22:24:58 +00:00
parent b234e9748f
commit 0086a6e644
72 changed files with 769 additions and 883 deletions

View File

@ -100,6 +100,10 @@ Changes in Backwards Compatibility Policy
can be used by custom fieldtypes, which don't use the Analyzer, but
implement their own TokenStream. (Uwe Schindler, Robert Muir)
* LUCENE-5640: AttributeSource.AttributeFactory was moved to a
top-level class: org.apache.lucene.util.AttributeFactory
(Uwe Schindler, Robert Muir)
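For existing code, the move is expected to amount to a one-line import change, as the many file diffs below show; roughly:

// before LUCENE-5640: factory was a nested class of AttributeSource
import org.apache.lucene.util.AttributeSource.AttributeFactory;
// after LUCENE-5640: top-level class
import org.apache.lucene.util.AttributeFactory;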
API Changes
* LUCENE-5582: Deprecate IndexOutput.length (just use
@ -126,6 +130,9 @@ API Changes
* LUCENE-5633: Change NoMergePolicy to a singleton with no distinction between
compound and non-compound types. (Shai Erera)
* LUCENE-5640: The Token class was deprecated. Since Lucene 2.9, TokenStreams
are using Attributes, Token is no longer used. (Uwe Schindler, Robert Muir)
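A minimal sketch of the attribute-based consumption pattern that replaces Token; it assumes an existing Analyzer instance, and the helper class name is a placeholder:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class PrintTokensExample {  // hypothetical helper, not part of this commit
  static void printTokens(Analyzer analyzer) throws IOException {
    TokenStream ts = analyzer.tokenStream("field", "some text to tokenize");
    // register attribute views once, before reset()
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // per-token state lives in the attributes, not in a reused Token object
      System.out.println(termAtt + " [" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + ")");
    }
    ts.end();
    ts.close();
  }
}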
Optimizations
* LUCENE-5603: hunspell stemmer more efficiently strips prefixes
@ -140,9 +147,11 @@ Optimizations
* LUCENE-5634: IndexWriter reuses TokenStream instances for String and Numeric
fields by default. (Uwe Schindler, Shay Banon, Mike McCandless, Robert Muir)
* LUCENE-5638: TokenStream uses a more performant AttributeFactory by default,
that packs the core attributes into one impl, for faster clearAttributes(),
saveState(), and restoreState(). (Uwe Schindler, Robert Muir)
* LUCENE-5638, LUCENE-5640: TokenStream uses a more performant AttributeFactory
by default, that packs the core attributes into one implementation
(PackedTokenAttributeImpl), for faster clearAttributes(), saveState(), and
restoreState(). In addition, AttributeFactory uses Java 7 MethodHandles for
instantiating Attribute implementations. (Uwe Schindler, Robert Muir)
* LUCENE-5609: Changed the default NumericField precisionStep from 4
to 8 (for int/float) and 16 (for long/double), for faster indexing
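The MethodHandles change mentioned in the LUCENE-5638/5640 entry above can be illustrated with a small standalone sketch (hypothetical names, not the Lucene implementation): the public no-arg constructor is looked up once and bound into a MethodHandle, which is then invoked per instantiation instead of going through reflective Constructor.newInstance() on every createAttributeInstance() call.

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

final class ConstructorHandleSketch {  // hypothetical name
  // resolve the public no-arg constructor once; reuse the handle afterwards
  static MethodHandle noArgConstructor(Class<?> implClass) throws ReflectiveOperationException {
    return MethodHandles.publicLookup()
        .findConstructor(implClass, MethodType.methodType(void.class))
        .asType(MethodType.methodType(Object.class));  // erase return type for invokeExact()
  }

  public static void main(String[] args) throws Throwable {
    MethodHandle ctor = noArgConstructor(StringBuilder.class);
    Object instance = (Object) ctor.invokeExact();
    System.out.println(instance.getClass());  // class java.lang.StringBuilder
  }
}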

View File

@ -23,6 +23,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
/**

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.Map;

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
/**
@ -55,7 +56,7 @@ public class LetterTokenizer extends CharTokenizer {
/**
* Construct a new LetterTokenizer using a given
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param matchVersion
* Lucene version to match. See {@link <a href="#version">above</a>}

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;

View File

@ -21,6 +21,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@ -60,7 +61,7 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
/**
* Construct a new LowerCaseTokenizer using a given
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param matchVersion
* Lucene version to match. See {@link <a href="#version">above</a>}

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.HashMap;
import java.util.Map;

View File

@ -21,6 +21,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@ -50,7 +51,7 @@ public final class WhitespaceTokenizer extends CharTokenizer {
/**
* Construct a new WhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param matchVersion Lucene version to match. See

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.Map;

View File

@ -24,7 +24,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenStream} containing a single token.
* @deprecated Do not use this anymore!
*/
@Deprecated
public final class SingleTokenTokenStream extends TokenStream {
private boolean exhausted = false;

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.ngram;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
/**
@ -49,7 +50,7 @@ public class EdgeNGramTokenizer extends NGramTokenizer {
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
*
* @param version the Lucene match version
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.ngram;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.Map;

View File

@ -23,6 +23,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
* Old broken version of {@link NGramTokenizer}.
@ -54,7 +55,7 @@ public final class Lucene43NGramTokenizer extends Tokenizer {
/**
* Creates NGramTokenizer with given min and max n-grams.
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
/**
@ -99,7 +100,7 @@ public class NGramTokenizer extends Tokenizer {
/**
* Creates NGramTokenizer with given min and max n-grams.
* @param version the lucene compatibility <a href="#version">version</a>
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
import java.io.Reader;

View File

@ -17,13 +17,12 @@ package org.apache.lucene.analysis.path;
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
* Tokenizer for path-like hierarchies.
@ -69,7 +68,7 @@ public class PathHierarchyTokenizer extends Tokenizer {
}
public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
}
public PathHierarchyTokenizer

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
/**
* Factory for {@link PathHierarchyTokenizer}.

View File

@ -17,15 +17,14 @@ package org.apache.lucene.analysis.path;
*/
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
* Tokenizer for domain-like hierarchies.
@ -82,7 +81,7 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
}
public ReversePathHierarchyTokenizer( int bufferSize, char delimiter, char replacement, int skip) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
}
public ReversePathHierarchyTokenizer
(AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {

View File

@ -22,10 +22,10 @@ import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
* This tokenizer uses regex pattern matching to construct distinct tokens
@ -67,7 +67,7 @@ public final class PatternTokenizer extends Tokenizer {
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
public PatternTokenizer(Pattern pattern, int group) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, pattern, group);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, pattern, group);
}
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */

View File

@ -21,7 +21,7 @@ import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
/**
* Factory for {@link PatternTokenizer}.

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@ -106,7 +107,7 @@ public final class ClassicTokenizer extends Tokenizer {
}
/**
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
*/
public ClassicTokenizer(Version matchVersion, AttributeFactory factory) {
super(factory);

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@ -120,7 +121,7 @@ public final class StandardTokenizer extends Tokenizer {
}
/**
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
*/
public StandardTokenizer(Version matchVersion, AttributeFactory factory) {
super(factory);

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;

View File

@ -27,9 +27,9 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
/**
* This class implements Word Break rules from the Unicode Text Segmentation

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.Map;

View File

@ -20,11 +20,11 @@ package org.apache.lucene.analysis.th;
import java.text.BreakIterator;
import java.util.Locale;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArrayIterator;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.AttributeFactory;
/**
* Tokenizer that use {@link BreakIterator} to tokenize Thai text.
@ -60,7 +60,7 @@ public class ThaiTokenizer extends SegmentingTokenizerBase {
/** Creates a new ThaiTokenizer */
public ThaiTokenizer() {
this(Token.TOKEN_ATTRIBUTE_FACTORY);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
}
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeFactory;
/**
* Factory for {@link ThaiTokenizer}.
@ -43,7 +43,7 @@ public class ThaiTokenizerFactory extends TokenizerFactory {
}
@Override
public Tokenizer create(AttributeSource.AttributeFactory factory) {
public Tokenizer create(AttributeFactory factory) {
return new ThaiTokenizer(factory);
}
}

View File

@ -23,6 +23,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;

View File

@ -19,12 +19,11 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.Reader;
import java.text.BreakIterator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
* Breaks text into sentences with a {@link BreakIterator} and
@ -63,7 +62,7 @@ public abstract class SegmentingTokenizerBase extends Tokenizer {
* be provided to this constructor.
*/
public SegmentingTokenizerBase(BreakIterator iterator) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, iterator);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, iterator);
}
/**

View File

@ -17,11 +17,10 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.Map;
import java.util.Set;
@ -73,7 +72,7 @@ public abstract class TokenizerFactory extends AbstractAnalysisFactory {
/** Creates a TokenStream of the specified input using the default attribute factory. */
public final Tokenizer create() {
return create(Token.TOKEN_ATTRIBUTE_FACTORY);
return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
}
/** Creates a TokenStream of the specified input using the given AttributeFactory */

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
@ -145,7 +146,7 @@ public final class WikipediaTokenizer extends Tokenizer {
/**
* Creates a new instance of the {@link org.apache.lucene.analysis.wikipedia.WikipediaTokenizer}. Attaches the
* <code>input</code> to the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
* <code>input</code> to the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeFactory}.
*
* @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
*/

View File

@ -21,7 +21,7 @@ import java.util.Collections;
import java.util.Map;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
/**
* Factory for {@link WikipediaTokenizer}.

View File

@ -19,11 +19,9 @@ package org.apache.lucene.collation;
import java.text.Collator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeFactory;
/**
* <p>
@ -69,18 +67,17 @@ import org.apache.lucene.util.AttributeSource;
* ICUCollationAttributeFactory on the query side, or vice versa.
* </p>
*/
public class CollationAttributeFactory extends AttributeSource.AttributeFactory {
public class CollationAttributeFactory extends AttributeFactory.StaticImplementationAttributeFactory<CollatedTermAttributeImpl> {
private final Collator collator;
private final AttributeSource.AttributeFactory delegate;
/**
* Create a CollationAttributeFactory, using
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
* {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY} as the
* factory for all other attributes.
* @param collator CollationKey generator
*/
public CollationAttributeFactory(Collator collator) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
this(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, collator);
}
/**
@ -89,16 +86,13 @@ public class CollationAttributeFactory extends AttributeSource.AttributeFactory
* @param delegate Attribute Factory
* @param collator CollationKey generator
*/
public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
this.delegate = delegate;
public CollationAttributeFactory(AttributeFactory delegate, Collator collator) {
super(delegate, CollatedTermAttributeImpl.class);
this.collator = collator;
}
@Override
public AttributeImpl createAttributeInstance(
Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(CollatedTermAttributeImpl.class)
? new CollatedTermAttributeImpl(collator)
: delegate.createAttributeInstance(attClass);
public CollatedTermAttributeImpl createInstance() {
return new CollatedTermAttributeImpl(collator);
}
}
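The pattern behind this rewrite is plain delegation: the subclass supplies one implementation class plus a createInstance() hook, requests for any attribute interface that class implements are answered by createInstance(), and everything else falls through to the wrapped factory. A self-contained sketch of the idea (hypothetical names, not the actual AttributeFactory.StaticImplementationAttributeFactory source):

// Hypothetical sketch of the delegation idea; not the Lucene class itself.
abstract class OneImplDelegatingFactory {
  private final OneImplDelegatingFactory delegate;  // handles all other attributes
  private final Class<?> implClass;                 // the single "static" implementation

  OneImplDelegatingFactory(OneImplDelegatingFactory delegate, Class<?> implClass) {
    this.delegate = delegate;
    this.implClass = implClass;
  }

  /** Subclasses build the implementation, possibly with state such as a Collator. */
  protected abstract Object createInstance();

  final Object createAttributeInstance(Class<?> attClass) {
    // if implClass implements the requested interface, use createInstance();
    // otherwise fall through to the delegate factory
    return attClass.isAssignableFrom(implClass)
        ? createInstance()
        : delegate.createAttributeInstance(attClass);
  }
}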

View File

@ -35,7 +35,7 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
/**
* Sanity check some things about all factories,

View File

@ -81,8 +81,8 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.TestUtil;

View File

@ -20,12 +20,12 @@ package org.apache.lucene.analysis.icu.segmentation;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.BreakIterator;
@ -80,7 +80,7 @@ public final class ICUTokenizer extends Tokenizer {
* @param config Tailored BreakIterator configuration
*/
public ICUTokenizer(ICUTokenizerConfig config) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, config);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, config);
}
/**

View File

@ -28,7 +28,7 @@ import java.util.Map;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IOUtils;
import com.ibm.icu.lang.UCharacter;

View File

@ -17,12 +17,9 @@ package org.apache.lucene.collation;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.collation.CollationAttributeFactory; // javadoc
import org.apache.lucene.util.AttributeFactory;
import com.ibm.icu.text.Collator;
@ -63,18 +60,17 @@ import com.ibm.icu.text.Collator;
* java.text.Collator over several languages.
* </p>
*/
public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory {
public class ICUCollationAttributeFactory extends AttributeFactory.StaticImplementationAttributeFactory<ICUCollatedTermAttributeImpl> {
private final Collator collator;
private final AttributeSource.AttributeFactory delegate;
/**
* Create an ICUCollationAttributeFactory, using
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
* {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY} as the
* factory for all other attributes.
* @param collator CollationKey generator
*/
public ICUCollationAttributeFactory(Collator collator) {
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
this(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, collator);
}
/**
@ -83,16 +79,13 @@ public class ICUCollationAttributeFactory extends AttributeSource.AttributeFacto
* @param delegate Attribute Factory
* @param collator CollationKey generator
*/
public ICUCollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
this.delegate = delegate;
public ICUCollationAttributeFactory(AttributeFactory delegate, Collator collator) {
super(delegate, ICUCollatedTermAttributeImpl.class);
this.collator = collator;
}
@Override
public AttributeImpl createAttributeInstance(
Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class)
? new ICUCollatedTermAttributeImpl(collator)
: delegate.createAttributeInstance(attClass);
public ICUCollatedTermAttributeImpl createInstance() {
return new ICUCollatedTermAttributeImpl(collator);
}
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ja;
*/
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -40,6 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.util.RollingCharBuffer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.FST;
@ -195,7 +195,7 @@ public final class JapaneseTokenizer extends Tokenizer {
* @param mode tokenization mode.
*/
public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
this(org.apache.lucene.analysis.Token.TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
}
/**

View File

@ -30,7 +30,7 @@ import java.util.Map;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.analysis.ja.dict.UserDictionary;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;

View File

@ -22,12 +22,12 @@ import java.text.BreakIterator;
import java.util.Iterator;
import java.util.Locale;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.AttributeFactory;
/**
* Tokenizer for Chinese or mixed Chinese-English text.
@ -48,7 +48,7 @@ public class HMMChineseTokenizer extends SegmentingTokenizerBase {
/** Creates a new HMMChineseTokenizer */
public HMMChineseTokenizer() {
this(Token.TOKEN_ATTRIBUTE_FACTORY);
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
}
/** Creates a new HMMChineseTokenizer, supplying the AttributeFactory */

View File

@ -21,7 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
/**
* Factory for {@link HMMChineseTokenizer}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
/**

View File

@ -21,7 +21,7 @@ import java.io.Reader;
import java.util.Map;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
/**
* Factory for the SmartChineseAnalyzer {@link SentenceTokenizer}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;

View File

@ -17,17 +17,16 @@ package org.apache.lucene.analysis.uima;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
/**
@ -44,7 +43,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
private int finalOffset = 0;
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
this(descriptorPath, tokenType, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
this(descriptorPath, tokenType, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
}
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.uima;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.HashMap;

View File

@ -17,11 +17,11 @@ package org.apache.lucene.analysis.uima;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FeaturePath;
@ -30,7 +30,6 @@ import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
/**
@ -54,7 +53,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
private int finalOffset = 0;
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
}
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis.uima;
*/
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import java.io.Reader;
import java.util.HashMap;

View File

@ -28,6 +28,7 @@ import org.apache.lucene.document.LongField; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
@ -233,7 +234,7 @@ public final class NumericTokenStream extends TokenStream {
/**
* Expert: Creates a token stream for numeric values with the specified
* <code>precisionStep</code> using the given
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
* {@link org.apache.lucene.util.AttributeFactory}.
* The stream is not yet initialized,
* before using it, set a value using the various set<em>???</em>Value() methods.
*/

View File

@ -17,16 +17,12 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
@ -58,53 +54,6 @@ import org.apache.lucene.util.BytesRef;
be used as convenience class that implements all {@link Attribute}s, which is especially useful
to easily switch from the old to the new TokenStream API.
<br><br>
<p>Tokenizers and TokenFilters should try to re-use a Token
instance when possible for best performance, by
implementing the {@link TokenStream#incrementToken()} API.
Failing that, to create a new Token you should first use
one of the constructors that starts with null text. To load
the token from a char[] use {@link #copyBuffer(char[], int, int)}.
To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}.
Alternatively you can get the Token's termBuffer by calling either {@link #buffer()},
if you know that your text is shorter than the capacity of the termBuffer
or {@link #resizeBuffer(int)}, if there is any possibility
that you may need to grow the buffer. Fill in the characters of your term into this
buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to
set the length of the term text. See <a target="_top"
href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
for details.</p>
<p>Typical Token reuse patterns:
<ul>
<li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(string, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying from one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
</pre>
</li>
</ul>
A few things to note:
<ul>
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
@ -118,58 +67,18 @@ import org.apache.lucene.util.BytesRef;
{@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
This method now only prints the term text, no additional information anymore.
</p>
@deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
*/
public class Token extends CharTermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
FlagsAttribute, OffsetAttribute, PayloadAttribute, PositionLengthAttribute {
@Deprecated
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int flags;
private BytesRef payload;
private int positionIncrement = 1;
private int positionLength = 1;
/** Constructs a Token with null text. */
public Token() {
}
/** Constructs a Token with null text and start & end
* offsets.
* @param start start offset in the source text
* @param end end offset in the source text */
public Token(int start, int end) {
checkOffsets(start, end);
startOffset = start;
endOffset = end;
}
/** Constructs a Token with null text and start & end
* offsets plus the Token type.
* @param start start offset in the source text
* @param end end offset in the source text
* @param typ the lexical type of this Token */
public Token(int start, int end, String typ) {
checkOffsets(start, end);
startOffset = start;
endOffset = end;
type = typ;
}
/**
* Constructs a Token with null text and start & end
* offsets plus flags. NOTE: flags is EXPERIMENTAL.
* @param start start offset in the source text
* @param end end offset in the source text
* @param flags The bits to set for this token
*/
public Token(int start, int end, int flags) {
checkOffsets(start, end);
startOffset = start;
endOffset = end;
this.flags = flags;
}
/** Constructs a Token with the given term text, and start
* & end offsets. The type defaults to "word."
* <b>NOTE:</b> for better indexing speed you should
@ -179,149 +88,9 @@ public class Token extends CharTermAttributeImpl
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(String text, int start, int end) {
checkOffsets(start, end);
public Token(CharSequence text, int start, int end) {
append(text);
startOffset = start;
endOffset = end;
}
/** Constructs a Token with the given text, start and end
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
* @param start start offset in the source text
* @param end end offset in the source text
* @param typ token type
*/
public Token(String text, int start, int end, String typ) {
checkOffsets(start, end);
append(text);
startOffset = start;
endOffset = end;
type = typ;
}
/**
* Constructs a Token with the given text, start and end
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
* @param start start offset in the source text
* @param end end offset in the source text
* @param flags token type bits
*/
public Token(String text, int start, int end, int flags) {
checkOffsets(start, end);
append(text);
startOffset = start;
endOffset = end;
this.flags = flags;
}
/**
* Constructs a Token with the given term buffer (offset
* & length), start and end
* offsets
* @param startTermBuffer buffer containing term text
* @param termBufferOffset the index in the buffer of the first character
* @param termBufferLength number of valid characters in the buffer
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
checkOffsets(start, end);
copyBuffer(startTermBuffer, termBufferOffset, termBufferLength);
startOffset = start;
endOffset = end;
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
throw new IllegalArgumentException
("Increment must be zero or greater: " + positionIncrement);
this.positionIncrement = positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public int getPositionIncrement() {
return positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public void setPositionLength(int positionLength) {
this.positionLength = positionLength;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public int getPositionLength() {
return positionLength;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int startOffset() {
return startOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int endOffset() {
return endOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public void setOffset(int startOffset, int endOffset) {
checkOffsets(startOffset, endOffset);
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final String type() {
return type;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final void setType(String type) {
this.type = type;
setOffset(start, end);
}
/**
@ -366,37 +135,8 @@ public class Token extends CharTermAttributeImpl
@Override
public void clear() {
super.clear();
payload = null;
positionIncrement = positionLength = 1;
flags = 0;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@Override
public Token clone() {
Token t = (Token)super.clone();
// Do a deep clone
if (payload != null) {
t.payload = payload.clone();
}
return t;
}
/** Makes a clone, but replaces the term buffer &
* start/end offset in the process. This is more
* efficient than doing a full clone (and then calling
* {@link #copyBuffer}) because it saves a wasted copy of the old
* termBuffer. */
public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
t.positionIncrement = positionIncrement;
t.positionLength = positionLength;
t.flags = flags;
t.type = type;
if (payload != null)
t.payload = payload.clone();
return t;
payload = null;
}
@Override
@ -406,12 +146,8 @@ public class Token extends CharTermAttributeImpl
if (obj instanceof Token) {
final Token other = (Token) obj;
return (startOffset == other.startOffset &&
endOffset == other.endOffset &&
return (
flags == other.flags &&
positionIncrement == other.positionIncrement &&
positionLength == other.positionLength &&
(type == null ? other.type == null : type.equals(other.type)) &&
(payload == null ? other.payload == null : payload.equals(other.payload)) &&
super.equals(obj)
);
@ -422,117 +158,20 @@ public class Token extends CharTermAttributeImpl
@Override
public int hashCode() {
int code = super.hashCode();
code = code * 31 + startOffset;
code = code * 31 + endOffset;
code = code * 31 + flags;
code = code * 31 + positionIncrement;
code = code * 31 + positionLength;
if (type != null)
code = code * 31 + type.hashCode();
if (payload != null)
if (payload != null) {
code = code * 31 + payload.hashCode();
}
return code;
}
// like clear() but doesn't clear termBuffer/text
private void clearNoTermBuffer() {
payload = null;
positionIncrement = positionLength = 1;
flags = 0;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
@Override
public Token clone() {
final Token t = (Token) super.clone();
if (payload != null) {
t.payload = payload.clone();
}
/** Shorthand for calling {@link #clear},
* {@link #copyBuffer(char[], int, int)},
* {@link #setOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
checkOffsets(newStartOffset, newEndOffset);
clearNoTermBuffer();
copyBuffer(newTermBuffer, newTermOffset, newTermLength);
payload = null;
positionIncrement = positionLength = 1;
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #copyBuffer(char[], int, int)},
* {@link #setOffset},
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
checkOffsets(newStartOffset, newEndOffset);
clearNoTermBuffer();
copyBuffer(newTermBuffer, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence)},
* {@link #setOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence, int, int)},
* {@link #setOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm, newTermOffset, newTermOffset + newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence)},
* {@link #setOffset},
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence, int, int)},
* {@link #setOffset},
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm, newTermOffset, newTermOffset + newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
return t;
}
/**
@ -540,87 +179,28 @@ public class Token extends CharTermAttributeImpl
* @param prototype source Token to copy fields from
*/
public void reinit(Token prototype) {
copyBuffer(prototype.buffer(), 0, prototype.length());
positionIncrement = prototype.positionIncrement;
positionLength = prototype.positionLength;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
// this is a bad hack to emulate no cloning of payload!
prototype.copyToWithoutPayloadClone(this);
}
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype existing Token
* @param newTerm new term text
*/
public void reinit(Token prototype, String newTerm) {
setEmpty().append(newTerm);
positionIncrement = prototype.positionIncrement;
positionLength = prototype.positionLength;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
}
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype existing Token
* @param newTermBuffer buffer containing new term text
* @param offset the index in the buffer of the first character
* @param length number of valid characters in the buffer
*/
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
copyBuffer(newTermBuffer, offset, length);
positionIncrement = prototype.positionIncrement;
positionLength = prototype.positionLength;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
private void copyToWithoutPayloadClone(AttributeImpl target) {
super.copyTo(target);
((FlagsAttribute) target).setFlags(flags);
((PayloadAttribute) target).setPayload(payload);
}
@Override
public void copyTo(AttributeImpl target) {
if (target instanceof Token) {
final Token to = (Token) target;
to.reinit(this);
// reinit shares the payload, so clone it:
if (payload !=null) {
to.payload = payload.clone();
}
} else {
super.copyTo(target);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PositionLengthAttribute) target).setPositionLength(positionLength);
((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
((FlagsAttribute) target).setFlags(flags);
((TypeAttribute) target).setType(type);
}
((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
}
@Override
public void reflectWith(AttributeReflector reflector) {
super.reflectWith(reflector);
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
reflector.reflect(PayloadAttribute.class, "payload", payload);
reflector.reflect(FlagsAttribute.class, "flags", flags);
reflector.reflect(TypeAttribute.class, "type", type);
}
private void checkOffsets(int startOffset, int endOffset) {
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
}
reflector.reflect(PayloadAttribute.class, "payload", payload);
}
/** Convenience factory that returns <code>Token</code> as implementation for the basic
@ -628,43 +208,6 @@ public class Token extends CharTermAttributeImpl
* attributes.
* @since 3.0
*/
public static final AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
/** <b>Expert:</b> Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes
* and for all other attributes calls the given delegate factory.
* @since 3.0
*/
public static final class TokenAttributeFactory extends AttributeSource.AttributeFactory {
private final AttributeSource.AttributeFactory delegate;
/** <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
* and for all other attributes calls the given delegate factory. */
public TokenAttributeFactory(AttributeSource.AttributeFactory delegate) {
this.delegate = delegate;
}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(Token.class)
? new Token() : delegate.createAttributeInstance(attClass);
}
@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other instanceof TokenAttributeFactory) {
final TokenAttributeFactory af = (TokenAttributeFactory) other;
return this.delegate.equals(af.delegate);
}
return false;
}
@Override
public int hashCode() {
return delegate.hashCode() ^ 0x0a45aa31;
}
}
public static final AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class);
}
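Equivalent behavior is now obtained in one call instead of a hand-written delegating factory subclass; a short usage sketch along these lines (the class name is a placeholder):

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;

final class StaticFactoryUsageSketch {  // hypothetical name
  public static void main(String[] args) {
    // Token serves every attribute interface it implements; the default
    // reflective factory covers everything else.
    AttributeFactory f = AttributeFactory.getStaticImplementation(
        AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class);
    AttributeSource src = new AttributeSource(f);
    CharTermAttribute term = src.addAttribute(CharTermAttribute.class);  // backed by a Token
    System.out.println(term.getClass().getSimpleName());                 // expected: Token
  }
}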

View File

@ -21,11 +21,13 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
@ -85,11 +87,15 @@ import org.apache.lucene.util.AttributeSource;
*/
public abstract class TokenStream extends AttributeSource implements Closeable {
/** Default {@link AttributeFactory} instance that should be used for TokenStreams. */
public static final AttributeFactory DEFAULT_TOKEN_ATTRIBUTE_FACTORY =
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, PackedTokenAttributeImpl.class);
/**
* A TokenStream using the default attribute factory.
*/
protected TokenStream() {
super(Token.TOKEN_ATTRIBUTE_FACTORY);
super(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
assert assertFinal();
}
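With the packed default factory, the core term/offset/position/type attribute interfaces are expected to resolve to one shared PackedTokenAttributeImpl per stream, which is what makes clearAttributes(), saveState() and restoreState() cheaper. A quick check along these lines (a sketch; the anonymous subclass exists only to obtain a stream instance):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class PackedFactorySketch {  // hypothetical name
  public static void main(String[] args) {
    TokenStream ts = new TokenStream() {
      @Override
      public boolean incrementToken() {
        return false;  // no tokens needed; we only inspect the attribute wiring
      }
    };
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    // both views should be backed by the same PackedTokenAttributeImpl instance
    System.out.println(term == (Object) offset);  // expected: true
  }
}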

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import java.io.Reader;

View File

@ -813,7 +813,7 @@ Now we're going to implement our own custom Attribute for part-of-speech tagging
</p>
<p>
This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
{@link org.apache.lucene.util.AttributeSource.AttributeFactory}. The factory accepts an Attribute interface as argument
{@link org.apache.lucene.util.AttributeFactory}. The factory accepts an Attribute interface as argument
and returns an actual instance. You can implement your own factory if you need to change the default behavior.
</p>
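A sketch of such a custom factory (MyAttribute and MyAttributeImpl are hypothetical placeholders; the single method to override is createAttributeInstance()):

import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;

public final class MyAttributeFactory extends AttributeFactory {
  @Override
  public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
    // answer one interface with a hand-picked implementation...
    if (attClass == MyAttribute.class) {  // MyAttribute: hypothetical Attribute interface
      return new MyAttributeImpl();       // MyAttributeImpl: hypothetical implementation
    }
    // ...and keep the default naming convention for everything else
    return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY.createAttributeInstance(attClass);
  }
}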
<p>

View File

@ -0,0 +1,206 @@
package org.apache.lucene.analysis.tokenattributes;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of the common attributes used by Lucene:<ul>
* <li>{@link CharTermAttribute}
* <li>{@link TypeAttribute}
* <li>{@link PositionIncrementAttribute}
* <li>{@link PositionLengthAttribute}
* <li>{@link OffsetAttribute}
* </ul>*/
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
PositionLengthAttribute, OffsetAttribute {
private int startOffset, endOffset;
private String type = DEFAULT_TYPE;
private int positionIncrement = 1;
private int positionLength = 1;
/** Constructs the attribute implementation. */
public PackedTokenAttributeImpl() {
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
throw new IllegalArgumentException
("Increment must be zero or greater: " + positionIncrement);
this.positionIncrement = positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public int getPositionIncrement() {
return positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public void setPositionLength(int positionLength) {
this.positionLength = positionLength;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public int getPositionLength() {
return positionLength;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int startOffset() {
return startOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int endOffset() {
return endOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public void setOffset(int startOffset, int endOffset) {
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
}
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final String type() {
return type;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final void setType(String type) {
this.type = type;
}
/** Resets the attributes to their default values. */
@Override
public void clear() {
super.clear();
positionIncrement = positionLength = 1;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@Override
public PackedTokenAttributeImpl clone() {
return (PackedTokenAttributeImpl) super.clone();
}
@Override
public boolean equals(Object obj) {
if (obj == this)
return true;
if (obj instanceof PackedTokenAttributeImpl) {
final PackedTokenAttributeImpl other = (PackedTokenAttributeImpl) obj;
return (startOffset == other.startOffset &&
endOffset == other.endOffset &&
positionIncrement == other.positionIncrement &&
positionLength == other.positionLength &&
(type == null ? other.type == null : type.equals(other.type)) &&
super.equals(obj)
);
} else
return false;
}
@Override
public int hashCode() {
int code = super.hashCode();
code = code * 31 + startOffset;
code = code * 31 + endOffset;
code = code * 31 + positionIncrement;
code = code * 31 + positionLength;
if (type != null)
code = code * 31 + type.hashCode();
return code;
}
@Override
public void copyTo(AttributeImpl target) {
if (target instanceof PackedTokenAttributeImpl) {
final PackedTokenAttributeImpl to = (PackedTokenAttributeImpl) target;
to.copyBuffer(buffer(), 0, length());
to.positionIncrement = positionIncrement;
to.positionLength = positionLength;
to.startOffset = startOffset;
to.endOffset = endOffset;
to.type = type;
} else {
super.copyTo(target);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PositionLengthAttribute) target).setPositionLength(positionLength);
((TypeAttribute) target).setType(type);
}
}
@Override
public void reflectWith(AttributeReflector reflector) {
super.reflectWith(reflector);
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
reflector.reflect(TypeAttribute.class, "type", type);
}
}

View File

@@ -0,0 +1,202 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.ref.Reference;
import java.lang.ref.WeakReference;
/**
* An AttributeFactory creates instances of {@link AttributeImpl}s.
*/
public abstract class AttributeFactory {
/**
* Returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
*/
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
/**
* Returns a correctly typed {@link MethodHandle} for the no-arg ctor of the given class.
*/
static final MethodHandle findAttributeImplCtor(Class<? extends AttributeImpl> clazz) {
try {
return lookup.findConstructor(clazz, NO_ARG_CTOR).asType(NO_ARG_RETURNING_ATTRIBUTEIMPL);
} catch (NoSuchMethodException | IllegalAccessException e) {
throw new IllegalArgumentException("Cannot lookup accessible no-arg constructor for: " + clazz.getName(), e);
}
}
private static final MethodHandles.Lookup lookup = MethodHandles.publicLookup();
private static final MethodType NO_ARG_CTOR = MethodType.methodType(void.class);
private static final MethodType NO_ARG_RETURNING_ATTRIBUTEIMPL = MethodType.methodType(AttributeImpl.class);
/**
* This is the default factory that creates {@link AttributeImpl}s using the
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
*/
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory(true);
static final class DefaultAttributeFactory extends AttributeFactory {
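// The map value is either a MethodHandle of the impl's no-arg ctor (impl loaded by our
// own class loader with MethodHandles enabled) or a WeakReference to the impl class: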
private final WeakIdentityMap<Class<? extends Attribute>, Object> attClassImplMap =
WeakIdentityMap.newConcurrentHashMap(false);
private final ClassLoader myClassLoader = getClass().getClassLoader();
private final boolean useMethodHandles;
// this constructor is available for tests, to be able to test the pure-reflective case, too
DefaultAttributeFactory(boolean useMethodHandles) {
this.useMethodHandles = useMethodHandles;
}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
// first lookup from cache:
Object cached = attClassImplMap.get(attClass);
if (cached instanceof MethodHandle) {
return invokeMethodHandle((MethodHandle) cached);
} else if (cached instanceof Reference) {
@SuppressWarnings("unchecked") final Class<? extends AttributeImpl> clazz =
((Reference<Class<? extends AttributeImpl>>) cached).get();
if (clazz != null) {
return invokeReflective(clazz);
}
cached = null;
// fall-through
}
// No cache hit!
// Please note: we have the slight chance that another thread may do the same, but who cares?
assert cached == null;
final Class<? extends AttributeImpl> implClazz = findImplClass(attClass);
// if the attribute impl is from our own ClassLoader, we optimize to use pre-allocated MethodHandle to instantiate the object
if (useMethodHandles && implClazz.getClassLoader() == myClassLoader) {
final MethodHandle constr = findAttributeImplCtor(implClazz);
attClassImplMap.put(attClass, constr);
return invokeMethodHandle(constr);
} else {
// otherwise use slower reflection, so we don't keep a strong reference to the class
// forever (a MethodHandle strongly references the class, which would prevent it
// from ever being unloaded):
attClassImplMap.put(attClass, new WeakReference<>(implClazz));
return invokeReflective(implClazz);
}
}
private Class<? extends AttributeImpl> findImplClass(Class<? extends Attribute> attClass) {
try {
return Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader()).asSubclass(AttributeImpl.class);
} catch (ClassNotFoundException cnfe) {
throw new IllegalArgumentException("Cannot find implementing class for: " + attClass.getName());
}
}
private AttributeImpl invokeMethodHandle(MethodHandle constr) {
try {
return (AttributeImpl) constr.invokeExact();
} catch (Throwable t) {
rethrow(t);
throw new AssertionError();
}
}
private AttributeImpl invokeReflective(Class<? extends AttributeImpl> implClass) {
try {
return implClass.newInstance();
} catch (InstantiationException | IllegalAccessException e) {
throw new IllegalArgumentException("Cannot instantiate implementing class: " + implClass.getName(), e);
}
}
}
/** <b>Expert</b>: AttributeFactory returning an instance of the given {@code clazz} for the
* attributes it implements. For all other attributes it calls the given delegate factory
* as fallback. This class can be used to prefer a specific {@code AttributeImpl} which
* combines multiple attributes over separate classes.
* @lucene.internal
*/
public abstract static class StaticImplementationAttributeFactory<A extends AttributeImpl> extends AttributeFactory {
private final AttributeFactory delegate;
private final Class<A> clazz;
/** <b>Expert</b>: Creates an AttributeFactory returning {@code clazz} as instance for the
* attributes it implements and for all other attributes calls the given delegate factory. */
public StaticImplementationAttributeFactory(AttributeFactory delegate, Class<A> clazz) {
this.delegate = delegate;
this.clazz = clazz;
}
@Override
public final AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(clazz) ? createInstance() : delegate.createAttributeInstance(attClass);
}
/** Creates an instance of {@code A}. */
protected abstract A createInstance();
@Override
public boolean equals(Object other) {
if (this == other)
return true;
if (other == null || other.getClass() != this.getClass())
return false;
@SuppressWarnings("rawtypes")
final StaticImplementationAttributeFactory af = (StaticImplementationAttributeFactory) other;
return this.delegate.equals(af.delegate) && this.clazz == af.clazz;
}
@Override
public int hashCode() {
return 31 * delegate.hashCode() + clazz.hashCode();
}
}
/** Returns an AttributeFactory returning an instance of the given {@code clazz} for the
* attributes it implements. The given {@code clazz} must have a public no-arg constructor.
* For all other attributes it calls the given delegate factory as fallback.
* This method can be used to prefer a specific {@code AttributeImpl} which combines
* multiple attributes over separate classes.
* <p>Please save instances created by this method in a static final field, because
* on each call, this does reflection for creating a {@link MethodHandle}.
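* <p>Illustrative usage (a sketch; {@code MyPackedAttributeImpl} stands for any
* hypothetical {@code AttributeImpl} that implements several attribute interfaces):
* <pre class="prettyprint">
* static final AttributeFactory MY_FACTORY = AttributeFactory.getStaticImplementation(
*     AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MyPackedAttributeImpl.class);
* </pre>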
*/
public static <A extends AttributeImpl> AttributeFactory getStaticImplementation(AttributeFactory delegate, Class<A> clazz) {
final MethodHandle constr = findAttributeImplCtor(clazz);
return new StaticImplementationAttributeFactory<A>(delegate, clazz) {
@Override
protected A createInstance() {
try {
return (A) constr.invokeExact();
} catch (Throwable t) {
rethrow(t);
throw new AssertionError();
}
}
};
}
// Hack to rethrow unknown Exceptions from {@link MethodHandle#invoke}:
// TODO: remove the impl in test-framework, this one is more elegant :-)
static void rethrow(Throwable t) {
AttributeFactory.<Error>rethrow0(t);
}
@SuppressWarnings("unchecked")
private static <T extends Throwable> void rethrow0(Throwable t) throws T {
throw (T) t;
}
}

View File

@@ -19,8 +19,7 @@ package org.apache.lucene.util;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.lang.ref.WeakReference;
import java.util.LinkedList;
import java.lang.ref.Reference;
/**
* Base class for Attributes that can be added to a
@@ -91,12 +90,14 @@ public abstract class AttributeImpl implements Cloneable, Attribute {
*/
public void reflectWith(AttributeReflector reflector) {
final Class<? extends AttributeImpl> clazz = this.getClass();
final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
if (interfaces.size() != 1) {
final Reference<Class<? extends Attribute>>[] interfaces = AttributeSource.getAttributeInterfaces(clazz);
if (interfaces.length != 1) {
throw new UnsupportedOperationException(clazz.getName() +
" implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
}
final Class<? extends Attribute> interf = interfaces.getFirst().get();
final Class<? extends Attribute> interf = interfaces[0].get();
assert (interf != null) :
"We have a strong reference on the class holding the interfaces, so they should never get evicted";
final Field[] fields = clazz.getDeclaredFields();
try {
for (int i = 0; i < fields.length; i++) {

View File

@@ -17,12 +17,14 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.lang.ref.Reference;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
@@ -38,58 +40,14 @@ import org.apache.lucene.analysis.TokenStream; // for javadocs
* it creates a new instance and returns it.
*/
public class AttributeSource {
/**
* An AttributeFactory creates instances of {@link AttributeImpl}s.
*/
public static abstract class AttributeFactory {
/**
* returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
*/
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
/**
* This is the default factory that creates {@link AttributeImpl}s using the
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
* @deprecated use {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}
*/
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
private static final class DefaultAttributeFactory extends AttributeFactory {
private static final WeakIdentityMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>> attClassImplMap =
WeakIdentityMap.newConcurrentHashMap(false);
DefaultAttributeFactory() {}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
try {
return getClassForInterface(attClass).newInstance();
} catch (InstantiationException e) {
throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
} catch (IllegalAccessException e) {
throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
}
}
private static Class<? extends AttributeImpl> getClassForInterface(Class<? extends Attribute> attClass) {
final WeakReference<Class<? extends AttributeImpl>> ref = attClassImplMap.get(attClass);
Class<? extends AttributeImpl> clazz = (ref == null) ? null : ref.get();
if (clazz == null) {
// we have the slight chance that another thread may do the same, but who cares?
try {
attClassImplMap.put(attClass,
new WeakReference<Class<? extends AttributeImpl>>(
clazz = Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader())
.asSubclass(AttributeImpl.class)
)
);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Could not find implementing class for " + attClass.getName());
}
}
return clazz;
}
}
}
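/** @deprecated Use {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} instead. */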
@Deprecated
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
/**
* This class holds the state of an AttributeSource.
@@ -122,7 +80,7 @@ public class AttributeSource {
private final AttributeFactory factory;
/**
* An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
* An AttributeSource using the default attribute factory {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
*/
public AttributeSource() {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
@@ -200,26 +158,28 @@ public class AttributeSource {
}
/** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
private static final WeakIdentityMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
private static final WeakIdentityMap<Class<? extends AttributeImpl>,Reference<Class<? extends Attribute>>[]> knownImplClasses =
WeakIdentityMap.newConcurrentHashMap(false);
static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
static Reference<Class<? extends Attribute>>[] getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
Reference<Class<? extends Attribute>>[] foundInterfaces = knownImplClasses.get(clazz);
if (foundInterfaces == null) {
// we have the slight chance that another thread may do the same, but who cares?
foundInterfaces = new LinkedList<>();
final List<Reference<Class<? extends Attribute>>> intfList = new ArrayList<>();
// find all interfaces that this attribute instance implements
// and that extend the Attribute interface
Class<?> actClazz = clazz;
do {
for (Class<?> curInterface : actClazz.getInterfaces()) {
if (curInterface != Attribute.class && Attribute.class.isAssignableFrom(curInterface)) {
foundInterfaces.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
intfList.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
}
}
actClazz = actClazz.getSuperclass();
} while (actClazz != null);
knownImplClasses.put(clazz, foundInterfaces);
@SuppressWarnings({"unchecked", "rawtypes"}) final Reference<Class<? extends Attribute>>[] a =
intfList.toArray(new Reference[intfList.size()]);
knownImplClasses.put(clazz, foundInterfaces = a);
}
return foundInterfaces;
}
@@ -235,11 +195,9 @@ public class AttributeSource {
public final void addAttributeImpl(final AttributeImpl att) {
final Class<? extends AttributeImpl> clazz = att.getClass();
if (attributeImpls.containsKey(clazz)) return;
final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
getAttributeInterfaces(clazz);
// add all interfaces of this AttributeImpl to the maps
for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
for (Reference<Class<? extends Attribute>> curInterfaceRef : getAttributeInterfaces(clazz)) {
final Class<? extends Attribute> curInterface = curInterfaceRef.get();
assert (curInterface != null) :
"We have a strong reference on the class holding the interfaces, so they should never get evicted";

View File

@@ -27,146 +27,22 @@ import org.apache.lucene.util.TestUtil;
import java.io.StringReader;
import java.util.HashMap;
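/** @deprecated This test covers the deprecated {@link Token} class. */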
@Deprecated
public class TestToken extends LuceneTestCase {
public void testCtor() throws Exception {
Token t = new Token();
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, content.length);
assertNotSame(t.buffer(), content);
Token t = new Token("hello", 0, 0);
assertEquals(0, t.startOffset());
assertEquals(0, t.endOffset());
assertEquals(1, t.getPositionIncrement());
assertEquals(1, t.getPositionLength());
assertEquals("hello", t.toString());
assertEquals("word", t.type());
assertEquals(0, t.getFlags());
t = new Token();
t.setOffset(6, 22);
t.setFlags(7);
t.copyBuffer(content, 0, content.length);
assertEquals("hello", t.toString());
assertEquals("hello", t.toString());
assertEquals(6, t.startOffset());
assertEquals(22, t.endOffset());
assertEquals("word", t.type());
assertEquals(7, t.getFlags());
t = new Token();
t.setOffset(6, 22);
t.setType("junk");
t.copyBuffer(content, 0, content.length);
assertEquals("hello", t.toString());
assertEquals("hello", t.toString());
assertEquals(6, t.startOffset());
assertEquals(22, t.endOffset());
assertEquals("junk", t.type());
assertEquals(0, t.getFlags());
assertNull(t.getPayload());
}
public void testResize() {
Token t = new Token();
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, content.length);
for (int i = 0; i < 2000; i++)
{
t.resizeBuffer(i);
assertTrue(i <= t.buffer().length);
assertEquals("hello", t.toString());
}
}
public void testGrow() {
Token t = new Token();
StringBuilder buf = new StringBuilder("ab");
for (int i = 0; i < 20; i++)
{
char[] content = buf.toString().toCharArray();
t.copyBuffer(content, 0, content.length);
assertEquals(buf.length(), t.length());
assertEquals(buf.toString(), t.toString());
buf.append(buf.toString());
}
assertEquals(1048576, t.length());
// now as a string, second variant
t = new Token();
buf = new StringBuilder("ab");
for (int i = 0; i < 20; i++)
{
t.setEmpty().append(buf);
String content = buf.toString();
assertEquals(content.length(), t.length());
assertEquals(content, t.toString());
buf.append(content);
}
assertEquals(1048576, t.length());
// Test for slow growth to a long term
t = new Token();
buf = new StringBuilder("a");
for (int i = 0; i < 20000; i++)
{
t.setEmpty().append(buf);
String content = buf.toString();
assertEquals(content.length(), t.length());
assertEquals(content, t.toString());
buf.append("a");
}
assertEquals(20000, t.length());
// Test for slow growth to a long term
t = new Token();
buf = new StringBuilder("a");
for (int i = 0; i < 20000; i++)
{
t.setEmpty().append(buf);
String content = buf.toString();
assertEquals(content.length(), t.length());
assertEquals(content, t.toString());
buf.append("a");
}
assertEquals(20000, t.length());
}
public void testToString() throws Exception {
char[] b = {'a', 'l', 'o', 'h', 'a'};
Token t = new Token("", 0, 5);
t.copyBuffer(b, 0, 5);
assertEquals("aloha", t.toString());
t.setEmpty().append("hi there");
assertEquals("hi there", t.toString());
}
public void testTermBufferEquals() throws Exception {
Token t1a = new Token();
char[] content1a = "hello".toCharArray();
t1a.copyBuffer(content1a, 0, 5);
Token t1b = new Token();
char[] content1b = "hello".toCharArray();
t1b.copyBuffer(content1b, 0, 5);
Token t2 = new Token();
char[] content2 = "hello2".toCharArray();
t2.copyBuffer(content2, 0, 6);
assertTrue(t1a.equals(t1b));
assertFalse(t1a.equals(t2));
assertFalse(t2.equals(t1b));
}
public void testMixedStringArray() throws Exception {
Token t = new Token("hello", 0, 5);
assertEquals(t.length(), 5);
assertEquals(t.toString(), "hello");
t.setEmpty().append("hello2");
assertEquals(t.length(), 6);
assertEquals(t.toString(), "hello2");
t.copyBuffer("hello3".toCharArray(), 0, 6);
assertEquals(t.toString(), "hello3");
char[] buffer = t.buffer();
buffer[1] = 'o';
assertEquals(t.toString(), "hollo3");
}
/* the CharTermAttributeStuff is tested by TestCharTermAttributeImpl */
public void testClone() throws Exception {
Token t = new Token();
@@ -174,20 +50,20 @@ public class TestToken extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
Token copy = assertCloneIsEqual(t);
Token copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
t.setPayload(pl);
copy = assertCloneIsEqual(t);
copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
assertEquals(pl, copy.getPayload());
assertNotSame(pl, copy.getPayload());
}
public void testCopyTo() throws Exception {
Token t = new Token();
Token copy = assertCopyIsEqual(t);
Token copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals("", t.toString());
assertEquals("", copy.toString());
@@ -196,13 +72,13 @@ public class TestToken extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
copy = assertCopyIsEqual(t);
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
t.setPayload(pl);
copy = assertCopyIsEqual(t);
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals(pl, copy.getPayload());
assertNotSame(pl, copy.getPayload());
}
@@ -244,35 +120,19 @@ public class TestToken extends LuceneTestCase {
public void testAttributeReflection() throws Exception {
Token t = new Token("foobar", 6, 22);
t.setFlags(8);
t.setPositionIncrement(3);
t.setPositionLength(11);
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
put(OffsetAttribute.class.getName() + "#startOffset", 6);
put(OffsetAttribute.class.getName() + "#endOffset", 22);
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
put(PositionLengthAttribute.class.getName() + "#positionLength", 1);
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
put(PayloadAttribute.class.getName() + "#payload", null);
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
put(FlagsAttribute.class.getName() + "#flags", 8);
}});
}
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked")
T clone = (T) att.clone();
assertEquals("Clone must be equal", att, clone);
assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
return clone;
}
public static <T extends AttributeImpl> T assertCopyIsEqual(T att) throws Exception {
@SuppressWarnings("unchecked")
T copy = (T) att.getClass().newInstance();
att.copyTo(copy);
assertEquals("Copied instance must be equal", att, copy);
assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
return copy;
}
}

View File

@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
import org.apache.lucene.analysis.TestToken;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
@@ -95,7 +95,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
CharTermAttributeImpl copy = TestToken.assertCloneIsEqual(t);
CharTermAttributeImpl copy = assertCloneIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
@@ -117,7 +117,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
public void testCopyTo() throws Exception {
CharTermAttributeImpl t = new CharTermAttributeImpl();
CharTermAttributeImpl copy = TestToken.assertCopyIsEqual(t);
CharTermAttributeImpl copy = assertCopyIsEqual(t);
assertEquals("", t.toString());
assertEquals("", copy.toString());
@@ -125,7 +125,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
copy = TestToken.assertCopyIsEqual(t);
copy = assertCopyIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
@@ -284,6 +284,23 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
}
}
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked")
T clone = (T) att.clone();
assertEquals("Clone must be equal", att, clone);
assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
return clone;
}
public static <T extends AttributeImpl> T assertCopyIsEqual(T att) throws Exception {
@SuppressWarnings("unchecked")
T copy = (T) att.getClass().newInstance();
att.copyTo(copy);
assertEquals("Copied instance must be equal", att, copy);
assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
return copy;
}
/*
// test speed of the dynamic instanceof checks in append(CharSequence),

View File

@@ -0,0 +1,96 @@
package org.apache.lucene.analysis.tokenattributes;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import java.io.StringReader;
import java.util.HashMap;
public class TestPackedTokenAttributeImpl extends LuceneTestCase {
/* the CharTermAttributeStuff is tested by TestCharTermAttributeImpl */
public void testClone() throws Exception {
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
t.setOffset(0, 5);
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
PackedTokenAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
public void testCopyTo() throws Exception {
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
PackedTokenAttributeImpl copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals("", t.toString());
assertEquals("", copy.toString());
t = new PackedTokenAttributeImpl();
t.setOffset(0, 5);
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
public void testPackedTokenAttributeFactory() throws Exception {
TokenStream ts = new MockTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
((Tokenizer)ts).setReader(new StringReader("foo bar"));
assertTrue("CharTermAttribute is not implemented by Token",
ts.addAttribute(CharTermAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("OffsetAttribute is not implemented by Token",
ts.addAttribute(OffsetAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("PositionIncrementAttribute is not implemented by Token",
ts.addAttribute(PositionIncrementAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("TypeAttribute is not implemented by Token",
ts.addAttribute(TypeAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl",
ts.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl);
}
public void testAttributeReflection() throws Exception {
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
t.append("foobar");
t.setOffset(6, 22);
t.setPositionIncrement(3);
t.setPositionLength(11);
t.setType("foobar");
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
put(OffsetAttribute.class.getName() + "#startOffset", 6);
put(OffsetAttribute.class.getName() + "#endOffset", 22);
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
put(TypeAttribute.class.getName() + "#type", "foobar");
}});
}
}

View File

@@ -37,15 +37,15 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.Monster;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits;
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
// NOTE: SimpleText codec will consume very large amounts of

View File

@@ -22,9 +22,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@@ -51,10 +51,10 @@ class CellTokenStream extends TokenStream {
}
// just a wrapper to prevent adding a CharTermAttribute
private static final class CellAttributeFactory extends AttributeSource.AttributeFactory {
private final AttributeSource.AttributeFactory delegate;
private static final class CellAttributeFactory extends AttributeFactory {
private final AttributeFactory delegate;
CellAttributeFactory(AttributeSource.AttributeFactory delegate) {
CellAttributeFactory(AttributeFactory delegate) {
this.delegate = delegate;
}

View File

@@ -39,9 +39,9 @@ import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
import org.apache.lucene.analysis.CannedBinaryTokenStream;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockBytesAttributeFactory;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockUTF16TermAttributeImpl;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -52,6 +52,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
@@ -621,8 +622,6 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
private int numStopChars;
private boolean preserveHoles;
private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
public MockTokenEatingAnalyzer(int numStopChars, boolean preserveHoles) {
this.preserveHoles = preserveHoles;
this.numStopChars = numStopChars;
@@ -630,7 +629,8 @@
@Override
public TokenStreamComponents createComponents(String fieldName) {
MockTokenizer tokenizer = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
MockTokenizer tokenizer = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY,
MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
tokenizer.setEnableChecks(true);
TokenStream next;
if (numStopChars != 0) {

View File

@@ -25,6 +25,7 @@ import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.CountDownLatch;
@@ -38,8 +39,8 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
@@ -935,16 +936,41 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
return mockTokenizer;
}
/** Returns a new AttributeFactory impl */
public static AttributeFactory newAttributeFactory(Random random) {
if (random.nextBoolean()) {
return Token.TOKEN_ATTRIBUTE_FACTORY;
} else {
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
/**
* This provides the default AttributeFactory in reflective-only mode (through its
* package-private constructor), so we can test that code path, too.
*/
private final static AttributeFactory REFLECTIVE_ATTRIBUTE_FACTORY;
static {
try {
final Constructor<? extends AttributeFactory> constr = Class
.forName(AttributeFactory.class.getName() + "$DefaultAttributeFactory")
.asSubclass(AttributeFactory.class)
.getDeclaredConstructor(boolean.class);
constr.setAccessible(true);
REFLECTIVE_ATTRIBUTE_FACTORY = constr.newInstance(false);
} catch (ReflectiveOperationException e) {
throw new Error("Cannot initantiate a reflective-only DefaultAttributeFactory", e);
}
}
/** Returns a new AttributeFactory impl */
/** Returns a random AttributeFactory impl */
public static AttributeFactory newAttributeFactory(Random random) {
switch (random.nextInt(4)) {
case 0:
return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
case 1:
return Token.TOKEN_ATTRIBUTE_FACTORY;
case 2:
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
case 3:
return REFLECTIVE_ATTRIBUTE_FACTORY;
default:
throw new AssertionError("Please fix the Random.nextInt() call above");
}
}
/** Returns a random AttributeFactory impl */
public static AttributeFactory newAttributeFactory() {
return newAttributeFactory(random());
}

View File

@@ -20,12 +20,11 @@ package org.apache.lucene.analysis;
/**
* Analyzer for testing that encodes terms as UTF-16 bytes.
*/
public class MockBytesAnalyzer extends Analyzer {
private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
public final class MockBytesAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new MockTokenizer(factory, MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
Tokenizer t = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY,
MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
return new TokenStreamComponents(t);
}
}

View File

@@ -1,40 +0,0 @@
package org.apache.lucene.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
/**
* Attribute factory that implements CharTermAttribute with
* {@link MockUTF16TermAttributeImpl}
*/
public class MockBytesAttributeFactory extends AttributeSource.AttributeFactory {
private final AttributeSource.AttributeFactory delegate =
AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
@Override
public AttributeImpl createAttributeInstance(
Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(MockUTF16TermAttributeImpl.class)
? new MockUTF16TermAttributeImpl()
: delegate.createAttributeInstance(attClass);
}
}

View File

@@ -18,12 +18,12 @@ package org.apache.lucene.analysis;
*/
import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.Random;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
@@ -113,7 +113,7 @@ public class MockTokenizer extends Tokenizer {
this(factory, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
}
/** Calls {@link #MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory,CharacterRunAutomaton,boolean)
/** Calls {@link #MockTokenizer(AttributeFactory,CharacterRunAutomaton,boolean)
* MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */
public MockTokenizer(AttributeFactory factory) {
this(factory, WHITESPACE, true);

View File

@@ -17,9 +17,10 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef;
/**
@@ -27,12 +28,15 @@ import org.apache.lucene.util.BytesRef;
* text as UTF-16 bytes instead of as UTF-8 bytes.
*/
public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
static final Charset charset = Charset.forName("UTF-16LE");
/** Factory that returns an instance of this class for CharTermAttribute */
public static final AttributeFactory UTF16_TERM_ATTRIBUTE_FACTORY =
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);
@Override
public void fillBytesRef() {
BytesRef bytes = getBytesRef();
byte[] utf16 = toString().getBytes(charset);
byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE);
bytes.bytes = utf16;
bytes.offset = 0;
bytes.length = utf16.length;

View File

@@ -31,6 +31,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.analysis.SolrAnalyzer;

View File

@@ -23,7 +23,7 @@ import java.util.Map;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/**