LUCENE-5640: Refactor Token, add new PackedTokenAttributeImpl, make use of Java 7 MethodHandles in DEFAULT_ATTRIBUTE_FACTORY

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1592914 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2014-05-06 22:24:58 +00:00
parent b234e9748f
commit 0086a6e644
72 changed files with 769 additions and 883 deletions

View File

@@ -100,6 +100,10 @@ Changes in Backwards Compatibility Policy
   can be used by custom fieldtypes, which don't use the Analyzer, but
   implement their own TokenStream. (Uwe Schindler, Robert Muir)
+* LUCENE-5640: AttributeSource.AttributeFactory was moved to a
+  top-level class: org.apache.lucene.util.AttributeFactory
+  (Uwe Schindler, Robert Muir)
 API Changes
 * LUCENE-5582: Deprecate IndexOutput.length (just use
@@ -126,6 +130,9 @@ API Changes
 * LUCENE-5633: Change NoMergePolicy to a singleton with no distinction between
   compound and non-compound types. (Shai Erera)
+* LUCENE-5640: The Token class was deprecated. Since Lucene 2.9, TokenStreams
+  are using Attributes, Token is no longer used. (Uwe Schindler, Robert Muir)
 Optimizations
 * LUCENE-5603: hunspell stemmer more efficiently strips prefixes
@@ -140,9 +147,11 @@ Optimizations
 * LUCENE-5634: IndexWriter reuses TokenStream instances for String and Numeric
   fields by default. (Uwe Schindler, Shay Banon, Mike McCandless, Robert Muir)
-* LUCENE-5638: TokenStream uses a more performant AttributeFactory by default,
-  that packs the core attributes into one impl, for faster clearAttributes(),
-  saveState(), and restoreState(). (Uwe Schindler, Robert Muir)
+* LUCENE-5638, LUCENE-5640: TokenStream uses a more performant AttributeFactory
+  by default, that packs the core attributes into one implementation
+  (PackedTokenAttributeImpl), for faster clearAttributes(), saveState(), and
+  restoreState(). In addition, AttributeFactory uses Java 7 MethodHandles for
+  instantiating Attribute implementations. (Uwe Schindler, Robert Muir)
 * LUCENE-5609: Changed the default NumericField precisionStep from 4
   to 8 (for int/float) and 16 (for long/double), for faster indexing
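That last entry is the only place the commit describes the new instantiation mechanism, so a sketch may help. The following is a rough illustration of the MethodHandles idea, not the committed code (class and method names here are invented): resolve an attribute implementation's public no-arg constructor once, adapt it to a generic Object-returning type, and reuse the cached handle.

    import java.lang.invoke.MethodHandle;
    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.MethodType;

    final class MethodHandleFactorySketch {

      // Resolved once per attribute implementation class; invoking the cached
      // handle afterwards avoids per-call reflection overhead.
      static MethodHandle noArgConstructor(Class<?> attClass) {
        try {
          return MethodHandles.publicLookup()
              .findConstructor(attClass, MethodType.methodType(void.class))
              .asType(MethodType.methodType(Object.class));
        } catch (NoSuchMethodException | IllegalAccessException e) {
          throw new IllegalArgumentException(
              attClass.getName() + " has no public no-arg constructor", e);
        }
      }

      // invokeExact() requires the call site to match the handle's type
      // exactly, which is why the handle was adapted to ()Object above.
      static Object newInstance(MethodHandle ctor) {
        try {
          return (Object) ctor.invokeExact();
        } catch (Throwable t) {
          throw new RuntimeException(t);
        }
      }
    }

Presumably the real DEFAULT_ATTRIBUTE_FACTORY caches one such handle per implementation class; the diffs below mostly show call sites moving to the new top-level org.apache.lucene.util.AttributeFactory.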

View File

@@ -23,6 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 /**

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.io.Reader;
 import java.util.Map;

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.Version;
 /**
@@ -55,7 +56,7 @@ public class LetterTokenizer extends CharTokenizer {
   /**
    * Construct a new LetterTokenizer using a given
-   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+   * {@link org.apache.lucene.util.AttributeFactory}.
    *
    * @param matchVersion
    *          Lucene version to match See {@link <a href="#version">above</a>}

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.util.Map;

View File

@@ -21,6 +21,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
@@ -60,7 +61,7 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
   /**
    * Construct a new LowerCaseTokenizer using a given
-   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+   * {@link org.apache.lucene.util.AttributeFactory}.
    *
    * @param matchVersion
    *          Lucene version to match See {@link <a href="#version">above</a>}

View File

@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.util.HashMap;
 import java.util.Map;

View File

@@ -21,6 +21,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
@@ -50,7 +51,7 @@ public final class WhitespaceTokenizer extends CharTokenizer {
   /**
    * Construct a new WhitespaceTokenizer using a given
-   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+   * {@link org.apache.lucene.util.AttributeFactory}.
    *
    * @param
    *          matchVersion Lucene version to match See

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.io.Reader;
 import java.util.Map;

View File

@@ -24,7 +24,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
  * A {@link TokenStream} containing a single token.
+ * @deprecated Do not use this anymore!
  */
+@Deprecated
 public final class SingleTokenTokenStream extends TokenStream {
   private boolean exhausted = false;

View File

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.ngram;
 import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.Version;
 /**
@@ -49,7 +50,7 @@ public class EdgeNGramTokenizer extends NGramTokenizer {
   /**
    * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
    *
    * @param version the Lucene match version
-   * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
+   * @param factory {@link org.apache.lucene.util.AttributeFactory} to use
    * @param minGram the smallest n-gram to generate
    * @param maxGram the largest n-gram to generate
    */

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.ngram;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.io.Reader;
 import java.util.Map;

View File

@@ -23,6 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Old broken version of {@link NGramTokenizer}.
@@ -54,7 +55,7 @@ public final class Lucene43NGramTokenizer extends Tokenizer {
   /**
    * Creates NGramTokenizer with given min and max n-grams.
-   * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
+   * @param factory {@link org.apache.lucene.util.AttributeFactory} to use
    * @param minGram the smallest n-gram to generate
    * @param maxGram the largest n-gram to generate
    */

View File

@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.Version;
 /**
@@ -99,7 +100,7 @@ public class NGramTokenizer extends Tokenizer {
   /**
    * Creates NGramTokenizer with given min and max n-grams.
    * @param version the lucene compatibility <a href="#version">version</a>
-   * @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
+   * @param factory {@link org.apache.lucene.util.AttributeFactory} to use
    * @param minGram the smallest n-gram to generate
    * @param maxGram the largest n-gram to generate
    */

View File

@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ngram;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.Version;
 import java.io.Reader;

View File

@@ -17,13 +17,12 @@ package org.apache.lucene.analysis.path;
  */
 import java.io.IOException;
-import java.io.Reader;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Tokenizer for path-like hierarchies.
@@ -69,7 +68,7 @@ public class PathHierarchyTokenizer extends Tokenizer {
   }
   public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
   }
   public PathHierarchyTokenizer

View File

@@ -21,7 +21,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Factory for {@link PathHierarchyTokenizer}.

View File

@@ -17,15 +17,14 @@ package org.apache.lucene.analysis.path;
  */
 import java.io.IOException;
-import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Tokenizer for domain-like hierarchies.
@@ -82,7 +81,7 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
   }
   public ReversePathHierarchyTokenizer( int bufferSize, char delimiter, char replacement, int skip) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
   }
   public ReversePathHierarchyTokenizer
     (AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {

View File

@@ -22,10 +22,10 @@ import java.io.Reader;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * This tokenizer uses regex pattern matching to construct distinct tokens
@@ -67,7 +67,7 @@ public final class PatternTokenizer extends Tokenizer {
   /** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
   public PatternTokenizer(Pattern pattern, int group) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, pattern, group);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, pattern, group);
   }
   /** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */

View File

@@ -21,7 +21,7 @@ import java.util.Map;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Factory for {@link PatternTokenizer}.

View File

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
@@ -106,7 +107,7 @@ public final class ClassicTokenizer extends Tokenizer {
   }
   /**
-   * Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
+   * Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
   */
   public ClassicTokenizer(Version matchVersion, AttributeFactory factory) {
     super(factory);

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.util.Map;

View File

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
@@ -120,7 +121,7 @@ public final class StandardTokenizer extends Tokenizer {
   }
   /**
-   * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
+   * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
   */
   public StandardTokenizer(Version matchVersion, AttributeFactory factory) {
     super(factory);

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.util.Map;

View File

@@ -27,9 +27,9 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
 /**
  * This class implements Word Break rules from the Unicode Text Segmentation

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.io.Reader;
 import java.util.Map;

View File

@@ -20,11 +20,11 @@ package org.apache.lucene.analysis.th;
 import java.text.BreakIterator;
 import java.util.Locale;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.util.CharArrayIterator;
 import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Tokenizer that use {@link BreakIterator} to tokenize Thai text.
@@ -60,7 +60,7 @@ public class ThaiTokenizer extends SegmentingTokenizerBase {
   /** Creates a new ThaiTokenizer */
   public ThaiTokenizer() {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
   }
   /** Creates a new ThaiTokenizer, supplying the AttributeFactory */

View File

@@ -21,7 +21,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Factory for {@link ThaiTokenizer}.
@@ -43,7 +43,7 @@ public class ThaiTokenizerFactory extends TokenizerFactory {
   }
   @Override
-  public Tokenizer create(AttributeSource.AttributeFactory factory) {
+  public Tokenizer create(AttributeFactory factory) {
     return new ThaiTokenizer(factory);
   }
 }

View File

@@ -23,6 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;

View File

@@ -19,12 +19,11 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.io.Reader;
 import java.text.BreakIterator;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Breaks text into sentences with a {@link BreakIterator} and
@@ -63,7 +62,7 @@ public abstract class SegmentingTokenizerBase extends Tokenizer {
   * be provided to this constructor.
   */
  public SegmentingTokenizerBase(BreakIterator iterator) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, iterator);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, iterator);
  }
  /**

View File

@@ -17,11 +17,10 @@ package org.apache.lucene.analysis.util;
  * limitations under the License.
  */
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
-import java.io.Reader;
+import org.apache.lucene.util.AttributeFactory;
 import java.util.Map;
 import java.util.Set;
@@ -73,7 +72,7 @@ public abstract class TokenizerFactory extends AbstractAnalysisFactory {
   /** Creates a TokenStream of the specified input using the default attribute factory. */
   public final Tokenizer create() {
-    return create(Token.TOKEN_ATTRIBUTE_FACTORY);
+    return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
   }
   /** Creates a TokenStream of the specified input using the given AttributeFactory */
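After this hunk, every concrete factory inherits a no-arg create() that supplies TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, so SPI users pick up the packed attribute implementations with no code change. A hedged usage sketch, assuming the 4.x-era forName lookup and its required luceneMatchVersion argument:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.util.TokenizerFactory;

    public class DefaultFactoryExample {
      public static void main(String[] args) {
        Map<String, String> params = new HashMap<>();
        params.put("luceneMatchVersion", "4.9");
        // SPI lookup by name; the factory consumes its arguments map.
        TokenizerFactory factory = TokenizerFactory.forName("whitespace", params);
        // The no-arg create() now delegates to
        // create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY) under the hood.
        Tokenizer tokenizer = factory.create();
        System.out.println(tokenizer.getClass().getSimpleName());
      }
    }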

View File

@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 import java.io.IOException;
@@ -145,7 +146,7 @@ public final class WikipediaTokenizer extends Tokenizer {
   /**
    * Creates a new instance of the {@link org.apache.lucene.analysis.wikipedia.WikipediaTokenizer}. Attaches the
-   * <code>input</code> to a the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+   * <code>input</code> to a the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeFactory}.
    *
    * @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
    */

View File

@@ -21,7 +21,7 @@ import java.util.Collections;
 import java.util.Map;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Factory for {@link WikipediaTokenizer}.

View File

@@ -19,11 +19,9 @@ package org.apache.lucene.collation;
 import java.text.Collator;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * <p>
@@ -69,18 +67,17 @@ import org.apache.lucene.util.AttributeSource;
  * ICUCollationAttributeFactory on the query side, or vice versa.
  * </p>
  */
-public class CollationAttributeFactory extends AttributeSource.AttributeFactory {
+public class CollationAttributeFactory extends AttributeFactory.StaticImplementationAttributeFactory<CollatedTermAttributeImpl> {
   private final Collator collator;
-  private final AttributeSource.AttributeFactory delegate;
   /**
    * Create a CollationAttributeFactory, using
-   * {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
+   * {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY} as the
    * factory for all other attributes.
    * @param collator CollationKey generator
    */
   public CollationAttributeFactory(Collator collator) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
+    this(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, collator);
   }
   /**
@@ -89,16 +86,13 @@ public class CollationAttributeFactory extends AttributeSource.AttributeFactory
    * @param delegate Attribute Factory
    * @param collator CollationKey generator
    */
-  public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
-    this.delegate = delegate;
+  public CollationAttributeFactory(AttributeFactory delegate, Collator collator) {
+    super(delegate, CollatedTermAttributeImpl.class);
     this.collator = collator;
   }
   @Override
-  public AttributeImpl createAttributeInstance(
-      Class<? extends Attribute> attClass) {
-    return attClass.isAssignableFrom(CollatedTermAttributeImpl.class)
-      ? new CollatedTermAttributeImpl(collator)
-      : delegate.createAttributeInstance(attClass);
+  public CollatedTermAttributeImpl createInstance() {
+    return new CollatedTermAttributeImpl(collator);
   }
 }
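The rewritten factory keeps only the collation-specific part; the delegation logic (return the static implementation for matching attribute interfaces, otherwise ask the delegate) now lives in AttributeFactory.StaticImplementationAttributeFactory. A hedged usage sketch; the Reader-less KeywordTokenizer constructor is assumed from the trunk API of this period:

    import java.text.Collator;
    import java.util.Locale;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.collation.CollationAttributeFactory;

    public class CollationFactoryExample {
      public static void main(String[] args) {
        // Terms produced by the tokenizer become collation keys for German.
        Collator collator = Collator.getInstance(Locale.GERMAN);
        CollationAttributeFactory factory = new CollationAttributeFactory(collator);
        // The factory backs the term attribute with CollatedTermAttributeImpl;
        // all other attributes come from the default (packed) delegate.
        Tokenizer tokenizer =
            new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
      }
    }

Folding the instanceof check into the superclass also removes the duplicated createAttributeInstance bodies that this factory and ICUCollationAttributeFactory carried before.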

View File

@@ -35,7 +35,7 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Sanity check some things about all factories,

View File

@@ -81,8 +81,8 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.Rethrow;
 import org.apache.lucene.util.TestUtil;

View File

@@ -20,12 +20,12 @@ package org.apache.lucene.analysis.icu.segmentation;
 import java.io.IOException;
 import java.io.Reader;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.BreakIterator;
@@ -80,7 +80,7 @@ public final class ICUTokenizer extends Tokenizer {
   * @param config Tailored BreakIterator configuration
   */
  public ICUTokenizer(ICUTokenizerConfig config) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, config);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, config);
  }
  /**

View File

@@ -28,7 +28,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import com.ibm.icu.lang.UCharacter;

View File

@@ -17,12 +17,9 @@ package org.apache.lucene.collation;
  * limitations under the License.
  */
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.collation.CollationAttributeFactory; // javadoc
+import org.apache.lucene.util.AttributeFactory;
 import com.ibm.icu.text.Collator;
@@ -63,18 +60,17 @@ import com.ibm.icu.text.Collator;
  * java.text.Collator over several languages.
  * </p>
  */
-public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory {
+public class ICUCollationAttributeFactory extends AttributeFactory.StaticImplementationAttributeFactory<ICUCollatedTermAttributeImpl> {
   private final Collator collator;
-  private final AttributeSource.AttributeFactory delegate;
   /**
    * Create an ICUCollationAttributeFactory, using
-   * {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
+   * {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY} as the
    * factory for all other attributes.
    * @param collator CollationKey generator
    */
   public ICUCollationAttributeFactory(Collator collator) {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
+    this(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, collator);
   }
   /**
@@ -83,16 +79,13 @@ public class ICUCollationAttributeFactory extends AttributeSource.AttributeFacto
    * @param delegate Attribute Factory
    * @param collator CollationKey generator
    */
-  public ICUCollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
-    this.delegate = delegate;
+  public ICUCollationAttributeFactory(AttributeFactory delegate, Collator collator) {
+    super(delegate, ICUCollatedTermAttributeImpl.class);
     this.collator = collator;
   }
   @Override
-  public AttributeImpl createAttributeInstance(
-      Class<? extends Attribute> attClass) {
-    return attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class)
-      ? new ICUCollatedTermAttributeImpl(collator)
-      : delegate.createAttributeInstance(attClass);
+  public ICUCollatedTermAttributeImpl createInstance() {
+    return new ICUCollatedTermAttributeImpl(collator);
   }
 }

View File

@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ja;
  */
 import java.io.IOException;
-import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -40,6 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.util.RollingCharBuffer;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.FST;
@@ -195,7 +195,7 @@ public final class JapaneseTokenizer extends Tokenizer {
   * @param mode tokenization mode.
   */
  public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
-    this(org.apache.lucene.analysis.Token.TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
  }
  /**

View File

@@ -30,7 +30,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
 import org.apache.lucene.analysis.ja.dict.UserDictionary;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;

View File

@@ -22,12 +22,12 @@ import java.text.BreakIterator;
 import java.util.Iterator;
 import java.util.Locale;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Tokenizer for Chinese or mixed Chinese-English text.
@@ -48,7 +48,7 @@ public class HMMChineseTokenizer extends SegmentingTokenizerBase {
   /** Creates a new HMMChineseTokenizer */
   public HMMChineseTokenizer() {
-    this(Token.TOKEN_ATTRIBUTE_FACTORY);
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
   }
   /** Creates a new HMMChineseTokenizer, supplying the AttributeFactory */

View File

@@ -21,7 +21,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Factory for {@link HMMChineseTokenizer}

View File

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 /**

View File

@@ -21,7 +21,7 @@ import java.io.Reader;
 import java.util.Map;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 /**
  * Factory for the SmartChineseAnalyzer {@link SentenceTokenizer}

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.uima;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;

View File

@@ -17,17 +17,16 @@ package org.apache.lucene.analysis.uima;
  * limitations under the License.
  */
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.resource.ResourceInitializationException;
 import java.io.IOException;
-import java.io.Reader;
 import java.util.Map;
 /**
@@ -44,7 +43,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
   private int finalOffset = 0;
   public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
-    this(descriptorPath, tokenType, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
+    this(descriptorPath, tokenType, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
   }
   public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.uima;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.io.Reader;
 import java.util.HashMap;

View File

@@ -17,11 +17,11 @@ package org.apache.lucene.analysis.uima;
  * limitations under the License.
  */
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.FeaturePath;
@@ -30,7 +30,6 @@ import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.resource.ResourceInitializationException;
 import java.io.IOException;
-import java.io.Reader;
 import java.util.Map;
 /**
@@ -54,7 +53,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
   private int finalOffset = 0;
   public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
-    this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
+    this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
   }
   public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,

View File

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.uima;
  */
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.AttributeFactory;
 import java.io.Reader;
 import java.util.HashMap;

View File

@@ -28,6 +28,7 @@ import org.apache.lucene.document.LongField; // for javadocs
 import org.apache.lucene.search.NumericRangeFilter; // for javadocs
 import org.apache.lucene.search.NumericRangeQuery;
 import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 import org.apache.lucene.util.BytesRef;
@@ -233,7 +234,7 @@ public final class NumericTokenStream extends TokenStream {
   /**
    * Expert: Creates a token stream for numeric values with the specified
    * <code>precisionStep</code> using the given
-   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+   * {@link org.apache.lucene.util.AttributeFactory}.
    * The stream is not yet initialized,
    * before using set a value using the various set<em>???</em>Value() methods.
    */

View File

@ -17,16 +17,12 @@ package org.apache.lucene.analysis;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -57,54 +53,7 @@ import org.apache.lucene.util.BytesRef;
Even though it is not necessary to use Token anymore, with the new TokenStream API it can Even though it is not necessary to use Token anymore, with the new TokenStream API it can
be used as convenience class that implements all {@link Attribute}s, which is especially useful be used as convenience class that implements all {@link Attribute}s, which is especially useful
to easily switch from the old to the new TokenStream API. to easily switch from the old to the new TokenStream API.
<br><br>
<p>Tokenizers and TokenFilters should try to re-use a Token
instance when possible for best performance, by
implementing the {@link TokenStream#incrementToken()} API.
Failing that, to create a new Token you should first use
one of the constructors that starts with null text. To load
the token from a char[] use {@link #copyBuffer(char[], int, int)}.
To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}.
Alternatively you can get the Token's termBuffer by calling either {@link #buffer()},
if you know that your text is shorter than the capacity of the termBuffer
or {@link #resizeBuffer(int)}, if there is any possibility
that you may need to grow the buffer. Fill in the characters of your term into this
buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to
set the length of the term text. See <a target="_top"
href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
for details.</p>
<p>Typical Token reuse patterns:
<ul>
<li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(string, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
</pre>
</li>
</li>
<li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre class="prettyprint">
return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
</pre>
</li>
</ul>
A few things to note:
<ul>
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
@ -118,58 +67,18 @@ import org.apache.lucene.util.BytesRef;
{@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
This method now prints only the term text; no additional information is included anymore.
</p>
@deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
*/
public class Token extends CharTermAttributeImpl @Deprecated
implements TypeAttribute, PositionIncrementAttribute, public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
FlagsAttribute, OffsetAttribute, PayloadAttribute, PositionLengthAttribute {
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int flags;
private BytesRef payload;
private int positionIncrement = 1;
private int positionLength = 1;
/** Constructs a Token with null text. */
public Token() {
}
/** Constructs a Token with null text and start & end
* offsets.
* @param start start offset in the source text
* @param end end offset in the source text */
public Token(int start, int end) {
checkOffsets(start, end);
startOffset = start;
endOffset = end;
}
/** Constructs a Token with null text and start & end
* offsets plus the Token type.
* @param start start offset in the source text
* @param end end offset in the source text
* @param typ the lexical type of this Token */
public Token(int start, int end, String typ) {
checkOffsets(start, end);
startOffset = start;
endOffset = end;
type = typ;
}
/**
* Constructs a Token with null text and start & end
* offsets plus flags. NOTE: flags is EXPERIMENTAL.
* @param start start offset in the source text
* @param end end offset in the source text
* @param flags The bits to set for this token
*/
public Token(int start, int end, int flags) {
checkOffsets(start, end);
startOffset = start;
endOffset = end;
this.flags = flags;
}
/** Constructs a Token with the given term text, and start
* & end offsets. The type defaults to "word."
* <b>NOTE:</b> for better indexing speed you should
@ -179,149 +88,9 @@ public class Token extends CharTermAttributeImpl
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(String text, int start, int end) { public Token(CharSequence text, int start, int end) {
checkOffsets(start, end);
append(text);
startOffset = start; setOffset(start, end);
endOffset = end;
}
/** Constructs a Token with the given text, start and end
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
* @param start start offset in the source text
* @param end end offset in the source text
* @param typ token type
*/
public Token(String text, int start, int end, String typ) {
checkOffsets(start, end);
append(text);
startOffset = start;
endOffset = end;
type = typ;
}
/**
* Constructs a Token with the given text, start and end
* offsets, & flags. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
* @param start start offset in the source text
* @param end end offset in the source text
* @param flags The bits to set for this token
*/
public Token(String text, int start, int end, int flags) {
checkOffsets(start, end);
append(text);
startOffset = start;
endOffset = end;
this.flags = flags;
}
/**
* Constructs a Token with the given term buffer (offset
* & length), start and end
* offsets
* @param startTermBuffer buffer containing term text
* @param termBufferOffset the index in the buffer of the first character
* @param termBufferLength number of valid characters in the buffer
* @param start start offset in the source text
* @param end end offset in the source text
*/
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
checkOffsets(start, end);
copyBuffer(startTermBuffer, termBufferOffset, termBufferLength);
startOffset = start;
endOffset = end;
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
throw new IllegalArgumentException
("Increment must be zero or greater: " + positionIncrement);
this.positionIncrement = positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public int getPositionIncrement() {
return positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public void setPositionLength(int positionLength) {
this.positionLength = positionLength;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public int getPositionLength() {
return positionLength;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int startOffset() {
return startOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int endOffset() {
return endOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public void setOffset(int startOffset, int endOffset) {
checkOffsets(startOffset, endOffset);
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final String type() {
return type;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final void setType(String type) {
this.type = type;
}
/**
@ -366,37 +135,8 @@ public class Token extends CharTermAttributeImpl
@Override
public void clear() {
super.clear();
payload = null;
positionIncrement = positionLength = 1;
flags = 0;
startOffset = endOffset = 0; payload = null;
type = DEFAULT_TYPE;
}
@Override
public Token clone() {
Token t = (Token)super.clone();
// Do a deep clone
if (payload != null) {
t.payload = payload.clone();
}
return t;
}
/** Makes a clone, but replaces the term buffer &
* start/end offset in the process. This is more
* efficient than doing a full clone (and then calling
* {@link #copyBuffer}) because it saves a wasted copy of the old
* termBuffer. */
public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
t.positionIncrement = positionIncrement;
t.positionLength = positionLength;
t.flags = flags;
t.type = type;
if (payload != null)
t.payload = payload.clone();
return t;
}
@Override
@ -406,14 +146,10 @@ public class Token extends CharTermAttributeImpl
if (obj instanceof Token) {
final Token other = (Token) obj;
return (startOffset == other.startOffset && return (
endOffset == other.endOffset && flags == other.flags &&
flags == other.flags && (payload == null ? other.payload == null : payload.equals(other.payload)) &&
positionIncrement == other.positionIncrement && super.equals(obj)
positionLength == other.positionLength &&
(type == null ? other.type == null : type.equals(other.type)) &&
(payload == null ? other.payload == null : payload.equals(other.payload)) &&
super.equals(obj)
);
} else
return false;
@ -422,117 +158,20 @@ public class Token extends CharTermAttributeImpl
@Override
public int hashCode() {
int code = super.hashCode();
code = code * 31 + startOffset;
code = code * 31 + endOffset;
code = code * 31 + flags;
code = code * 31 + positionIncrement; if (payload != null) {
code = code * 31 + positionLength;
if (type != null)
code = code * 31 + type.hashCode();
if (payload != null)
code = code * 31 + payload.hashCode();
}
return code;
}
// like clear() but doesn't clear termBuffer/text
private void clearNoTermBuffer() {
payload = null;
positionIncrement = positionLength = 1;
flags = 0;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
/** Shorthand for calling {@link #clear}, @Override
* {@link #copyBuffer(char[], int, int)}, public Token clone() {
* {@link #setOffset}, final Token t = (Token) super.clone();
* {@link #setType} if (payload != null) {
* @return this Token instance */ t.payload = payload.clone();
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) { }
checkOffsets(newStartOffset, newEndOffset); return t;
clearNoTermBuffer();
copyBuffer(newTermBuffer, newTermOffset, newTermLength);
payload = null;
positionIncrement = positionLength = 1;
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #copyBuffer(char[], int, int)},
* {@link #setOffset},
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
checkOffsets(newStartOffset, newEndOffset);
clearNoTermBuffer();
copyBuffer(newTermBuffer, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence)},
* {@link #setOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence, int, int)},
* {@link #setOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm, newTermOffset, newTermOffset + newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence)},
* {@link #setOffset},
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #append(CharSequence, int, int)},
* {@link #setOffset},
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
checkOffsets(newStartOffset, newEndOffset);
clear();
append(newTerm, newTermOffset, newTermOffset + newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/**
@ -540,87 +179,28 @@ public class Token extends CharTermAttributeImpl
* @param prototype source Token to copy fields from
*/
public void reinit(Token prototype) { public void reinit(Token prototype) {
copyBuffer(prototype.buffer(), 0, prototype.length()); // this is a bad hack to emulate no cloning of payload!
positionIncrement = prototype.positionIncrement; prototype.copyToWithoutPayloadClone(this);
positionLength = prototype.positionLength;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
} }
/** private void copyToWithoutPayloadClone(AttributeImpl target) {
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. super.copyTo(target);
* @param prototype existing Token ((FlagsAttribute) target).setFlags(flags);
* @param newTerm new term text ((PayloadAttribute) target).setPayload(payload);
*/
public void reinit(Token prototype, String newTerm) {
setEmpty().append(newTerm);
positionIncrement = prototype.positionIncrement;
positionLength = prototype.positionLength;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
}
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype existing Token
* @param newTermBuffer buffer containing new term text
* @param offset the index in the buffer of the first character
* @param length number of valid characters in the buffer
*/
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
copyBuffer(newTermBuffer, offset, length);
positionIncrement = prototype.positionIncrement;
positionLength = prototype.positionLength;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
}
@Override
public void copyTo(AttributeImpl target) { public void copyTo(AttributeImpl target) {
if (target instanceof Token) { super.copyTo(target);
final Token to = (Token) target; ((FlagsAttribute) target).setFlags(flags);
to.reinit(this); ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
// reinit shares the payload, so clone it:
if (payload !=null) {
to.payload = payload.clone();
}
} else {
super.copyTo(target);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PositionLengthAttribute) target).setPositionLength(positionLength);
((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
((FlagsAttribute) target).setFlags(flags);
((TypeAttribute) target).setType(type);
}
}
@Override
public void reflectWith(AttributeReflector reflector) {
super.reflectWith(reflector);
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
reflector.reflect(PayloadAttribute.class, "payload", payload);
reflector.reflect(FlagsAttribute.class, "flags", flags);
reflector.reflect(TypeAttribute.class, "type", type); reflector.reflect(PayloadAttribute.class, "payload", payload);
}
private void checkOffsets(int startOffset, int endOffset) {
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
}
}
/** Convenience factory that returns <code>Token</code> as implementation for the basic
@ -628,43 +208,6 @@ public class Token extends CharTermAttributeImpl
* attributes.
* @since 3.0
*/
public static final AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY = public static final AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class);
/** <b>Expert:</b> Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes
* and for all other attributes calls the given delegate factory.
* @since 3.0
*/
public static final class TokenAttributeFactory extends AttributeSource.AttributeFactory {
private final AttributeSource.AttributeFactory delegate;
/** <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
* and for all other attributes calls the given delegate factory. */
public TokenAttributeFactory(AttributeSource.AttributeFactory delegate) {
this.delegate = delegate;
}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(Token.class)
? new Token() : delegate.createAttributeInstance(attClass);
}
@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other instanceof TokenAttributeFactory) {
final TokenAttributeFactory af = (TokenAttributeFactory) other;
return this.delegate.equals(af.delegate);
}
return false;
}
@Override
public int hashCode() {
return delegate.hashCode() ^ 0x0a45aa31;
}
}
}

View File

@ -21,11 +21,13 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
@ -84,12 +86,16 @@ import org.apache.lucene.util.AttributeSource;
* assertions are enabled.
*/
public abstract class TokenStream extends AttributeSource implements Closeable {
/** Default {@link AttributeFactory} instance that should be used for TokenStreams. */
public static final AttributeFactory DEFAULT_TOKEN_ATTRIBUTE_FACTORY =
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, PackedTokenAttributeImpl.class);
/**
* A TokenStream using the default attribute factory.
*/
protected TokenStream() {
super(Token.TOKEN_ATTRIBUTE_FACTORY); super(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
assert assertFinal();
}
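(A minimal sketch of what the new default factory implies for consumers: all five core token attributes resolve to one packed instance. The demo class and its name are illustrative only, not part of this patch:)

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeSource;

public class PackedFactoryDemo {
  public static void main(String[] args) {
    // build an AttributeSource with the new default token attribute factory:
    AttributeSource source = new AttributeSource(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
    CharTermAttribute term = source.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = source.addAttribute(OffsetAttribute.class);
    // both interfaces are served by the same PackedTokenAttributeImpl instance,
    // so clearAttributes()/captureState() touch a single object:
    System.out.println(((Object) term) == ((Object) offset)); // prints true
  }
}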

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import java.io.Reader;

View File

@ -813,7 +813,7 @@ Now we're going to implement our own custom Attribute for part-of-speech tagging
</p>
<p>
This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
{@link org.apache.lucene.util.AttributeSource.AttributeFactory}. The factory accepts an Attribute interface as argument {@link org.apache.lucene.util.AttributeFactory}. The factory accepts an Attribute interface as argument
and returns an actual instance. You can implement your own factory if you need to change the default behavior.
</p>
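<p>For example, a custom factory could serve its own implementation for the
PartOfSpeechAttribute from the example above and delegate all other attributes
(a minimal sketch, not part of this patch):</p>
<pre class="prettyprint">
public final class PartOfSpeechAttributeFactory extends AttributeFactory {
  private final AttributeFactory delegate;

  public PartOfSpeechAttributeFactory(AttributeFactory delegate) {
    this.delegate = delegate;
  }

  @Override
  public AttributeImpl createAttributeInstance(Class&lt;? extends Attribute&gt; attClass) {
    // serve our own implementation, fall back to the delegate for everything else:
    if (attClass == PartOfSpeechAttribute.class)
      return new PartOfSpeechAttributeImpl();
    return delegate.createAttributeInstance(attClass);
  }
}
</pre>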
<p>

View File

@ -0,0 +1,206 @@
package org.apache.lucene.analysis.tokenattributes;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of the common attributes used by Lucene:<ul>
* <li>{@link CharTermAttribute}
* <li>{@link TypeAttribute}
* <li>{@link PositionIncrementAttribute}
* <li>{@link PositionLengthAttribute}
* <li>{@link OffsetAttribute}
* </ul>*/
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
PositionLengthAttribute, OffsetAttribute {
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int positionIncrement = 1;
private int positionLength = 1;
/** Constructs the attribute implementation. */
public PackedTokenAttributeImpl() {
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
throw new IllegalArgumentException
("Increment must be zero or greater: " + positionIncrement);
this.positionIncrement = positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionIncrementAttribute
*/
@Override
public int getPositionIncrement() {
return positionIncrement;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public void setPositionLength(int positionLength) {
this.positionLength = positionLength;
}
/**
* {@inheritDoc}
* @see PositionLengthAttribute
*/
@Override
public int getPositionLength() {
return positionLength;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int startOffset() {
return startOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public final int endOffset() {
return endOffset;
}
/**
* {@inheritDoc}
* @see OffsetAttribute
*/
@Override
public void setOffset(int startOffset, int endOffset) {
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
}
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final String type() {
return type;
}
/**
* {@inheritDoc}
* @see TypeAttribute
*/
@Override
public final void setType(String type) {
this.type = type;
}
/** Resets the attributes
*/
@Override
public void clear() {
super.clear();
positionIncrement = positionLength = 1;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@Override
public PackedTokenAttributeImpl clone() {
return (PackedTokenAttributeImpl) super.clone();
}
@Override
public boolean equals(Object obj) {
if (obj == this)
return true;
if (obj instanceof PackedTokenAttributeImpl) {
final PackedTokenAttributeImpl other = (PackedTokenAttributeImpl) obj;
return (startOffset == other.startOffset &&
endOffset == other.endOffset &&
positionIncrement == other.positionIncrement &&
positionLength == other.positionLength &&
(type == null ? other.type == null : type.equals(other.type)) &&
super.equals(obj)
);
} else
return false;
}
@Override
public int hashCode() {
int code = super.hashCode();
code = code * 31 + startOffset;
code = code * 31 + endOffset;
code = code * 31 + positionIncrement;
code = code * 31 + positionLength;
if (type != null)
code = code * 31 + type.hashCode();
return code;
}
@Override
public void copyTo(AttributeImpl target) {
if (target instanceof PackedTokenAttributeImpl) {
final PackedTokenAttributeImpl to = (PackedTokenAttributeImpl) target;
to.copyBuffer(buffer(), 0, length());
to.positionIncrement = positionIncrement;
to.positionLength = positionLength;
to.startOffset = startOffset;
to.endOffset = endOffset;
to.type = type;
} else {
super.copyTo(target);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PositionLengthAttribute) target).setPositionLength(positionLength);
((TypeAttribute) target).setType(type);
}
}
@Override
public void reflectWith(AttributeReflector reflector) {
super.reflectWith(reflector);
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
reflector.reflect(TypeAttribute.class, "type", type);
}
}
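(A minimal usage sketch for the new packed implementation — each setter below is declared
above or inherited from CharTermAttributeImpl; the snippet is illustrative only:)

PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
token.append("example");        // CharTermAttribute: the term text
token.setOffset(0, 7);          // OffsetAttribute: start/end character offsets
token.setPositionIncrement(1);  // PositionIncrementAttribute
token.setPositionLength(1);     // PositionLengthAttribute
token.setType("word");          // TypeAttribute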

View File

@ -0,0 +1,202 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.ref.Reference;
import java.lang.ref.WeakReference;
/**
* An AttributeFactory creates instances of {@link AttributeImpl}s.
*/
public abstract class AttributeFactory {
/**
* Returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
*/
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
/**
* Returns a correctly typed {@link MethodHandle} for the no-arg ctor of the given class.
*/
static final MethodHandle findAttributeImplCtor(Class<? extends AttributeImpl> clazz) {
try {
return lookup.findConstructor(clazz, NO_ARG_CTOR).asType(NO_ARG_RETURNING_ATTRIBUTEIMPL);
} catch (NoSuchMethodException | IllegalAccessException e) {
throw new IllegalArgumentException("Cannot lookup accessible no-arg constructor for: " + clazz.getName(), e);
}
}
private static final MethodHandles.Lookup lookup = MethodHandles.publicLookup();
private static final MethodType NO_ARG_CTOR = MethodType.methodType(void.class);
private static final MethodType NO_ARG_RETURNING_ATTRIBUTEIMPL = MethodType.methodType(AttributeImpl.class);
/**
* This is the default factory that creates {@link AttributeImpl}s using the
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
*/
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory(true);
static final class DefaultAttributeFactory extends AttributeFactory {
private final WeakIdentityMap<Class<? extends Attribute>, Object> attClassImplMap =
WeakIdentityMap.newConcurrentHashMap(false);
private final ClassLoader myClassLoader = getClass().getClassLoader();
private final boolean useMethodHandles;
// this constructor is available for tests, to be able to test the pure-reflective case, too
DefaultAttributeFactory(boolean useMethodHandles) {
this.useMethodHandles = useMethodHandles;
}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
// first lookup from cache:
Object cached = attClassImplMap.get(attClass);
if (cached instanceof MethodHandle) {
return invokeMethodHandle((MethodHandle) cached);
} else if (cached instanceof Reference) {
@SuppressWarnings("unchecked") final Class<? extends AttributeImpl> clazz =
((Reference<Class<? extends AttributeImpl>>) cached).get();
if (clazz != null) {
return invokeReflective(clazz);
}
cached = null;
// fall-through
}
// No cache hit!
// Please note: we have the slight chance that another thread may do the same, but who cares?
assert cached == null;
final Class<? extends AttributeImpl> implClazz = findImplClass(attClass);
// if the attribute impl is from our own ClassLoader, we optimize to use pre-allocated MethodHandle to instantiate the object
if (useMethodHandles && implClazz.getClassLoader() == myClassLoader) {
final MethodHandle constr = findAttributeImplCtor(implClazz);
attClassImplMap.put(attClass, constr);
return invokeMethodHandle(constr);
} else {
// otherwise, to avoid referring to the class forever (the MethodHandle strongly
// references the class, so it could never be unloaded), we use slower reflection:
attClassImplMap.put(attClass, new WeakReference<>(implClazz));
return invokeReflective(implClazz);
}
}
private Class<? extends AttributeImpl> findImplClass(Class<? extends Attribute> attClass) {
try {
return Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader()).asSubclass(AttributeImpl.class);
} catch (ClassNotFoundException cnfe) {
throw new IllegalArgumentException("Cannot find implementing class for: " + attClass.getName());
}
}
private AttributeImpl invokeMethodHandle(MethodHandle constr) {
try {
return (AttributeImpl) constr.invokeExact();
} catch (Throwable t) {
rethrow(t);
throw new AssertionError();
}
}
private AttributeImpl invokeReflective(Class<? extends AttributeImpl> implClass) {
try {
return implClass.newInstance();
} catch (InstantiationException | IllegalAccessException e) {
throw new IllegalArgumentException("Cannot instantiate implementing class: " + implClass.getName(), e);
}
}
}
/** <b>Expert</b>: AttributeFactory returning an instance of the given {@code clazz} for the
* attributes it implements. For all other attributes it calls the given delegate factory
* as fallback. This class can be used to prefer a specific {@code AttributeImpl} which
* combines multiple attributes over separate classes.
* @lucene.internal
*/
public abstract static class StaticImplementationAttributeFactory<A extends AttributeImpl> extends AttributeFactory {
private final AttributeFactory delegate;
private final Class<A> clazz;
/** <b>Expert</b>: Creates an AttributeFactory returning {@code clazz} as instance for the
* attributes it implements and for all other attributes calls the given delegate factory. */
public StaticImplementationAttributeFactory(AttributeFactory delegate, Class<A> clazz) {
this.delegate = delegate;
this.clazz = clazz;
}
@Override
public final AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(clazz) ? createInstance() : delegate.createAttributeInstance(attClass);
}
/** Creates an instance of {@code A}. */
protected abstract A createInstance();
@Override
public boolean equals(Object other) {
if (this == other)
return true;
if (other == null || other.getClass() != this.getClass())
return false;
@SuppressWarnings("rawtypes")
final StaticImplementationAttributeFactory af = (StaticImplementationAttributeFactory) other;
return this.delegate.equals(af.delegate) && this.clazz == af.clazz;
}
@Override
public int hashCode() {
return 31 * delegate.hashCode() + clazz.hashCode();
}
}
/** Returns an AttributeFactory returning an instance of the given {@code clazz} for the
* attributes it implements. The given {@code clazz} must have a public no-arg constructor.
* For all other attributes it calls the given delegate factory as fallback.
* This method can be used to prefer a specific {@code AttributeImpl} which combines
* multiple attributes over separate classes.
* <p>Please save instances created by this method in a static final field, because
* on each call, this does reflection for creating a {@link MethodHandle}.
*/
public static <A extends AttributeImpl> AttributeFactory getStaticImplementation(AttributeFactory delegate, Class<A> clazz) {
final MethodHandle constr = findAttributeImplCtor(clazz);
return new StaticImplementationAttributeFactory<A>(delegate, clazz) {
@Override
protected A createInstance() {
try {
return (A) constr.invokeExact();
} catch (Throwable t) {
rethrow(t);
throw new AssertionError();
}
}
};
}
// Hack to rethrow unknown Exceptions from {@link MethodHandle#invoke}:
// TODO: remove the impl in test-framework, this one is more elegant :-)
static void rethrow(Throwable t) {
AttributeFactory.<Error>rethrow0(t);
}
@SuppressWarnings("unchecked")
private static <T extends Throwable> void rethrow0(Throwable t) throws T {
throw (T) t;
}
}
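(The getStaticImplementation() javadoc above asks callers to cache the returned factory in a
static final field, since each call performs a reflective MethodHandle lookup. A minimal
sketch — MyPackedAttributeImpl is a hypothetical AttributeImpl subclass with a public
no-arg constructor, not part of this patch:)

import org.apache.lucene.util.AttributeFactory;

public final class MyAnalysisConstants {
  // resolve the no-arg ctor MethodHandle once, then reuse the factory everywhere:
  public static final AttributeFactory MY_PACKED_FACTORY =
      AttributeFactory.getStaticImplementation(
          AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MyPackedAttributeImpl.class);
}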

View File

@ -19,8 +19,7 @@ package org.apache.lucene.util;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.lang.ref.WeakReference; import java.lang.ref.Reference;
import java.util.LinkedList;
/**
* Base class for Attributes that can be added to a
@ -91,12 +90,14 @@ public abstract class AttributeImpl implements Cloneable, Attribute {
*/
public void reflectWith(AttributeReflector reflector) {
final Class<? extends AttributeImpl> clazz = this.getClass();
final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz); final Reference<Class<? extends Attribute>>[] interfaces = AttributeSource.getAttributeInterfaces(clazz);
if (interfaces.size() != 1) { if (interfaces.length != 1) {
throw new UnsupportedOperationException(clazz.getName() +
" implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
}
final Class<? extends Attribute> interf = interfaces.getFirst().get(); final Class<? extends Attribute> interf = interfaces[0].get();
assert (interf != null) :
"We have a strong reference on the class holding the interfaces, so they should never get evicted";
final Field[] fields = clazz.getDeclaredFields();
try {
for (int i = 0; i < fields.length; i++) {

View File

@ -17,12 +17,14 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.lang.ref.Reference;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
@ -38,59 +40,15 @@ import org.apache.lucene.analysis.TokenStream; // for javadocs
* it creates a new instance and returns it.
*/
public class AttributeSource {
/**
* An AttributeFactory creates instances of {@link AttributeImpl}s. * This is the default factory that creates {@link AttributeImpl}s using the
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
* @deprecated use {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}
*/
public static abstract class AttributeFactory { @Deprecated
/** public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
* returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
*/
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
/**
* This is the default factory that creates {@link AttributeImpl}s using the
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
*/
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
private static final class DefaultAttributeFactory extends AttributeFactory {
private static final WeakIdentityMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>> attClassImplMap =
WeakIdentityMap.newConcurrentHashMap(false);
DefaultAttributeFactory() {}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
try {
return getClassForInterface(attClass).newInstance();
} catch (InstantiationException e) {
throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
} catch (IllegalAccessException e) {
throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
}
}
private static Class<? extends AttributeImpl> getClassForInterface(Class<? extends Attribute> attClass) {
final WeakReference<Class<? extends AttributeImpl>> ref = attClassImplMap.get(attClass);
Class<? extends AttributeImpl> clazz = (ref == null) ? null : ref.get();
if (clazz == null) {
// we have the slight chance that another thread may do the same, but who cares?
try {
attClassImplMap.put(attClass,
new WeakReference<Class<? extends AttributeImpl>>(
clazz = Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader())
.asSubclass(AttributeImpl.class)
)
);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Could not find implementing class for " + attClass.getName());
}
}
return clazz;
}
}
}
/**
* This class holds the state of an AttributeSource.
* @see #captureState
@ -122,7 +80,7 @@ public class AttributeSource {
private final AttributeFactory factory;
/**
* An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}. * An AttributeSource using the default attribute factory {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
*/
public AttributeSource() {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
@ -200,26 +158,28 @@ public class AttributeSource {
}
/** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
private static final WeakIdentityMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses = private static final WeakIdentityMap<Class<? extends AttributeImpl>,Reference<Class<? extends Attribute>>[]> knownImplClasses =
WeakIdentityMap.newConcurrentHashMap(false);
static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) { static Reference<Class<? extends Attribute>>[] getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz); Reference<Class<? extends Attribute>>[] foundInterfaces = knownImplClasses.get(clazz);
if (foundInterfaces == null) {
// we have the slight chance that another thread may do the same, but who cares?
foundInterfaces = new LinkedList<>(); final List<Reference<Class<? extends Attribute>>> intfList = new ArrayList<>();
// find all interfaces that this attribute instance implements
// and that extend the Attribute interface
Class<?> actClazz = clazz;
do {
for (Class<?> curInterface : actClazz.getInterfaces()) {
if (curInterface != Attribute.class && Attribute.class.isAssignableFrom(curInterface)) {
foundInterfaces.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class))); intfList.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
}
}
actClazz = actClazz.getSuperclass();
} while (actClazz != null);
knownImplClasses.put(clazz, foundInterfaces); @SuppressWarnings({"unchecked", "rawtypes"}) final Reference<Class<? extends Attribute>>[] a =
intfList.toArray(new Reference[intfList.size()]);
knownImplClasses.put(clazz, foundInterfaces = a);
}
return foundInterfaces;
}
@ -235,11 +195,9 @@ public class AttributeSource {
public final void addAttributeImpl(final AttributeImpl att) {
final Class<? extends AttributeImpl> clazz = att.getClass();
if (attributeImpls.containsKey(clazz)) return;
final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
getAttributeInterfaces(clazz);
// add all interfaces of this AttributeImpl to the maps
for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) { for (Reference<Class<? extends Attribute>> curInterfaceRef : getAttributeInterfaces(clazz)) {
final Class<? extends Attribute> curInterface = curInterfaceRef.get();
assert (curInterface != null) :
"We have a strong reference on the class holding the interfaces, so they should never get evicted";

View File

@ -27,167 +27,43 @@ import org.apache.lucene.util.TestUtil;
import java.io.StringReader;
import java.util.HashMap;
@Deprecated
public class TestToken extends LuceneTestCase {
public void testCtor() throws Exception {
Token t = new Token(); Token t = new Token("hello", 0, 0);
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, content.length);
assertNotSame(t.buffer(), content);
assertEquals(0, t.startOffset());
assertEquals(0, t.endOffset());
assertEquals(1, t.getPositionIncrement());
assertEquals(1, t.getPositionLength());
assertEquals("hello", t.toString());
assertEquals("word", t.type());
assertEquals(0, t.getFlags());
assertNull(t.getPayload());
t = new Token();
t.setOffset(6, 22);
t.setFlags(7);
t.copyBuffer(content, 0, content.length);
assertEquals("hello", t.toString());
assertEquals("hello", t.toString());
assertEquals(6, t.startOffset());
assertEquals(22, t.endOffset());
assertEquals("word", t.type());
assertEquals(7, t.getFlags());
t = new Token();
t.setOffset(6, 22);
t.setType("junk");
t.copyBuffer(content, 0, content.length);
assertEquals("hello", t.toString());
assertEquals("hello", t.toString());
assertEquals(6, t.startOffset());
assertEquals(22, t.endOffset());
assertEquals("junk", t.type());
assertEquals(0, t.getFlags());
}
public void testResize() {
Token t = new Token();
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, content.length);
for (int i = 0; i < 2000; i++)
{
t.resizeBuffer(i);
assertTrue(i <= t.buffer().length);
assertEquals("hello", t.toString());
}
}
public void testGrow() {
Token t = new Token();
StringBuilder buf = new StringBuilder("ab");
for (int i = 0; i < 20; i++)
{
char[] content = buf.toString().toCharArray();
t.copyBuffer(content, 0, content.length);
assertEquals(buf.length(), t.length());
assertEquals(buf.toString(), t.toString());
buf.append(buf.toString());
}
assertEquals(1048576, t.length());
// now as a string, second variant
t = new Token();
buf = new StringBuilder("ab");
for (int i = 0; i < 20; i++)
{
t.setEmpty().append(buf);
String content = buf.toString();
assertEquals(content.length(), t.length());
assertEquals(content, t.toString());
buf.append(content);
}
assertEquals(1048576, t.length());
// Test for slow growth to a long term
t = new Token();
buf = new StringBuilder("a");
for (int i = 0; i < 20000; i++)
{
t.setEmpty().append(buf);
String content = buf.toString();
assertEquals(content.length(), t.length());
assertEquals(content, t.toString());
buf.append("a");
}
assertEquals(20000, t.length());
// Test for slow growth to a long term
t = new Token();
buf = new StringBuilder("a");
for (int i = 0; i < 20000; i++)
{
t.setEmpty().append(buf);
String content = buf.toString();
assertEquals(content.length(), t.length());
assertEquals(content, t.toString());
buf.append("a");
}
assertEquals(20000, t.length());
}
public void testToString() throws Exception {
char[] b = {'a', 'l', 'o', 'h', 'a'};
Token t = new Token("", 0, 5);
t.copyBuffer(b, 0, 5);
assertEquals("aloha", t.toString());
t.setEmpty().append("hi there");
assertEquals("hi there", t.toString());
}
public void testTermBufferEquals() throws Exception {
Token t1a = new Token();
char[] content1a = "hello".toCharArray();
t1a.copyBuffer(content1a, 0, 5);
Token t1b = new Token();
char[] content1b = "hello".toCharArray();
t1b.copyBuffer(content1b, 0, 5);
Token t2 = new Token();
char[] content2 = "hello2".toCharArray();
t2.copyBuffer(content2, 0, 6);
assertTrue(t1a.equals(t1b));
assertFalse(t1a.equals(t2));
assertFalse(t2.equals(t1b));
}
public void testMixedStringArray() throws Exception { /* the CharTermAttributeStuff is tested by TestCharTermAttributeImpl */
Token t = new Token("hello", 0, 5);
assertEquals(t.length(), 5);
assertEquals(t.toString(), "hello");
t.setEmpty().append("hello2");
assertEquals(t.length(), 6);
assertEquals(t.toString(), "hello2");
t.copyBuffer("hello3".toCharArray(), 0, 6);
assertEquals(t.toString(), "hello3");
char[] buffer = t.buffer();
buffer[1] = 'o';
assertEquals(t.toString(), "hollo3");
}
public void testClone() throws Exception {
Token t = new Token();
t.setOffset(0, 5);
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
Token copy = assertCloneIsEqual(t); Token copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
t.setPayload(pl);
copy = assertCloneIsEqual(t); copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
assertEquals(pl, copy.getPayload());
assertNotSame(pl, copy.getPayload());
}
public void testCopyTo() throws Exception {
Token t = new Token();
Token copy = assertCopyIsEqual(t); Token copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals("", t.toString());
assertEquals("", copy.toString());
@ -196,13 +72,13 @@ public class TestToken extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
copy = assertCopyIsEqual(t); copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
t.setPayload(pl);
copy = assertCopyIsEqual(t); copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals(pl, copy.getPayload());
assertNotSame(pl, copy.getPayload());
}
@ -244,35 +120,19 @@ public class TestToken extends LuceneTestCase {
public void testAttributeReflection() throws Exception {
Token t = new Token("foobar", 6, 22);
t.setFlags(8);
t.setPositionIncrement(3);
t.setPositionLength(11);
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
put(OffsetAttribute.class.getName() + "#startOffset", 6);
put(OffsetAttribute.class.getName() + "#endOffset", 22);
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1); put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
put(PositionLengthAttribute.class.getName() + "#positionLength", 1); put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
put(PayloadAttribute.class.getName() + "#payload", null);
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
put(FlagsAttribute.class.getName() + "#flags", 8);
}});
}
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked")
T clone = (T) att.clone();
assertEquals("Clone must be equal", att, clone);
assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
return clone;
}
public static <T extends AttributeImpl> T assertCopyIsEqual(T att) throws Exception {
@SuppressWarnings("unchecked")
T copy = (T) att.getClass().newInstance();
att.copyTo(copy);
assertEquals("Copied instance must be equal", att, copy);
assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
return copy;
}
}

View File

@ -17,7 +17,7 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
import org.apache.lucene.analysis.TestToken; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
@ -95,7 +95,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
CharTermAttributeImpl copy = TestToken.assertCloneIsEqual(t); CharTermAttributeImpl copy = assertCloneIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
@ -117,7 +117,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
public void testCopyTo() throws Exception {
CharTermAttributeImpl t = new CharTermAttributeImpl();
CharTermAttributeImpl copy = TestToken.assertCopyIsEqual(t); CharTermAttributeImpl copy = assertCopyIsEqual(t);
assertEquals("", t.toString());
assertEquals("", copy.toString());
@ -125,7 +125,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
copy = TestToken.assertCopyIsEqual(t); copy = assertCopyIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
@ -284,6 +284,23 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
}
}
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked")
T clone = (T) att.clone();
assertEquals("Clone must be equal", att, clone);
assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
return clone;
}
public static <T extends AttributeImpl> T assertCopyIsEqual(T att) throws Exception {
@SuppressWarnings("unchecked")
T copy = (T) att.getClass().newInstance();
att.copyTo(copy);
assertEquals("Copied instance must be equal", att, copy);
assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
return copy;
}
/*
// test speed of the dynamic instanceof checks in append(CharSequence),

View File

@ -0,0 +1,96 @@
package org.apache.lucene.analysis.tokenattributes;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import java.io.StringReader;
import java.util.HashMap;
public class TestPackedTokenAttributeImpl extends LuceneTestCase {
/* the CharTermAttributeStuff is tested by TestCharTermAttributeImpl */
public void testClone() throws Exception {
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
t.setOffset(0, 5);
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
PackedTokenAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
public void testCopyTo() throws Exception {
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
PackedTokenAttributeImpl copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals("", t.toString());
assertEquals("", copy.toString());
t = new PackedTokenAttributeImpl();
t.setOffset(0, 5);
char[] content = "hello".toCharArray();
t.copyBuffer(content, 0, 5);
char[] buf = t.buffer();
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
assertEquals(t.toString(), copy.toString());
assertNotSame(buf, copy.buffer());
}
public void testPackedTokenAttributeFactory() throws Exception {
TokenStream ts = new MockTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
((Tokenizer)ts).setReader(new StringReader("foo bar"));
assertTrue("CharTermAttribute is not implemented by Token",
ts.addAttribute(CharTermAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("OffsetAttribute is not implemented by Token",
ts.addAttribute(OffsetAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("PositionIncrementAttribute is not implemented by Token",
ts.addAttribute(PositionIncrementAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("TypeAttribute is not implemented by Token",
ts.addAttribute(TypeAttribute.class) instanceof PackedTokenAttributeImpl);
assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl",
ts.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl);
}
public void testAttributeReflection() throws Exception {
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
t.append("foobar");
t.setOffset(6, 22);
t.setPositionIncrement(3);
t.setPositionLength(11);
t.setType("foobar");
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
put(OffsetAttribute.class.getName() + "#startOffset", 6);
put(OffsetAttribute.class.getName() + "#endOffset", 22);
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
put(TypeAttribute.class.getName() + "#type", "foobar");
}});
}
}
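The factory test above asserts that all five core attributes resolve to one packed implementation. A minimal standalone sketch of what that means for callers (hypothetical demo class, not part of this commit; it assumes the packed factory maps every core attribute interface to the same instance, which is exactly what the test checks):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeSource;

public class PackedFactoryDemo {
  public static void main(String[] args) {
    // With the packed default factory, the core attributes are different
    // interface views onto one PackedTokenAttributeImpl, so clearAttributes(),
    // saveState(), and restoreState() touch a single object instead of five.
    AttributeSource src = new AttributeSource(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
    CharTermAttribute term = src.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = src.addAttribute(OffsetAttribute.class);
    System.out.println(term == offset);  // expected: true (same packed instance)
  }
}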

View File

@ -37,15 +37,15 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.Monster; import org.apache.lucene.util.LuceneTestCase.Monster;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits; import org.apache.lucene.util.TimeUnits;
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
// NOTE: SimpleText codec will consume very large amounts of // NOTE: SimpleText codec will consume very large amounts of

View File

@ -22,9 +22,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import java.io.IOException; import java.io.IOException;
@ -51,10 +51,10 @@ class CellTokenStream extends TokenStream {
} }
// just a wrapper to prevent adding CTA // just a wrapper to prevent adding CTA
private static final class CellAttributeFactory extends AttributeSource.AttributeFactory { private static final class CellAttributeFactory extends AttributeFactory {
private final AttributeSource.AttributeFactory delegate; private final AttributeFactory delegate;
CellAttributeFactory(AttributeSource.AttributeFactory delegate) { CellAttributeFactory(AttributeFactory delegate) {
this.delegate = delegate; this.delegate = delegate;
} }
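The hunk above retargets the spatial module's wrapper factory from the old inner class to the new top-level AttributeFactory; the delegation body itself is unchanged. As a general sketch, a delegating factory after this move looks like the following (illustrative names, not from this commit):

import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;

// Forwards every request to a wrapped factory; subclasses can intercept
// specific attribute interfaces (CellAttributeFactory uses this pattern to
// keep CharTermAttribute from being added).
class DelegatingAttributeFactory extends AttributeFactory {
  private final AttributeFactory delegate;

  DelegatingAttributeFactory(AttributeFactory delegate) {
    this.delegate = delegate;
  }

  @Override
  public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
    return delegate.createAttributeInstance(attClass);
  }
}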

View File

@ -39,9 +39,9 @@ import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
import org.apache.lucene.analysis.CannedBinaryTokenStream; import org.apache.lucene.analysis.CannedBinaryTokenStream;
import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockBytesAttributeFactory;
import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockUTF16TermAttributeImpl;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -52,6 +52,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -621,8 +622,6 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
private int numStopChars; private int numStopChars;
private boolean preserveHoles; private boolean preserveHoles;
private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
public MockTokenEatingAnalyzer(int numStopChars, boolean preserveHoles) { public MockTokenEatingAnalyzer(int numStopChars, boolean preserveHoles) {
this.preserveHoles = preserveHoles; this.preserveHoles = preserveHoles;
this.numStopChars = numStopChars; this.numStopChars = numStopChars;
@ -630,7 +629,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
@Override @Override
public TokenStreamComponents createComponents(String fieldName) { public TokenStreamComponents createComponents(String fieldName) {
MockTokenizer tokenizer = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH); MockTokenizer tokenizer = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY,
MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
tokenizer.setEnableChecks(true); tokenizer.setEnableChecks(true);
TokenStream next; TokenStream next;
if (numStopChars != 0) { if (numStopChars != 0) {

View File

@ -25,6 +25,7 @@ import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.Writer; import java.io.Writer;
import java.lang.reflect.Constructor;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.*;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
@ -38,8 +39,8 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Attribute; import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
@ -935,16 +936,41 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
return mockTokenizer; return mockTokenizer;
} }
/** Returns a new AttributeFactory impl */ /**
public static AttributeFactory newAttributeFactory(Random random) { * This provides the default AttributeFactory in reflective-only mode (package private)
if (random.nextBoolean()) { * so we can test it.
return Token.TOKEN_ATTRIBUTE_FACTORY; */
} else { private final static AttributeFactory REFLECTIVE_ATTRIBUTE_FACTORY;
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; static {
try {
final Constructor<? extends AttributeFactory> constr = Class
.forName(AttributeFactory.class.getName() + "$DefaultAttributeFactory")
.asSubclass(AttributeFactory.class)
.getDeclaredConstructor(boolean.class);
constr.setAccessible(true);
REFLECTIVE_ATTRIBUTE_FACTORY = constr.newInstance(false);
} catch (ReflectiveOperationException e) {
throw new Error("Cannot initantiate a reflective-only DefaultAttributeFactory", e);
} }
} }
/** Returns a new AttributeFactory impl */ /** Returns a random AttributeFactory impl */
public static AttributeFactory newAttributeFactory(Random random) {
switch (random.nextInt(4)) {
case 0:
return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
case 1:
return Token.TOKEN_ATTRIBUTE_FACTORY;
case 2:
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
case 3:
return REFLECTIVE_ATTRIBUTE_FACTORY;
default:
throw new AssertionError("Please fix the Random.nextInt() call above");
}
}
/** Returns a random AttributeFactory impl */
public static AttributeFactory newAttributeFactory() { public static AttributeFactory newAttributeFactory() {
return newAttributeFactory(random()); return newAttributeFactory(random());
} }
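The package-private DefaultAttributeFactory constructor takes a boolean that, going by this commit's summary, selects between the new Java 7 MethodHandles path and the reflective fallback the test forces here. For illustration only, a MethodHandle bound to a no-arg attribute constructor can be resolved and invoked like this (hypothetical demo, not the commit's implementation; it assumes PackedTokenAttributeImpl has a public no-arg constructor):

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;

public class MethodHandleCtorDemo {
  public static void main(String[] args) throws Throwable {
    // Resolve the public no-arg constructor once; unlike
    // Class.newInstance(), the resulting handle can be cached and
    // inlined by the JIT on repeated attribute instantiation.
    MethodHandle ctor = MethodHandles.publicLookup().findConstructor(
        PackedTokenAttributeImpl.class, MethodType.methodType(void.class));
    PackedTokenAttributeImpl impl = (PackedTokenAttributeImpl) ctor.invokeExact();
    System.out.println(impl.getClass().getSimpleName());
  }
}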

View File

@ -20,12 +20,11 @@ package org.apache.lucene.analysis;
/** /**
* Analyzer for testing that encodes terms as UTF-16 bytes. * Analyzer for testing that encodes terms as UTF-16 bytes.
*/ */
public class MockBytesAnalyzer extends Analyzer { public final class MockBytesAnalyzer extends Analyzer {
private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
@Override @Override
protected TokenStreamComponents createComponents(String fieldName) { protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new MockTokenizer(factory, MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH); Tokenizer t = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY,
MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
return new TokenStreamComponents(t); return new TokenStreamComponents(t);
} }
} }

View File

@ -1,40 +0,0 @@
package org.apache.lucene.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
/**
* Attribute factory that implements CharTermAttribute with
* {@link MockUTF16TermAttributeImpl}
*/
public class MockBytesAttributeFactory extends AttributeSource.AttributeFactory {
private final AttributeSource.AttributeFactory delegate =
AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
@Override
public AttributeImpl createAttributeInstance(
Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(MockUTF16TermAttributeImpl.class)
? new MockUTF16TermAttributeImpl()
: delegate.createAttributeInstance(attClass);
}
}
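The deleted delegating factory above is superseded by the new AttributeFactory.getStaticImplementation helper, which builds the same "serve this impl for the interfaces it implements, delegate the rest" behavior without a hand-written class. A short sketch of the replacement idiom (the exact call site appears in MockUTF16TermAttributeImpl below; the wrapper class name is illustrative):

import org.apache.lucene.analysis.MockUTF16TermAttributeImpl;
import org.apache.lucene.util.AttributeFactory;

public class StaticImplFactoryDemo {
  // Requests for attribute interfaces implemented by
  // MockUTF16TermAttributeImpl get that class; everything else falls
  // through to the default (packed) factory.
  static final AttributeFactory FACTORY = AttributeFactory.getStaticImplementation(
      AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);
}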

View File

@ -18,12 +18,12 @@ package org.apache.lucene.analysis;
*/ */
import java.io.IOException; import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.util.Random; import java.util.Random;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp; import org.apache.lucene.util.automaton.RegExp;
@ -113,7 +113,7 @@ public class MockTokenizer extends Tokenizer {
this(factory, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH); this(factory, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
} }
/** Calls {@link #MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory,CharacterRunAutomaton,boolean) /** Calls {@link #MockTokenizer(AttributeFactory,CharacterRunAutomaton,boolean)
* MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */ * MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */
public MockTokenizer(AttributeFactory factory) { public MockTokenizer(AttributeFactory factory) {
this(factory, WHITESPACE, true); this(factory, WHITESPACE, true);

View File

@ -17,9 +17,10 @@ package org.apache.lucene.analysis;
* limitations under the License. * limitations under the License.
*/ */
import java.nio.charset.Charset; import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** /**
@ -27,12 +28,15 @@ import org.apache.lucene.util.BytesRef;
* text as UTF-16 bytes instead of as UTF-8 bytes. * text as UTF-16 bytes instead of as UTF-8 bytes.
*/ */
public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl { public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
static final Charset charset = Charset.forName("UTF-16LE");
/** Factory that returns an instance of this class for CharTermAttribute */
public static final AttributeFactory UTF16_TERM_ATTRIBUTE_FACTORY =
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);
@Override @Override
public void fillBytesRef() { public void fillBytesRef() {
BytesRef bytes = getBytesRef(); BytesRef bytes = getBytesRef();
byte[] utf16 = toString().getBytes(charset); byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE);
bytes.bytes = utf16; bytes.bytes = utf16;
bytes.offset = 0; bytes.offset = 0;
bytes.length = utf16.length; bytes.length = utf16.length;
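fillBytesRef() above re-encodes the term text as UTF-16LE instead of Lucene's usual UTF-8, i.e. two bytes per char for ASCII input. The same encoding step shown standalone (hypothetical demo, outside any attribute):

import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.BytesRef;

public class Utf16EncodeDemo {
  public static void main(String[] args) {
    // Same re-encoding fillBytesRef() performs, in isolation.
    byte[] utf16 = "hello".getBytes(StandardCharsets.UTF_16LE);
    BytesRef bytes = new BytesRef(utf16);
    System.out.println(bytes.length);  // 10: two bytes per ASCII char
  }
}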

View File

@ -31,6 +31,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State; import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.SolrAnalyzer;

View File

@ -23,7 +23,7 @@ import java.util.Map;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory; import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/** /**