mirror of https://github.com/apache/lucene.git
LUCENE-5640: Refactor Token, add new PackedTokenAttributeImpl, make use of Java 7 MethodHandles in DEFAULT_ATTRIBUTE_FACTORY
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1592914 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b234e9748f
commit
0086a6e644
|
@ -100,6 +100,10 @@ Changes in Backwards Compatibility Policy
|
||||||
can be used by custom fieldtypes, which don't use the Analyzer, but
|
can be used by custom fieldtypes, which don't use the Analyzer, but
|
||||||
implement their own TokenStream. (Uwe Schindler, Robert Muir)
|
implement their own TokenStream. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5640: AttributeSource.AttributeFactory was moved to a
|
||||||
|
top-level class: org.apache.lucene.util.AttributeFactory
|
||||||
|
(Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5582: Deprecate IndexOutput.length (just use
|
* LUCENE-5582: Deprecate IndexOutput.length (just use
|
||||||
|
@ -126,6 +130,9 @@ API Changes
|
||||||
* LUCENE-5633: Change NoMergePolicy to a singleton with no distinction between
|
* LUCENE-5633: Change NoMergePolicy to a singleton with no distinction between
|
||||||
compound and non-compound types. (Shai Erera)
|
compound and non-compound types. (Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-5640: The Token class was deprecated. Since Lucene 2.9, TokenStreams
|
||||||
|
are using Attributes, Token is no longer used. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-5603: hunspell stemmer more efficiently strips prefixes
|
* LUCENE-5603: hunspell stemmer more efficiently strips prefixes
|
||||||
|
@ -140,9 +147,11 @@ Optimizations
|
||||||
* LUCENE-5634: IndexWriter reuses TokenStream instances for String and Numeric
|
* LUCENE-5634: IndexWriter reuses TokenStream instances for String and Numeric
|
||||||
fields by default. (Uwe Schindler, Shay Banon, Mike McCandless, Robert Muir)
|
fields by default. (Uwe Schindler, Shay Banon, Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
* LUCENE-5638: TokenStream uses a more performant AttributeFactory by default,
|
* LUCENE-5638, LUCENE-5640: TokenStream uses a more performant AttributeFactory
|
||||||
that packs the core attributes into one impl, for faster clearAttributes(),
|
by default, that packs the core attributes into one implementation
|
||||||
saveState(), and restoreState(). (Uwe Schindler, Robert Muir)
|
(PackedTokenAttributeImpl), for faster clearAttributes(), saveState(), and
|
||||||
|
restoreState(). In addition, AttributeFactory uses Java 7 MethodHandles for
|
||||||
|
instantiating Attribute implementations. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
* LUCENE-5609: Changed the default NumericField precisionStep from 4
|
* LUCENE-5609: Changed the default NumericField precisionStep from 4
|
||||||
to 8 (for int/float) and 16 (for long/double), for faster indexing
|
to 8 (for int/float) and 16 (for long/double), for faster indexing
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.io.Reader;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.core;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -55,7 +56,7 @@ public class LetterTokenizer extends CharTokenizer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new LetterTokenizer using a given
|
* Construct a new LetterTokenizer using a given
|
||||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
* {@link org.apache.lucene.util.AttributeFactory}.
|
||||||
*
|
*
|
||||||
* @param matchVersion
|
* @param matchVersion
|
||||||
* Lucene version to match See {@link <a href="#version">above</a>}
|
* Lucene version to match See {@link <a href="#version">above</a>}
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -60,7 +61,7 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new LowerCaseTokenizer using a given
|
* Construct a new LowerCaseTokenizer using a given
|
||||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
* {@link org.apache.lucene.util.AttributeFactory}.
|
||||||
*
|
*
|
||||||
* @param matchVersion
|
* @param matchVersion
|
||||||
* Lucene version to match See {@link <a href="#version">above</a>}
|
* Lucene version to match See {@link <a href="#version">above</a>}
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
|
||||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -50,7 +51,7 @@ public final class WhitespaceTokenizer extends CharTokenizer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new WhitespaceTokenizer using a given
|
* Construct a new WhitespaceTokenizer using a given
|
||||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
* {@link org.apache.lucene.util.AttributeFactory}.
|
||||||
*
|
*
|
||||||
* @param
|
* @param
|
||||||
* matchVersion Lucene version to match See
|
* matchVersion Lucene version to match See
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.core;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -24,7 +24,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link TokenStream} containing a single token.
|
* A {@link TokenStream} containing a single token.
|
||||||
|
* @deprecated Do not use this anymore!
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public final class SingleTokenTokenStream extends TokenStream {
|
public final class SingleTokenTokenStream extends TokenStream {
|
||||||
|
|
||||||
private boolean exhausted = false;
|
private boolean exhausted = false;
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.ngram;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -49,7 +50,7 @@ public class EdgeNGramTokenizer extends NGramTokenizer {
|
||||||
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
* Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
|
||||||
*
|
*
|
||||||
* @param version the Lucene match version
|
* @param version the Lucene match version
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
|
||||||
* @param minGram the smallest n-gram to generate
|
* @param minGram the smallest n-gram to generate
|
||||||
* @param maxGram the largest n-gram to generate
|
* @param maxGram the largest n-gram to generate
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.ngram;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.io.Reader;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Old broken version of {@link NGramTokenizer}.
|
* Old broken version of {@link NGramTokenizer}.
|
||||||
|
@ -54,7 +55,7 @@ public final class Lucene43NGramTokenizer extends Tokenizer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates NGramTokenizer with given min and max n-grams.
|
* Creates NGramTokenizer with given min and max n-grams.
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
|
||||||
* @param minGram the smallest n-gram to generate
|
* @param minGram the smallest n-gram to generate
|
||||||
* @param maxGram the largest n-gram to generate
|
* @param maxGram the largest n-gram to generate
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -99,7 +100,7 @@ public class NGramTokenizer extends Tokenizer {
|
||||||
/**
|
/**
|
||||||
* Creates NGramTokenizer with given min and max n-grams.
|
* Creates NGramTokenizer with given min and max n-grams.
|
||||||
* @param version the lucene compatibility <a href="#version">version</a>
|
* @param version the lucene compatibility <a href="#version">version</a>
|
||||||
* @param factory {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
|
* @param factory {@link org.apache.lucene.util.AttributeFactory} to use
|
||||||
* @param minGram the smallest n-gram to generate
|
* @param minGram the smallest n-gram to generate
|
||||||
* @param maxGram the largest n-gram to generate
|
* @param maxGram the largest n-gram to generate
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ngram;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
|
@ -17,13 +17,12 @@ package org.apache.lucene.analysis.path;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tokenizer for path-like hierarchies.
|
* Tokenizer for path-like hierarchies.
|
||||||
|
@ -69,7 +68,7 @@ public class PathHierarchyTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) {
|
public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
public PathHierarchyTokenizer
|
public PathHierarchyTokenizer
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link PathHierarchyTokenizer}.
|
* Factory for {@link PathHierarchyTokenizer}.
|
||||||
|
|
|
@ -17,15 +17,14 @@ package org.apache.lucene.analysis.path;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tokenizer for domain-like hierarchies.
|
* Tokenizer for domain-like hierarchies.
|
||||||
|
@ -82,7 +81,7 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReversePathHierarchyTokenizer( int bufferSize, char delimiter, char replacement, int skip) {
|
public ReversePathHierarchyTokenizer( int bufferSize, char delimiter, char replacement, int skip) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
||||||
}
|
}
|
||||||
public ReversePathHierarchyTokenizer
|
public ReversePathHierarchyTokenizer
|
||||||
(AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
|
(AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
|
||||||
|
|
|
@ -22,10 +22,10 @@ import java.io.Reader;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This tokenizer uses regex pattern matching to construct distinct tokens
|
* This tokenizer uses regex pattern matching to construct distinct tokens
|
||||||
|
@ -67,7 +67,7 @@ public final class PatternTokenizer extends Tokenizer {
|
||||||
|
|
||||||
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
||||||
public PatternTokenizer(Pattern pattern, int group) {
|
public PatternTokenizer(Pattern pattern, int group) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, pattern, group);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, pattern, group);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.util.Map;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link PatternTokenizer}.
|
* Factory for {@link PatternTokenizer}.
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -106,7 +107,7 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
|
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
|
||||||
*/
|
*/
|
||||||
public ClassicTokenizer(Version matchVersion, AttributeFactory factory) {
|
public ClassicTokenizer(Version matchVersion, AttributeFactory factory) {
|
||||||
super(factory);
|
super(factory);
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -120,7 +121,7 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
|
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
|
||||||
*/
|
*/
|
||||||
public StandardTokenizer(Version matchVersion, AttributeFactory factory) {
|
public StandardTokenizer(Version matchVersion, AttributeFactory factory) {
|
||||||
super(factory);
|
super(factory);
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
|
@ -27,9 +27,9 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class implements Word Break rules from the Unicode Text Segmentation
|
* This class implements Word Break rules from the Unicode Text Segmentation
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.standard;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
|
@ -20,11 +20,11 @@ package org.apache.lucene.analysis.th;
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.util.CharArrayIterator;
|
import org.apache.lucene.analysis.util.CharArrayIterator;
|
||||||
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
|
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tokenizer that use {@link BreakIterator} to tokenize Thai text.
|
* Tokenizer that use {@link BreakIterator} to tokenize Thai text.
|
||||||
|
@ -60,7 +60,7 @@ public class ThaiTokenizer extends SegmentingTokenizerBase {
|
||||||
|
|
||||||
/** Creates a new ThaiTokenizer */
|
/** Creates a new ThaiTokenizer */
|
||||||
public ThaiTokenizer() {
|
public ThaiTokenizer() {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */
|
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link ThaiTokenizer}.
|
* Factory for {@link ThaiTokenizer}.
|
||||||
|
@ -43,7 +43,7 @@ public class ThaiTokenizerFactory extends TokenizerFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(AttributeSource.AttributeFactory factory) {
|
public Tokenizer create(AttributeFactory factory) {
|
||||||
return new ThaiTokenizer(factory);
|
return new ThaiTokenizer(factory);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.io.Reader;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
|
@ -19,12 +19,11 @@ package org.apache.lucene.analysis.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Breaks text into sentences with a {@link BreakIterator} and
|
* Breaks text into sentences with a {@link BreakIterator} and
|
||||||
|
@ -63,7 +62,7 @@ public abstract class SegmentingTokenizerBase extends Tokenizer {
|
||||||
* be provided to this constructor.
|
* be provided to this constructor.
|
||||||
*/
|
*/
|
||||||
public SegmentingTokenizerBase(BreakIterator iterator) {
|
public SegmentingTokenizerBase(BreakIterator iterator) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, iterator);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -17,11 +17,10 @@ package org.apache.lucene.analysis.util;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -73,7 +72,7 @@ public abstract class TokenizerFactory extends AbstractAnalysisFactory {
|
||||||
|
|
||||||
/** Creates a TokenStream of the specified input using the default attribute factory. */
|
/** Creates a TokenStream of the specified input using the default attribute factory. */
|
||||||
public final Tokenizer create() {
|
public final Tokenizer create() {
|
||||||
return create(Token.TOKEN_ATTRIBUTE_FACTORY);
|
return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a TokenStream of the specified input using the given AttributeFactory */
|
/** Creates a TokenStream of the specified input using the given AttributeFactory */
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -145,7 +146,7 @@ public final class WikipediaTokenizer extends Tokenizer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new instance of the {@link org.apache.lucene.analysis.wikipedia.WikipediaTokenizer}. Attaches the
|
* Creates a new instance of the {@link org.apache.lucene.analysis.wikipedia.WikipediaTokenizer}. Attaches the
|
||||||
* <code>input</code> to a the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
* <code>input</code> to a the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeFactory}.
|
||||||
*
|
*
|
||||||
* @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
|
* @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link WikipediaTokenizer}.
|
* Factory for {@link WikipediaTokenizer}.
|
||||||
|
|
|
@ -19,11 +19,9 @@ package org.apache.lucene.collation;
|
||||||
|
|
||||||
import java.text.Collator;
|
import java.text.Collator;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
|
import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -69,18 +67,17 @@ import org.apache.lucene.util.AttributeSource;
|
||||||
* ICUCollationAttributeFactory on the query side, or vice versa.
|
* ICUCollationAttributeFactory on the query side, or vice versa.
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class CollationAttributeFactory extends AttributeSource.AttributeFactory {
|
public class CollationAttributeFactory extends AttributeFactory.StaticImplementationAttributeFactory<CollatedTermAttributeImpl> {
|
||||||
private final Collator collator;
|
private final Collator collator;
|
||||||
private final AttributeSource.AttributeFactory delegate;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a CollationAttributeFactory, using
|
* Create a CollationAttributeFactory, using
|
||||||
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
|
* {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY} as the
|
||||||
* factory for all other attributes.
|
* factory for all other attributes.
|
||||||
* @param collator CollationKey generator
|
* @param collator CollationKey generator
|
||||||
*/
|
*/
|
||||||
public CollationAttributeFactory(Collator collator) {
|
public CollationAttributeFactory(Collator collator) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
|
this(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, collator);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -89,16 +86,13 @@ public class CollationAttributeFactory extends AttributeSource.AttributeFactory
|
||||||
* @param delegate Attribute Factory
|
* @param delegate Attribute Factory
|
||||||
* @param collator CollationKey generator
|
* @param collator CollationKey generator
|
||||||
*/
|
*/
|
||||||
public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
|
public CollationAttributeFactory(AttributeFactory delegate, Collator collator) {
|
||||||
this.delegate = delegate;
|
super(delegate, CollatedTermAttributeImpl.class);
|
||||||
this.collator = collator;
|
this.collator = collator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AttributeImpl createAttributeInstance(
|
public CollatedTermAttributeImpl createInstance() {
|
||||||
Class<? extends Attribute> attClass) {
|
return new CollatedTermAttributeImpl(collator);
|
||||||
return attClass.isAssignableFrom(CollatedTermAttributeImpl.class)
|
|
||||||
? new CollatedTermAttributeImpl(collator)
|
|
||||||
: delegate.createAttributeInstance(attClass);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||||
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
||||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sanity check some things about all factories,
|
* Sanity check some things about all factories,
|
||||||
|
|
|
@ -81,8 +81,8 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||||
import org.apache.lucene.analysis.util.CharArrayMap;
|
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|
||||||
import org.apache.lucene.util.CharsRef;
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.Rethrow;
|
import org.apache.lucene.util.Rethrow;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
|
@ -20,12 +20,12 @@ package org.apache.lucene.analysis.icu.segmentation;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
|
import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
import com.ibm.icu.text.BreakIterator;
|
import com.ibm.icu.text.BreakIterator;
|
||||||
|
@ -80,7 +80,7 @@ public final class ICUTokenizer extends Tokenizer {
|
||||||
* @param config Tailored BreakIterator configuration
|
* @param config Tailored BreakIterator configuration
|
||||||
*/
|
*/
|
||||||
public ICUTokenizer(ICUTokenizerConfig config) {
|
public ICUTokenizer(ICUTokenizerConfig config) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, config);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -28,7 +28,7 @@ import java.util.Map;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
|
|
|
@ -17,12 +17,9 @@ package org.apache.lucene.collation;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
|
import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
import org.apache.lucene.collation.CollationAttributeFactory; // javadoc
|
|
||||||
|
|
||||||
import com.ibm.icu.text.Collator;
|
import com.ibm.icu.text.Collator;
|
||||||
|
|
||||||
|
@ -63,18 +60,17 @@ import com.ibm.icu.text.Collator;
|
||||||
* java.text.Collator over several languages.
|
* java.text.Collator over several languages.
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory {
|
public class ICUCollationAttributeFactory extends AttributeFactory.StaticImplementationAttributeFactory<ICUCollatedTermAttributeImpl> {
|
||||||
private final Collator collator;
|
private final Collator collator;
|
||||||
private final AttributeSource.AttributeFactory delegate;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an ICUCollationAttributeFactory, using
|
* Create an ICUCollationAttributeFactory, using
|
||||||
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
|
* {@link TokenStream#DEFAULT_TOKEN_ATTRIBUTE_FACTORY} as the
|
||||||
* factory for all other attributes.
|
* factory for all other attributes.
|
||||||
* @param collator CollationKey generator
|
* @param collator CollationKey generator
|
||||||
*/
|
*/
|
||||||
public ICUCollationAttributeFactory(Collator collator) {
|
public ICUCollationAttributeFactory(Collator collator) {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
|
this(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, collator);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -83,16 +79,13 @@ public class ICUCollationAttributeFactory extends AttributeSource.AttributeFacto
|
||||||
* @param delegate Attribute Factory
|
* @param delegate Attribute Factory
|
||||||
* @param collator CollationKey generator
|
* @param collator CollationKey generator
|
||||||
*/
|
*/
|
||||||
public ICUCollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) {
|
public ICUCollationAttributeFactory(AttributeFactory delegate, Collator collator) {
|
||||||
this.delegate = delegate;
|
super(delegate, ICUCollatedTermAttributeImpl.class);
|
||||||
this.collator = collator;
|
this.collator = collator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AttributeImpl createAttributeInstance(
|
public ICUCollatedTermAttributeImpl createInstance() {
|
||||||
Class<? extends Attribute> attClass) {
|
return new ICUCollatedTermAttributeImpl(collator);
|
||||||
return attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class)
|
|
||||||
? new ICUCollatedTermAttributeImpl(collator)
|
|
||||||
: delegate.createAttributeInstance(attClass);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.ja;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -40,6 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||||
import org.apache.lucene.analysis.util.RollingCharBuffer;
|
import org.apache.lucene.analysis.util.RollingCharBuffer;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
|
@ -195,7 +195,7 @@ public final class JapaneseTokenizer extends Tokenizer {
|
||||||
* @param mode tokenization mode.
|
* @param mode tokenization mode.
|
||||||
*/
|
*/
|
||||||
public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
|
public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
|
||||||
this(org.apache.lucene.analysis.Token.TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -30,7 +30,7 @@ import java.util.Map;
|
||||||
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
||||||
import org.apache.lucene.analysis.ja.dict.UserDictionary;
|
import org.apache.lucene.analysis.ja.dict.UserDictionary;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||||
|
|
|
@ -22,12 +22,12 @@ import java.text.BreakIterator;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
|
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
|
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tokenizer for Chinese or mixed Chinese-English text.
|
* Tokenizer for Chinese or mixed Chinese-English text.
|
||||||
|
@ -48,7 +48,7 @@ public class HMMChineseTokenizer extends SegmentingTokenizerBase {
|
||||||
|
|
||||||
/** Creates a new HMMChineseTokenizer */
|
/** Creates a new HMMChineseTokenizer */
|
||||||
public HMMChineseTokenizer() {
|
public HMMChineseTokenizer() {
|
||||||
this(Token.TOKEN_ATTRIBUTE_FACTORY);
|
this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a new HMMChineseTokenizer, supplying the AttributeFactory */
|
/** Creates a new HMMChineseTokenizer, supplying the AttributeFactory */
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link HMMChineseTokenizer}
|
* Factory for {@link HMMChineseTokenizer}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.Reader;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for the SmartChineseAnalyzer {@link SentenceTokenizer}
|
* Factory for the SmartChineseAnalyzer {@link SentenceTokenizer}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.uima;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
import org.apache.uima.cas.CAS;
|
import org.apache.uima.cas.CAS;
|
||||||
|
|
|
@ -17,17 +17,16 @@ package org.apache.lucene.analysis.uima;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
import org.apache.uima.cas.Type;
|
import org.apache.uima.cas.Type;
|
||||||
import org.apache.uima.cas.text.AnnotationFS;
|
import org.apache.uima.cas.text.AnnotationFS;
|
||||||
import org.apache.uima.resource.ResourceInitializationException;
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -44,7 +43,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||||
private int finalOffset = 0;
|
private int finalOffset = 0;
|
||||||
|
|
||||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
|
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
|
||||||
this(descriptorPath, tokenType, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
|
this(descriptorPath, tokenType, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
|
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.uima;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
|
@ -17,11 +17,11 @@ package org.apache.lucene.analysis.uima;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
import org.apache.uima.cas.CASException;
|
import org.apache.uima.cas.CASException;
|
||||||
import org.apache.uima.cas.FeaturePath;
|
import org.apache.uima.cas.FeaturePath;
|
||||||
|
@ -30,7 +30,6 @@ import org.apache.uima.cas.text.AnnotationFS;
|
||||||
import org.apache.uima.resource.ResourceInitializationException;
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,7 +53,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
||||||
private int finalOffset = 0;
|
private int finalOffset = 0;
|
||||||
|
|
||||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
|
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
|
||||||
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
|
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
|
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.uima;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.document.LongField; // for javadocs
|
||||||
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
|
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
|
||||||
import org.apache.lucene.search.NumericRangeQuery;
|
import org.apache.lucene.search.NumericRangeQuery;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeReflector;
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -233,7 +234,7 @@ public final class NumericTokenStream extends TokenStream {
|
||||||
/**
|
/**
|
||||||
* Expert: Creates a token stream for numeric values with the specified
|
* Expert: Creates a token stream for numeric values with the specified
|
||||||
* <code>precisionStep</code> using the given
|
* <code>precisionStep</code> using the given
|
||||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
* {@link org.apache.lucene.util.AttributeFactory}.
|
||||||
* The stream is not yet initialized,
|
* The stream is not yet initialized,
|
||||||
* before using set a value using the various set<em>???</em>Value() methods.
|
* before using set a value using the various set<em>???</em>Value() methods.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -17,16 +17,12 @@ package org.apache.lucene.analysis;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
|
import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeReflector;
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -58,53 +54,6 @@ import org.apache.lucene.util.BytesRef;
|
||||||
be used as convenience class that implements all {@link Attribute}s, which is especially useful
|
be used as convenience class that implements all {@link Attribute}s, which is especially useful
|
||||||
to easily switch from the old to the new TokenStream API.
|
to easily switch from the old to the new TokenStream API.
|
||||||
|
|
||||||
<br><br>
|
|
||||||
|
|
||||||
<p>Tokenizers and TokenFilters should try to re-use a Token
|
|
||||||
instance when possible for best performance, by
|
|
||||||
implementing the {@link TokenStream#incrementToken()} API.
|
|
||||||
Failing that, to create a new Token you should first use
|
|
||||||
one of the constructors that starts with null text. To load
|
|
||||||
the token from a char[] use {@link #copyBuffer(char[], int, int)}.
|
|
||||||
To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}.
|
|
||||||
Alternatively you can get the Token's termBuffer by calling either {@link #buffer()},
|
|
||||||
if you know that your text is shorter than the capacity of the termBuffer
|
|
||||||
or {@link #resizeBuffer(int)}, if there is any possibility
|
|
||||||
that you may need to grow the buffer. Fill in the characters of your term into this
|
|
||||||
buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
|
|
||||||
or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to
|
|
||||||
set the length of the term text. See <a target="_top"
|
|
||||||
href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
|
|
||||||
for details.</p>
|
|
||||||
<p>Typical Token reuse patterns:
|
|
||||||
<ul>
|
|
||||||
<li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
|
|
||||||
<pre class="prettyprint">
|
|
||||||
return reusableToken.reinit(string, startOffset, endOffset[, type]);
|
|
||||||
</pre>
|
|
||||||
</li>
|
|
||||||
<li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
|
|
||||||
<pre class="prettyprint">
|
|
||||||
return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
|
|
||||||
</pre>
|
|
||||||
</li>
|
|
||||||
</li>
|
|
||||||
<li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
|
|
||||||
<pre class="prettyprint">
|
|
||||||
return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
|
|
||||||
</pre>
|
|
||||||
</li>
|
|
||||||
<li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
|
|
||||||
<pre class="prettyprint">
|
|
||||||
return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
|
|
||||||
</pre>
|
|
||||||
</li>
|
|
||||||
<li> Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
|
|
||||||
<pre class="prettyprint">
|
|
||||||
return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
|
|
||||||
</pre>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
A few things to note:
|
A few things to note:
|
||||||
<ul>
|
<ul>
|
||||||
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
|
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
|
||||||
|
@ -118,58 +67,18 @@ import org.apache.lucene.util.BytesRef;
|
||||||
{@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
|
{@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
|
||||||
This method now only prints the term text, no additional information anymore.
|
This method now only prints the term text, no additional information anymore.
|
||||||
</p>
|
</p>
|
||||||
|
@deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
|
||||||
*/
|
*/
|
||||||
public class Token extends CharTermAttributeImpl
|
@Deprecated
|
||||||
implements TypeAttribute, PositionIncrementAttribute,
|
public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
|
||||||
FlagsAttribute, OffsetAttribute, PayloadAttribute, PositionLengthAttribute {
|
|
||||||
|
|
||||||
private int startOffset,endOffset;
|
|
||||||
private String type = DEFAULT_TYPE;
|
|
||||||
private int flags;
|
private int flags;
|
||||||
private BytesRef payload;
|
private BytesRef payload;
|
||||||
private int positionIncrement = 1;
|
|
||||||
private int positionLength = 1;
|
|
||||||
|
|
||||||
/** Constructs a Token will null text. */
|
/** Constructs a Token will null text. */
|
||||||
public Token() {
|
public Token() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Constructs a Token with null text and start & end
|
|
||||||
* offsets.
|
|
||||||
* @param start start offset in the source text
|
|
||||||
* @param end end offset in the source text */
|
|
||||||
public Token(int start, int end) {
|
|
||||||
checkOffsets(start, end);
|
|
||||||
startOffset = start;
|
|
||||||
endOffset = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Constructs a Token with null text and start & end
|
|
||||||
* offsets plus the Token type.
|
|
||||||
* @param start start offset in the source text
|
|
||||||
* @param end end offset in the source text
|
|
||||||
* @param typ the lexical type of this Token */
|
|
||||||
public Token(int start, int end, String typ) {
|
|
||||||
checkOffsets(start, end);
|
|
||||||
startOffset = start;
|
|
||||||
endOffset = end;
|
|
||||||
type = typ;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a Token with null text and start & end
|
|
||||||
* offsets plus flags. NOTE: flags is EXPERIMENTAL.
|
|
||||||
* @param start start offset in the source text
|
|
||||||
* @param end end offset in the source text
|
|
||||||
* @param flags The bits to set for this token
|
|
||||||
*/
|
|
||||||
public Token(int start, int end, int flags) {
|
|
||||||
checkOffsets(start, end);
|
|
||||||
startOffset = start;
|
|
||||||
endOffset = end;
|
|
||||||
this.flags = flags;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Constructs a Token with the given term text, and start
|
/** Constructs a Token with the given term text, and start
|
||||||
* & end offsets. The type defaults to "word."
|
* & end offsets. The type defaults to "word."
|
||||||
* <b>NOTE:</b> for better indexing speed you should
|
* <b>NOTE:</b> for better indexing speed you should
|
||||||
|
@ -179,149 +88,9 @@ public class Token extends CharTermAttributeImpl
|
||||||
* @param start start offset in the source text
|
* @param start start offset in the source text
|
||||||
* @param end end offset in the source text
|
* @param end end offset in the source text
|
||||||
*/
|
*/
|
||||||
public Token(String text, int start, int end) {
|
public Token(CharSequence text, int start, int end) {
|
||||||
checkOffsets(start, end);
|
|
||||||
append(text);
|
append(text);
|
||||||
startOffset = start;
|
setOffset(start, end);
|
||||||
endOffset = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Constructs a Token with the given text, start and end
|
|
||||||
* offsets, & type. <b>NOTE:</b> for better indexing
|
|
||||||
* speed you should instead use the char[] termBuffer
|
|
||||||
* methods to set the term text.
|
|
||||||
* @param text term text
|
|
||||||
* @param start start offset in the source text
|
|
||||||
* @param end end offset in the source text
|
|
||||||
* @param typ token type
|
|
||||||
*/
|
|
||||||
public Token(String text, int start, int end, String typ) {
|
|
||||||
checkOffsets(start, end);
|
|
||||||
append(text);
|
|
||||||
startOffset = start;
|
|
||||||
endOffset = end;
|
|
||||||
type = typ;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a Token with the given text, start and end
|
|
||||||
* offsets, & type. <b>NOTE:</b> for better indexing
|
|
||||||
* speed you should instead use the char[] termBuffer
|
|
||||||
* methods to set the term text.
|
|
||||||
* @param text term text
|
|
||||||
* @param start start offset in the source text
|
|
||||||
* @param end end offset in the source text
|
|
||||||
* @param flags token type bits
|
|
||||||
*/
|
|
||||||
public Token(String text, int start, int end, int flags) {
|
|
||||||
checkOffsets(start, end);
|
|
||||||
append(text);
|
|
||||||
startOffset = start;
|
|
||||||
endOffset = end;
|
|
||||||
this.flags = flags;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a Token with the given term buffer (offset
|
|
||||||
* & length), start and end
|
|
||||||
* offsets
|
|
||||||
* @param startTermBuffer buffer containing term text
|
|
||||||
* @param termBufferOffset the index in the buffer of the first character
|
|
||||||
* @param termBufferLength number of valid characters in the buffer
|
|
||||||
* @param start start offset in the source text
|
|
||||||
* @param end end offset in the source text
|
|
||||||
*/
|
|
||||||
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
|
|
||||||
checkOffsets(start, end);
|
|
||||||
copyBuffer(startTermBuffer, termBufferOffset, termBufferLength);
|
|
||||||
startOffset = start;
|
|
||||||
endOffset = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see PositionIncrementAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void setPositionIncrement(int positionIncrement) {
|
|
||||||
if (positionIncrement < 0)
|
|
||||||
throw new IllegalArgumentException
|
|
||||||
("Increment must be zero or greater: " + positionIncrement);
|
|
||||||
this.positionIncrement = positionIncrement;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see PositionIncrementAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getPositionIncrement() {
|
|
||||||
return positionIncrement;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see PositionLengthAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void setPositionLength(int positionLength) {
|
|
||||||
this.positionLength = positionLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see PositionLengthAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getPositionLength() {
|
|
||||||
return positionLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see OffsetAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public final int startOffset() {
|
|
||||||
return startOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see OffsetAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public final int endOffset() {
|
|
||||||
return endOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see OffsetAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void setOffset(int startOffset, int endOffset) {
|
|
||||||
checkOffsets(startOffset, endOffset);
|
|
||||||
this.startOffset = startOffset;
|
|
||||||
this.endOffset = endOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see TypeAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public final String type() {
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
* @see TypeAttribute
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public final void setType(String type) {
|
|
||||||
this.type = type;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -366,37 +135,8 @@ public class Token extends CharTermAttributeImpl
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
super.clear();
|
super.clear();
|
||||||
payload = null;
|
|
||||||
positionIncrement = positionLength = 1;
|
|
||||||
flags = 0;
|
flags = 0;
|
||||||
startOffset = endOffset = 0;
|
payload = null;
|
||||||
type = DEFAULT_TYPE;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Token clone() {
|
|
||||||
Token t = (Token)super.clone();
|
|
||||||
// Do a deep clone
|
|
||||||
if (payload != null) {
|
|
||||||
t.payload = payload.clone();
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Makes a clone, but replaces the term buffer &
|
|
||||||
* start/end offset in the process. This is more
|
|
||||||
* efficient than doing a full clone (and then calling
|
|
||||||
* {@link #copyBuffer}) because it saves a wasted copy of the old
|
|
||||||
* termBuffer. */
|
|
||||||
public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
|
|
||||||
final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
|
|
||||||
t.positionIncrement = positionIncrement;
|
|
||||||
t.positionLength = positionLength;
|
|
||||||
t.flags = flags;
|
|
||||||
t.type = type;
|
|
||||||
if (payload != null)
|
|
||||||
t.payload = payload.clone();
|
|
||||||
return t;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -406,14 +146,10 @@ public class Token extends CharTermAttributeImpl
|
||||||
|
|
||||||
if (obj instanceof Token) {
|
if (obj instanceof Token) {
|
||||||
final Token other = (Token) obj;
|
final Token other = (Token) obj;
|
||||||
return (startOffset == other.startOffset &&
|
return (
|
||||||
endOffset == other.endOffset &&
|
flags == other.flags &&
|
||||||
flags == other.flags &&
|
(payload == null ? other.payload == null : payload.equals(other.payload)) &&
|
||||||
positionIncrement == other.positionIncrement &&
|
super.equals(obj)
|
||||||
positionLength == other.positionLength &&
|
|
||||||
(type == null ? other.type == null : type.equals(other.type)) &&
|
|
||||||
(payload == null ? other.payload == null : payload.equals(other.payload)) &&
|
|
||||||
super.equals(obj)
|
|
||||||
);
|
);
|
||||||
} else
|
} else
|
||||||
return false;
|
return false;
|
||||||
|
@ -422,117 +158,20 @@ public class Token extends CharTermAttributeImpl
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
int code = super.hashCode();
|
int code = super.hashCode();
|
||||||
code = code * 31 + startOffset;
|
|
||||||
code = code * 31 + endOffset;
|
|
||||||
code = code * 31 + flags;
|
code = code * 31 + flags;
|
||||||
code = code * 31 + positionIncrement;
|
if (payload != null) {
|
||||||
code = code * 31 + positionLength;
|
|
||||||
if (type != null)
|
|
||||||
code = code * 31 + type.hashCode();
|
|
||||||
if (payload != null)
|
|
||||||
code = code * 31 + payload.hashCode();
|
code = code * 31 + payload.hashCode();
|
||||||
|
}
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
// like clear() but doesn't clear termBuffer/text
|
@Override
|
||||||
private void clearNoTermBuffer() {
|
public Token clone() {
|
||||||
payload = null;
|
final Token t = (Token) super.clone();
|
||||||
positionIncrement = positionLength = 1;
|
if (payload != null) {
|
||||||
flags = 0;
|
t.payload = payload.clone();
|
||||||
startOffset = endOffset = 0;
|
}
|
||||||
type = DEFAULT_TYPE;
|
return t;
|
||||||
}
|
|
||||||
|
|
||||||
/** Shorthand for calling {@link #clear},
|
|
||||||
* {@link #copyBuffer(char[], int, int)},
|
|
||||||
* {@link #setOffset},
|
|
||||||
* {@link #setType}
|
|
||||||
* @return this Token instance */
|
|
||||||
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
|
|
||||||
checkOffsets(newStartOffset, newEndOffset);
|
|
||||||
clearNoTermBuffer();
|
|
||||||
copyBuffer(newTermBuffer, newTermOffset, newTermLength);
|
|
||||||
payload = null;
|
|
||||||
positionIncrement = positionLength = 1;
|
|
||||||
startOffset = newStartOffset;
|
|
||||||
endOffset = newEndOffset;
|
|
||||||
type = newType;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Shorthand for calling {@link #clear},
|
|
||||||
* {@link #copyBuffer(char[], int, int)},
|
|
||||||
* {@link #setOffset},
|
|
||||||
* {@link #setType} on Token.DEFAULT_TYPE
|
|
||||||
* @return this Token instance */
|
|
||||||
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
|
|
||||||
checkOffsets(newStartOffset, newEndOffset);
|
|
||||||
clearNoTermBuffer();
|
|
||||||
copyBuffer(newTermBuffer, newTermOffset, newTermLength);
|
|
||||||
startOffset = newStartOffset;
|
|
||||||
endOffset = newEndOffset;
|
|
||||||
type = DEFAULT_TYPE;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Shorthand for calling {@link #clear},
|
|
||||||
* {@link #append(CharSequence)},
|
|
||||||
* {@link #setOffset},
|
|
||||||
* {@link #setType}
|
|
||||||
* @return this Token instance */
|
|
||||||
public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
|
|
||||||
checkOffsets(newStartOffset, newEndOffset);
|
|
||||||
clear();
|
|
||||||
append(newTerm);
|
|
||||||
startOffset = newStartOffset;
|
|
||||||
endOffset = newEndOffset;
|
|
||||||
type = newType;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Shorthand for calling {@link #clear},
|
|
||||||
* {@link #append(CharSequence, int, int)},
|
|
||||||
* {@link #setOffset},
|
|
||||||
* {@link #setType}
|
|
||||||
* @return this Token instance */
|
|
||||||
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
|
|
||||||
checkOffsets(newStartOffset, newEndOffset);
|
|
||||||
clear();
|
|
||||||
append(newTerm, newTermOffset, newTermOffset + newTermLength);
|
|
||||||
startOffset = newStartOffset;
|
|
||||||
endOffset = newEndOffset;
|
|
||||||
type = newType;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Shorthand for calling {@link #clear},
|
|
||||||
* {@link #append(CharSequence)},
|
|
||||||
* {@link #setOffset},
|
|
||||||
* {@link #setType} on Token.DEFAULT_TYPE
|
|
||||||
* @return this Token instance */
|
|
||||||
public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
|
|
||||||
checkOffsets(newStartOffset, newEndOffset);
|
|
||||||
clear();
|
|
||||||
append(newTerm);
|
|
||||||
startOffset = newStartOffset;
|
|
||||||
endOffset = newEndOffset;
|
|
||||||
type = DEFAULT_TYPE;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Shorthand for calling {@link #clear},
|
|
||||||
* {@link #append(CharSequence, int, int)},
|
|
||||||
* {@link #setOffset},
|
|
||||||
* {@link #setType} on Token.DEFAULT_TYPE
|
|
||||||
* @return this Token instance */
|
|
||||||
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
|
|
||||||
checkOffsets(newStartOffset, newEndOffset);
|
|
||||||
clear();
|
|
||||||
append(newTerm, newTermOffset, newTermOffset + newTermLength);
|
|
||||||
startOffset = newStartOffset;
|
|
||||||
endOffset = newEndOffset;
|
|
||||||
type = DEFAULT_TYPE;
|
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -540,87 +179,28 @@ public class Token extends CharTermAttributeImpl
|
||||||
* @param prototype source Token to copy fields from
|
* @param prototype source Token to copy fields from
|
||||||
*/
|
*/
|
||||||
public void reinit(Token prototype) {
|
public void reinit(Token prototype) {
|
||||||
copyBuffer(prototype.buffer(), 0, prototype.length());
|
// this is a bad hack to emulate no cloning of payload!
|
||||||
positionIncrement = prototype.positionIncrement;
|
prototype.copyToWithoutPayloadClone(this);
|
||||||
positionLength = prototype.positionLength;
|
|
||||||
flags = prototype.flags;
|
|
||||||
startOffset = prototype.startOffset;
|
|
||||||
endOffset = prototype.endOffset;
|
|
||||||
type = prototype.type;
|
|
||||||
payload = prototype.payload;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private void copyToWithoutPayloadClone(AttributeImpl target) {
|
||||||
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
|
super.copyTo(target);
|
||||||
* @param prototype existing Token
|
((FlagsAttribute) target).setFlags(flags);
|
||||||
* @param newTerm new term text
|
((PayloadAttribute) target).setPayload(payload);
|
||||||
*/
|
|
||||||
public void reinit(Token prototype, String newTerm) {
|
|
||||||
setEmpty().append(newTerm);
|
|
||||||
positionIncrement = prototype.positionIncrement;
|
|
||||||
positionLength = prototype.positionLength;
|
|
||||||
flags = prototype.flags;
|
|
||||||
startOffset = prototype.startOffset;
|
|
||||||
endOffset = prototype.endOffset;
|
|
||||||
type = prototype.type;
|
|
||||||
payload = prototype.payload;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
|
|
||||||
* @param prototype existing Token
|
|
||||||
* @param newTermBuffer buffer containing new term text
|
|
||||||
* @param offset the index in the buffer of the first character
|
|
||||||
* @param length number of valid characters in the buffer
|
|
||||||
*/
|
|
||||||
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
|
|
||||||
copyBuffer(newTermBuffer, offset, length);
|
|
||||||
positionIncrement = prototype.positionIncrement;
|
|
||||||
positionLength = prototype.positionLength;
|
|
||||||
flags = prototype.flags;
|
|
||||||
startOffset = prototype.startOffset;
|
|
||||||
endOffset = prototype.endOffset;
|
|
||||||
type = prototype.type;
|
|
||||||
payload = prototype.payload;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void copyTo(AttributeImpl target) {
|
public void copyTo(AttributeImpl target) {
|
||||||
if (target instanceof Token) {
|
super.copyTo(target);
|
||||||
final Token to = (Token) target;
|
((FlagsAttribute) target).setFlags(flags);
|
||||||
to.reinit(this);
|
((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
|
||||||
// reinit shares the payload, so clone it:
|
|
||||||
if (payload !=null) {
|
|
||||||
to.payload = payload.clone();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
super.copyTo(target);
|
|
||||||
((OffsetAttribute) target).setOffset(startOffset, endOffset);
|
|
||||||
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
|
|
||||||
((PositionLengthAttribute) target).setPositionLength(positionLength);
|
|
||||||
((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
|
|
||||||
((FlagsAttribute) target).setFlags(flags);
|
|
||||||
((TypeAttribute) target).setType(type);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void reflectWith(AttributeReflector reflector) {
|
public void reflectWith(AttributeReflector reflector) {
|
||||||
super.reflectWith(reflector);
|
super.reflectWith(reflector);
|
||||||
reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
|
|
||||||
reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
|
|
||||||
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
|
|
||||||
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
|
|
||||||
reflector.reflect(PayloadAttribute.class, "payload", payload);
|
|
||||||
reflector.reflect(FlagsAttribute.class, "flags", flags);
|
reflector.reflect(FlagsAttribute.class, "flags", flags);
|
||||||
reflector.reflect(TypeAttribute.class, "type", type);
|
reflector.reflect(PayloadAttribute.class, "payload", payload);
|
||||||
}
|
|
||||||
|
|
||||||
private void checkOffsets(int startOffset, int endOffset) {
|
|
||||||
if (startOffset < 0 || endOffset < startOffset) {
|
|
||||||
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
|
|
||||||
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Convenience factory that returns <code>Token</code> as implementation for the basic
|
/** Convenience factory that returns <code>Token</code> as implementation for the basic
|
||||||
|
@ -628,43 +208,6 @@ public class Token extends CharTermAttributeImpl
|
||||||
* attributes.
|
* attributes.
|
||||||
* @since 3.0
|
* @since 3.0
|
||||||
*/
|
*/
|
||||||
public static final AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
|
public static final AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
|
||||||
new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class);
|
||||||
|
|
||||||
/** <b>Expert:</b> Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes
|
|
||||||
* and for all other attributes calls the given delegate factory.
|
|
||||||
* @since 3.0
|
|
||||||
*/
|
|
||||||
public static final class TokenAttributeFactory extends AttributeSource.AttributeFactory {
|
|
||||||
|
|
||||||
private final AttributeSource.AttributeFactory delegate;
|
|
||||||
|
|
||||||
/** <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
|
|
||||||
* and for all other attributes calls the given delegate factory. */
|
|
||||||
public TokenAttributeFactory(AttributeSource.AttributeFactory delegate) {
|
|
||||||
this.delegate = delegate;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
|
|
||||||
return attClass.isAssignableFrom(Token.class)
|
|
||||||
? new Token() : delegate.createAttributeInstance(attClass);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object other) {
|
|
||||||
if (this == other) return true;
|
|
||||||
if (other instanceof TokenAttributeFactory) {
|
|
||||||
final TokenAttributeFactory af = (TokenAttributeFactory) other;
|
|
||||||
return this.delegate.equals(af.delegate);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return delegate.hashCode() ^ 0x0a45aa31;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,11 +21,13 @@ import java.io.IOException;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.lang.reflect.Modifier;
|
import java.lang.reflect.Modifier;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
|
@ -85,11 +87,15 @@ import org.apache.lucene.util.AttributeSource;
|
||||||
*/
|
*/
|
||||||
public abstract class TokenStream extends AttributeSource implements Closeable {
|
public abstract class TokenStream extends AttributeSource implements Closeable {
|
||||||
|
|
||||||
|
/** Default {@link AttributeFactory} instance that should be used for TokenStreams. */
|
||||||
|
public static final AttributeFactory DEFAULT_TOKEN_ATTRIBUTE_FACTORY =
|
||||||
|
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, PackedTokenAttributeImpl.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A TokenStream using the default attribute factory.
|
* A TokenStream using the default attribute factory.
|
||||||
*/
|
*/
|
||||||
protected TokenStream() {
|
protected TokenStream() {
|
||||||
super(Token.TOKEN_ATTRIBUTE_FACTORY);
|
super(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
|
||||||
assert assertFinal();
|
assert assertFinal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
|
@ -813,7 +813,7 @@ Now we're going to implement our own custom Attribute for part-of-speech tagging
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
|
This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
|
||||||
{@link org.apache.lucene.util.AttributeSource.AttributeFactory}. The factory accepts an Attribute interface as argument
|
{@link org.apache.lucene.util.AttributeFactory}. The factory accepts an Attribute interface as argument
|
||||||
and returns an actual instance. You can implement your own factory if you need to change the default behavior.
|
and returns an actual instance. You can implement your own factory if you need to change the default behavior.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
|
|
|
@ -0,0 +1,206 @@
|
||||||
|
package org.apache.lucene.analysis.tokenattributes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
|
|
||||||
|
/** Default implementation of the common attributes used by Lucene:<ul>
|
||||||
|
* <li>{@link CharTermAttribute}
|
||||||
|
* <li>{@link TypeAttribute}
|
||||||
|
* <li>{@link PositionIncrementAttribute}
|
||||||
|
* <li>{@link PositionLengthAttribute}
|
||||||
|
* <li>{@link OffsetAttribute}
|
||||||
|
* </ul>*/
|
||||||
|
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
||||||
|
implements TypeAttribute, PositionIncrementAttribute,
|
||||||
|
PositionLengthAttribute, OffsetAttribute {
|
||||||
|
|
||||||
|
private int startOffset,endOffset;
|
||||||
|
private String type = DEFAULT_TYPE;
|
||||||
|
private int positionIncrement = 1;
|
||||||
|
private int positionLength = 1;
|
||||||
|
|
||||||
|
/** Constructs the attribute implementation. */
|
||||||
|
public PackedTokenAttributeImpl() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see PositionIncrementAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setPositionIncrement(int positionIncrement) {
|
||||||
|
if (positionIncrement < 0)
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("Increment must be zero or greater: " + positionIncrement);
|
||||||
|
this.positionIncrement = positionIncrement;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see PositionIncrementAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getPositionIncrement() {
|
||||||
|
return positionIncrement;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see PositionLengthAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setPositionLength(int positionLength) {
|
||||||
|
this.positionLength = positionLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see PositionLengthAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getPositionLength() {
|
||||||
|
return positionLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see OffsetAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public final int startOffset() {
|
||||||
|
return startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see OffsetAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public final int endOffset() {
|
||||||
|
return endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see OffsetAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void setOffset(int startOffset, int endOffset) {
|
||||||
|
if (startOffset < 0 || endOffset < startOffset) {
|
||||||
|
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
|
||||||
|
+ "startOffset=" + startOffset + ",endOffset=" + endOffset);
|
||||||
|
}
|
||||||
|
this.startOffset = startOffset;
|
||||||
|
this.endOffset = endOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see TypeAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public final String type() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
* @see TypeAttribute
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public final void setType(String type) {
|
||||||
|
this.type = type;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Resets the attributes
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
super.clear();
|
||||||
|
positionIncrement = positionLength = 1;
|
||||||
|
startOffset = endOffset = 0;
|
||||||
|
type = DEFAULT_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PackedTokenAttributeImpl clone() {
|
||||||
|
return (PackedTokenAttributeImpl) super.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (obj == this)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (obj instanceof PackedTokenAttributeImpl) {
|
||||||
|
final PackedTokenAttributeImpl other = (PackedTokenAttributeImpl) obj;
|
||||||
|
return (startOffset == other.startOffset &&
|
||||||
|
endOffset == other.endOffset &&
|
||||||
|
positionIncrement == other.positionIncrement &&
|
||||||
|
positionLength == other.positionLength &&
|
||||||
|
(type == null ? other.type == null : type.equals(other.type)) &&
|
||||||
|
super.equals(obj)
|
||||||
|
);
|
||||||
|
} else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int code = super.hashCode();
|
||||||
|
code = code * 31 + startOffset;
|
||||||
|
code = code * 31 + endOffset;
|
||||||
|
code = code * 31 + positionIncrement;
|
||||||
|
code = code * 31 + positionLength;
|
||||||
|
if (type != null)
|
||||||
|
code = code * 31 + type.hashCode();
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void copyTo(AttributeImpl target) {
|
||||||
|
if (target instanceof PackedTokenAttributeImpl) {
|
||||||
|
final PackedTokenAttributeImpl to = (PackedTokenAttributeImpl) target;
|
||||||
|
to.copyBuffer(buffer(), 0, length());
|
||||||
|
to.positionIncrement = positionIncrement;
|
||||||
|
to.positionLength = positionLength;
|
||||||
|
to.startOffset = startOffset;
|
||||||
|
to.endOffset = endOffset;
|
||||||
|
to.type = type;
|
||||||
|
} else {
|
||||||
|
super.copyTo(target);
|
||||||
|
((OffsetAttribute) target).setOffset(startOffset, endOffset);
|
||||||
|
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
|
||||||
|
((PositionLengthAttribute) target).setPositionLength(positionLength);
|
||||||
|
((TypeAttribute) target).setType(type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports this attribute's values to the given reflector: first the
 * superclass's values, then each packed attribute (offsets, position
 * increment, position length, type) keyed by its declaring interface.
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  super.reflectWith(reflector);
  reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
  reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
  reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
  reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
  reflector.reflect(TypeAttribute.class, "type", type);
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,202 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.lang.invoke.MethodHandle;
|
||||||
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.lang.invoke.MethodType;
|
||||||
|
import java.lang.ref.Reference;
|
||||||
|
import java.lang.ref.WeakReference;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An AttributeFactory creates instances of {@link AttributeImpl}s.
|
||||||
|
*/
|
||||||
|
public abstract class AttributeFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
|
||||||
|
*/
|
||||||
|
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a correctly typed {@link MethodHandle} for the no-arg ctor of the given class.
|
||||||
|
*/
|
||||||
|
static final MethodHandle findAttributeImplCtor(Class<? extends AttributeImpl> clazz) {
|
||||||
|
try {
|
||||||
|
return lookup.findConstructor(clazz, NO_ARG_CTOR).asType(NO_ARG_RETURNING_ATTRIBUTEIMPL);
|
||||||
|
} catch (NoSuchMethodException | IllegalAccessException e) {
|
||||||
|
throw new IllegalArgumentException("Cannot lookup accessible no-arg constructor for: " + clazz.getName(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final MethodHandles.Lookup lookup = MethodHandles.publicLookup();
|
||||||
|
private static final MethodType NO_ARG_CTOR = MethodType.methodType(void.class);
|
||||||
|
private static final MethodType NO_ARG_RETURNING_ATTRIBUTEIMPL = MethodType.methodType(AttributeImpl.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the default factory that creates {@link AttributeImpl}s using the
|
||||||
|
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
|
||||||
|
*/
|
||||||
|
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory(true);
|
||||||
|
|
||||||
|
static final class DefaultAttributeFactory extends AttributeFactory {
|
||||||
|
private final WeakIdentityMap<Class<? extends Attribute>, Object> attClassImplMap =
|
||||||
|
WeakIdentityMap.newConcurrentHashMap(false);
|
||||||
|
private final ClassLoader myClassLoader = getClass().getClassLoader();
|
||||||
|
private final boolean useMethodHandles;
|
||||||
|
|
||||||
|
// this constructor is available for tests, to be able to test the pure-reflective case, too
|
||||||
|
DefaultAttributeFactory(boolean useMethodHandles) {
|
||||||
|
this.useMethodHandles = useMethodHandles;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
|
||||||
|
// first lookup from cache:
|
||||||
|
Object cached = attClassImplMap.get(attClass);
|
||||||
|
if (cached instanceof MethodHandle) {
|
||||||
|
return invokeMethodHandle((MethodHandle) cached);
|
||||||
|
} else if (cached instanceof Reference) {
|
||||||
|
@SuppressWarnings("unchecked") final Class<? extends AttributeImpl> clazz =
|
||||||
|
((Reference<Class<? extends AttributeImpl>>) cached).get();
|
||||||
|
if (clazz != null) {
|
||||||
|
return invokeReflective(clazz);
|
||||||
|
}
|
||||||
|
cached = null;
|
||||||
|
// fall-through
|
||||||
|
}
|
||||||
|
// No cache hit!
|
||||||
|
// Please note: we have the slight chance that another thread may do the same, but who cares?
|
||||||
|
assert cached == null;
|
||||||
|
final Class<? extends AttributeImpl> implClazz = findImplClass(attClass);
|
||||||
|
// if the attribute impl is from our own ClassLoader, we optimize to use pre-allocated MethodHandle to instantiate the object
|
||||||
|
if (useMethodHandles && implClazz.getClassLoader() == myClassLoader) {
|
||||||
|
final MethodHandle constr = findAttributeImplCtor(implClazz);
|
||||||
|
attClassImplMap.put(attClass, constr);
|
||||||
|
return invokeMethodHandle(constr);
|
||||||
|
} else {
|
||||||
|
// otherwise, to not refer to the class forever (because the MethodHandle strongly
|
||||||
|
// references the class), so it can never be unloaded, we use slower reflection:
|
||||||
|
attClassImplMap.put(attClass, new WeakReference<>(implClazz));
|
||||||
|
return invokeReflective(implClazz);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Class<? extends AttributeImpl> findImplClass(Class<? extends Attribute> attClass) {
|
||||||
|
try {
|
||||||
|
return Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader()).asSubclass(AttributeImpl.class);
|
||||||
|
} catch (ClassNotFoundException cnfe) {
|
||||||
|
throw new IllegalArgumentException("Cannot find implementing class for: " + attClass.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AttributeImpl invokeMethodHandle(MethodHandle constr) {
|
||||||
|
try {
|
||||||
|
return (AttributeImpl) constr.invokeExact();
|
||||||
|
} catch (Throwable t) {
|
||||||
|
rethrow(t);
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AttributeImpl invokeReflective(Class<? extends AttributeImpl> implClass) {
|
||||||
|
try {
|
||||||
|
return implClass.newInstance();
|
||||||
|
} catch (InstantiationException | IllegalAccessException e) {
|
||||||
|
throw new IllegalArgumentException("Cannot instantiate implementing class: " + implClass.getName(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** <b>Expert</b>: AttributeFactory returning an instance of the given {@code clazz} for the
|
||||||
|
* attributes it implements. For all other attributes it calls the given delegate factory
|
||||||
|
* as fallback. This class can be used to prefer a specific {@code AttributeImpl} which
|
||||||
|
* combines multiple attributes over separate classes.
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public abstract static class StaticImplementationAttributeFactory<A extends AttributeImpl> extends AttributeFactory {
|
||||||
|
private final AttributeFactory delegate;
|
||||||
|
private final Class<A> clazz;
|
||||||
|
|
||||||
|
/** <b>Expert</b>: Creates an AttributeFactory returning {@code clazz} as instance for the
|
||||||
|
* attributes it implements and for all other attributes calls the given delegate factory. */
|
||||||
|
public StaticImplementationAttributeFactory(AttributeFactory delegate, Class<A> clazz) {
|
||||||
|
this.delegate = delegate;
|
||||||
|
this.clazz = clazz;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
|
||||||
|
return attClass.isAssignableFrom(clazz) ? createInstance() : delegate.createAttributeInstance(attClass);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates an instance of {@code A}. */
|
||||||
|
protected abstract A createInstance();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (this == other)
|
||||||
|
return true;
|
||||||
|
if (other == null || other.getClass() != this.getClass())
|
||||||
|
return false;
|
||||||
|
@SuppressWarnings("rawtypes")
|
||||||
|
final StaticImplementationAttributeFactory af = (StaticImplementationAttributeFactory) other;
|
||||||
|
return this.delegate.equals(af.delegate) && this.clazz == af.clazz;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 31 * delegate.hashCode() + clazz.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns an AttributeFactory returning an instance of the given {@code clazz} for the
|
||||||
|
* attributes it implements. The given {@code clazz} must have a public no-arg constructor.
|
||||||
|
* For all other attributes it calls the given delegate factory as fallback.
|
||||||
|
* This method can be used to prefer a specific {@code AttributeImpl} which combines
|
||||||
|
* multiple attributes over separate classes.
|
||||||
|
* <p>Please save instances created by this method in a static final field, because
|
||||||
|
* on each call, this does reflection for creating a {@link MethodHandle}.
|
||||||
|
*/
|
||||||
|
public static <A extends AttributeImpl> AttributeFactory getStaticImplementation(AttributeFactory delegate, Class<A> clazz) {
|
||||||
|
final MethodHandle constr = findAttributeImplCtor(clazz);
|
||||||
|
return new StaticImplementationAttributeFactory<A>(delegate, clazz) {
|
||||||
|
@Override
|
||||||
|
protected A createInstance() {
|
||||||
|
try {
|
||||||
|
return (A) constr.invokeExact();
|
||||||
|
} catch (Throwable t) {
|
||||||
|
rethrow(t);
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hack to rethrow unknown Exceptions from {@link MethodHandle#invoke}:
|
||||||
|
// TODO: remove the impl in test-framework, this one is more elegant :-)
|
||||||
|
static void rethrow(Throwable t) {
|
||||||
|
AttributeFactory.<Error>rethrow0(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private static <T extends Throwable> void rethrow0(Throwable t) throws T {
|
||||||
|
throw (T) t;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -19,8 +19,7 @@ package org.apache.lucene.util;
|
||||||
|
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
import java.lang.reflect.Modifier;
|
import java.lang.reflect.Modifier;
|
||||||
import java.lang.ref.WeakReference;
|
import java.lang.ref.Reference;
|
||||||
import java.util.LinkedList;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for Attributes that can be added to a
|
* Base class for Attributes that can be added to a
|
||||||
|
@ -91,12 +90,14 @@ public abstract class AttributeImpl implements Cloneable, Attribute {
|
||||||
*/
|
*/
|
||||||
public void reflectWith(AttributeReflector reflector) {
|
public void reflectWith(AttributeReflector reflector) {
|
||||||
final Class<? extends AttributeImpl> clazz = this.getClass();
|
final Class<? extends AttributeImpl> clazz = this.getClass();
|
||||||
final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
|
final Reference<Class<? extends Attribute>>[] interfaces = AttributeSource.getAttributeInterfaces(clazz);
|
||||||
if (interfaces.size() != 1) {
|
if (interfaces.length != 1) {
|
||||||
throw new UnsupportedOperationException(clazz.getName() +
|
throw new UnsupportedOperationException(clazz.getName() +
|
||||||
" implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
|
" implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
|
||||||
}
|
}
|
||||||
final Class<? extends Attribute> interf = interfaces.getFirst().get();
|
final Class<? extends Attribute> interf = interfaces[0].get();
|
||||||
|
assert (interf != null) :
|
||||||
|
"We have a strong reference on the class holding the interfaces, so they should never get evicted";
|
||||||
final Field[] fields = clazz.getDeclaredFields();
|
final Field[] fields = clazz.getDeclaredFields();
|
||||||
try {
|
try {
|
||||||
for (int i = 0; i < fields.length; i++) {
|
for (int i = 0; i < fields.length; i++) {
|
||||||
|
|
|
@ -17,12 +17,14 @@ package org.apache.lucene.util;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.lang.ref.Reference;
|
||||||
import java.lang.ref.WeakReference;
|
import java.lang.ref.WeakReference;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
@ -38,58 +40,14 @@ import org.apache.lucene.analysis.TokenStream; // for javadocs
|
||||||
* it creates a new instance and returns it.
|
* it creates a new instance and returns it.
|
||||||
*/
|
*/
|
||||||
public class AttributeSource {
|
public class AttributeSource {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An AttributeFactory creates instances of {@link AttributeImpl}s.
|
* This is the default factory that creates {@link AttributeImpl}s using the
|
||||||
|
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
|
||||||
|
* @deprecated use {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}
|
||||||
*/
|
*/
|
||||||
public static abstract class AttributeFactory {
|
@Deprecated
|
||||||
/**
|
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
|
||||||
* returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
|
|
||||||
*/
|
|
||||||
public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is the default factory that creates {@link AttributeImpl}s using the
|
|
||||||
* class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
|
|
||||||
*/
|
|
||||||
public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
|
|
||||||
|
|
||||||
private static final class DefaultAttributeFactory extends AttributeFactory {
|
|
||||||
private static final WeakIdentityMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>> attClassImplMap =
|
|
||||||
WeakIdentityMap.newConcurrentHashMap(false);
|
|
||||||
|
|
||||||
DefaultAttributeFactory() {}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
|
|
||||||
try {
|
|
||||||
return getClassForInterface(attClass).newInstance();
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Class<? extends AttributeImpl> getClassForInterface(Class<? extends Attribute> attClass) {
|
|
||||||
final WeakReference<Class<? extends AttributeImpl>> ref = attClassImplMap.get(attClass);
|
|
||||||
Class<? extends AttributeImpl> clazz = (ref == null) ? null : ref.get();
|
|
||||||
if (clazz == null) {
|
|
||||||
// we have the slight chance that another thread may do the same, but who cares?
|
|
||||||
try {
|
|
||||||
attClassImplMap.put(attClass,
|
|
||||||
new WeakReference<Class<? extends AttributeImpl>>(
|
|
||||||
clazz = Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader())
|
|
||||||
.asSubclass(AttributeImpl.class)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
} catch (ClassNotFoundException e) {
|
|
||||||
throw new IllegalArgumentException("Could not find implementing class for " + attClass.getName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return clazz;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class holds the state of an AttributeSource.
|
* This class holds the state of an AttributeSource.
|
||||||
|
@ -122,7 +80,7 @@ public class AttributeSource {
|
||||||
private final AttributeFactory factory;
|
private final AttributeFactory factory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
|
* An AttributeSource using the default attribute factory {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
|
||||||
*/
|
*/
|
||||||
public AttributeSource() {
|
public AttributeSource() {
|
||||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||||
|
@ -200,26 +158,28 @@ public class AttributeSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
|
/** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
|
||||||
private static final WeakIdentityMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
|
private static final WeakIdentityMap<Class<? extends AttributeImpl>,Reference<Class<? extends Attribute>>[]> knownImplClasses =
|
||||||
WeakIdentityMap.newConcurrentHashMap(false);
|
WeakIdentityMap.newConcurrentHashMap(false);
|
||||||
|
|
||||||
static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
|
static Reference<Class<? extends Attribute>>[] getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
|
||||||
LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
|
Reference<Class<? extends Attribute>>[] foundInterfaces = knownImplClasses.get(clazz);
|
||||||
if (foundInterfaces == null) {
|
if (foundInterfaces == null) {
|
||||||
// we have the slight chance that another thread may do the same, but who cares?
|
// we have the slight chance that another thread may do the same, but who cares?
|
||||||
foundInterfaces = new LinkedList<>();
|
final List<Reference<Class<? extends Attribute>>> intfList = new ArrayList<>();
|
||||||
// find all interfaces that this attribute instance implements
|
// find all interfaces that this attribute instance implements
|
||||||
// and that extend the Attribute interface
|
// and that extend the Attribute interface
|
||||||
Class<?> actClazz = clazz;
|
Class<?> actClazz = clazz;
|
||||||
do {
|
do {
|
||||||
for (Class<?> curInterface : actClazz.getInterfaces()) {
|
for (Class<?> curInterface : actClazz.getInterfaces()) {
|
||||||
if (curInterface != Attribute.class && Attribute.class.isAssignableFrom(curInterface)) {
|
if (curInterface != Attribute.class && Attribute.class.isAssignableFrom(curInterface)) {
|
||||||
foundInterfaces.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
|
intfList.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
actClazz = actClazz.getSuperclass();
|
actClazz = actClazz.getSuperclass();
|
||||||
} while (actClazz != null);
|
} while (actClazz != null);
|
||||||
knownImplClasses.put(clazz, foundInterfaces);
|
@SuppressWarnings({"unchecked", "rawtypes"}) final Reference<Class<? extends Attribute>>[] a =
|
||||||
|
intfList.toArray(new Reference[intfList.size()]);
|
||||||
|
knownImplClasses.put(clazz, foundInterfaces = a);
|
||||||
}
|
}
|
||||||
return foundInterfaces;
|
return foundInterfaces;
|
||||||
}
|
}
|
||||||
|
@ -235,11 +195,9 @@ public class AttributeSource {
|
||||||
public final void addAttributeImpl(final AttributeImpl att) {
|
public final void addAttributeImpl(final AttributeImpl att) {
|
||||||
final Class<? extends AttributeImpl> clazz = att.getClass();
|
final Class<? extends AttributeImpl> clazz = att.getClass();
|
||||||
if (attributeImpls.containsKey(clazz)) return;
|
if (attributeImpls.containsKey(clazz)) return;
|
||||||
final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
|
|
||||||
getAttributeInterfaces(clazz);
|
|
||||||
|
|
||||||
// add all interfaces of this AttributeImpl to the maps
|
// add all interfaces of this AttributeImpl to the maps
|
||||||
for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
|
for (Reference<Class<? extends Attribute>> curInterfaceRef : getAttributeInterfaces(clazz)) {
|
||||||
final Class<? extends Attribute> curInterface = curInterfaceRef.get();
|
final Class<? extends Attribute> curInterface = curInterfaceRef.get();
|
||||||
assert (curInterface != null) :
|
assert (curInterface != null) :
|
||||||
"We have a strong reference on the class holding the interfaces, so they should never get evicted";
|
"We have a strong reference on the class holding the interfaces, so they should never get evicted";
|
||||||
|
|
|
@ -27,146 +27,22 @@ import org.apache.lucene.util.TestUtil;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public class TestToken extends LuceneTestCase {
|
public class TestToken extends LuceneTestCase {
|
||||||
|
|
||||||
public void testCtor() throws Exception {
|
public void testCtor() throws Exception {
|
||||||
Token t = new Token();
|
Token t = new Token("hello", 0, 0);
|
||||||
char[] content = "hello".toCharArray();
|
|
||||||
t.copyBuffer(content, 0, content.length);
|
|
||||||
assertNotSame(t.buffer(), content);
|
|
||||||
assertEquals(0, t.startOffset());
|
assertEquals(0, t.startOffset());
|
||||||
assertEquals(0, t.endOffset());
|
assertEquals(0, t.endOffset());
|
||||||
|
assertEquals(1, t.getPositionIncrement());
|
||||||
|
assertEquals(1, t.getPositionLength());
|
||||||
assertEquals("hello", t.toString());
|
assertEquals("hello", t.toString());
|
||||||
assertEquals("word", t.type());
|
assertEquals("word", t.type());
|
||||||
assertEquals(0, t.getFlags());
|
assertEquals(0, t.getFlags());
|
||||||
|
assertNull(t.getPayload());
|
||||||
t = new Token();
|
|
||||||
t.setOffset(6, 22);
|
|
||||||
t.setFlags(7);
|
|
||||||
t.copyBuffer(content, 0, content.length);
|
|
||||||
assertEquals("hello", t.toString());
|
|
||||||
assertEquals("hello", t.toString());
|
|
||||||
assertEquals(6, t.startOffset());
|
|
||||||
assertEquals(22, t.endOffset());
|
|
||||||
assertEquals("word", t.type());
|
|
||||||
assertEquals(7, t.getFlags());
|
|
||||||
|
|
||||||
t = new Token();
|
|
||||||
t.setOffset(6, 22);
|
|
||||||
t.setType("junk");
|
|
||||||
t.copyBuffer(content, 0, content.length);
|
|
||||||
assertEquals("hello", t.toString());
|
|
||||||
assertEquals("hello", t.toString());
|
|
||||||
assertEquals(6, t.startOffset());
|
|
||||||
assertEquals(22, t.endOffset());
|
|
||||||
assertEquals("junk", t.type());
|
|
||||||
assertEquals(0, t.getFlags());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testResize() {
|
/* the CharTermAttributeStuff is tested by TestCharTermAttributeImpl */
|
||||||
Token t = new Token();
|
|
||||||
char[] content = "hello".toCharArray();
|
|
||||||
t.copyBuffer(content, 0, content.length);
|
|
||||||
for (int i = 0; i < 2000; i++)
|
|
||||||
{
|
|
||||||
t.resizeBuffer(i);
|
|
||||||
assertTrue(i <= t.buffer().length);
|
|
||||||
assertEquals("hello", t.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testGrow() {
|
|
||||||
Token t = new Token();
|
|
||||||
StringBuilder buf = new StringBuilder("ab");
|
|
||||||
for (int i = 0; i < 20; i++)
|
|
||||||
{
|
|
||||||
char[] content = buf.toString().toCharArray();
|
|
||||||
t.copyBuffer(content, 0, content.length);
|
|
||||||
assertEquals(buf.length(), t.length());
|
|
||||||
assertEquals(buf.toString(), t.toString());
|
|
||||||
buf.append(buf.toString());
|
|
||||||
}
|
|
||||||
assertEquals(1048576, t.length());
|
|
||||||
|
|
||||||
// now as a string, second variant
|
|
||||||
t = new Token();
|
|
||||||
buf = new StringBuilder("ab");
|
|
||||||
for (int i = 0; i < 20; i++)
|
|
||||||
{
|
|
||||||
t.setEmpty().append(buf);
|
|
||||||
String content = buf.toString();
|
|
||||||
assertEquals(content.length(), t.length());
|
|
||||||
assertEquals(content, t.toString());
|
|
||||||
buf.append(content);
|
|
||||||
}
|
|
||||||
assertEquals(1048576, t.length());
|
|
||||||
|
|
||||||
// Test for slow growth to a long term
|
|
||||||
t = new Token();
|
|
||||||
buf = new StringBuilder("a");
|
|
||||||
for (int i = 0; i < 20000; i++)
|
|
||||||
{
|
|
||||||
t.setEmpty().append(buf);
|
|
||||||
String content = buf.toString();
|
|
||||||
assertEquals(content.length(), t.length());
|
|
||||||
assertEquals(content, t.toString());
|
|
||||||
buf.append("a");
|
|
||||||
}
|
|
||||||
assertEquals(20000, t.length());
|
|
||||||
|
|
||||||
// Test for slow growth to a long term
|
|
||||||
t = new Token();
|
|
||||||
buf = new StringBuilder("a");
|
|
||||||
for (int i = 0; i < 20000; i++)
|
|
||||||
{
|
|
||||||
t.setEmpty().append(buf);
|
|
||||||
String content = buf.toString();
|
|
||||||
assertEquals(content.length(), t.length());
|
|
||||||
assertEquals(content, t.toString());
|
|
||||||
buf.append("a");
|
|
||||||
}
|
|
||||||
assertEquals(20000, t.length());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testToString() throws Exception {
|
|
||||||
char[] b = {'a', 'l', 'o', 'h', 'a'};
|
|
||||||
Token t = new Token("", 0, 5);
|
|
||||||
t.copyBuffer(b, 0, 5);
|
|
||||||
assertEquals("aloha", t.toString());
|
|
||||||
|
|
||||||
t.setEmpty().append("hi there");
|
|
||||||
assertEquals("hi there", t.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testTermBufferEquals() throws Exception {
|
|
||||||
Token t1a = new Token();
|
|
||||||
char[] content1a = "hello".toCharArray();
|
|
||||||
t1a.copyBuffer(content1a, 0, 5);
|
|
||||||
Token t1b = new Token();
|
|
||||||
char[] content1b = "hello".toCharArray();
|
|
||||||
t1b.copyBuffer(content1b, 0, 5);
|
|
||||||
Token t2 = new Token();
|
|
||||||
char[] content2 = "hello2".toCharArray();
|
|
||||||
t2.copyBuffer(content2, 0, 6);
|
|
||||||
assertTrue(t1a.equals(t1b));
|
|
||||||
assertFalse(t1a.equals(t2));
|
|
||||||
assertFalse(t2.equals(t1b));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testMixedStringArray() throws Exception {
|
|
||||||
Token t = new Token("hello", 0, 5);
|
|
||||||
assertEquals(t.length(), 5);
|
|
||||||
assertEquals(t.toString(), "hello");
|
|
||||||
t.setEmpty().append("hello2");
|
|
||||||
assertEquals(t.length(), 6);
|
|
||||||
assertEquals(t.toString(), "hello2");
|
|
||||||
t.copyBuffer("hello3".toCharArray(), 0, 6);
|
|
||||||
assertEquals(t.toString(), "hello3");
|
|
||||||
|
|
||||||
char[] buffer = t.buffer();
|
|
||||||
buffer[1] = 'o';
|
|
||||||
assertEquals(t.toString(), "hollo3");
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testClone() throws Exception {
|
public void testClone() throws Exception {
|
||||||
Token t = new Token();
|
Token t = new Token();
|
||||||
|
@ -174,20 +50,20 @@ public class TestToken extends LuceneTestCase {
|
||||||
char[] content = "hello".toCharArray();
|
char[] content = "hello".toCharArray();
|
||||||
t.copyBuffer(content, 0, 5);
|
t.copyBuffer(content, 0, 5);
|
||||||
char[] buf = t.buffer();
|
char[] buf = t.buffer();
|
||||||
Token copy = assertCloneIsEqual(t);
|
Token copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
|
||||||
assertEquals(t.toString(), copy.toString());
|
assertEquals(t.toString(), copy.toString());
|
||||||
assertNotSame(buf, copy.buffer());
|
assertNotSame(buf, copy.buffer());
|
||||||
|
|
||||||
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
|
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
|
||||||
t.setPayload(pl);
|
t.setPayload(pl);
|
||||||
copy = assertCloneIsEqual(t);
|
copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
|
||||||
assertEquals(pl, copy.getPayload());
|
assertEquals(pl, copy.getPayload());
|
||||||
assertNotSame(pl, copy.getPayload());
|
assertNotSame(pl, copy.getPayload());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCopyTo() throws Exception {
|
public void testCopyTo() throws Exception {
|
||||||
Token t = new Token();
|
Token t = new Token();
|
||||||
Token copy = assertCopyIsEqual(t);
|
Token copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
|
||||||
assertEquals("", t.toString());
|
assertEquals("", t.toString());
|
||||||
assertEquals("", copy.toString());
|
assertEquals("", copy.toString());
|
||||||
|
|
||||||
|
@ -196,13 +72,13 @@ public class TestToken extends LuceneTestCase {
|
||||||
char[] content = "hello".toCharArray();
|
char[] content = "hello".toCharArray();
|
||||||
t.copyBuffer(content, 0, 5);
|
t.copyBuffer(content, 0, 5);
|
||||||
char[] buf = t.buffer();
|
char[] buf = t.buffer();
|
||||||
copy = assertCopyIsEqual(t);
|
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
|
||||||
assertEquals(t.toString(), copy.toString());
|
assertEquals(t.toString(), copy.toString());
|
||||||
assertNotSame(buf, copy.buffer());
|
assertNotSame(buf, copy.buffer());
|
||||||
|
|
||||||
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
|
BytesRef pl = new BytesRef(new byte[]{1,2,3,4});
|
||||||
t.setPayload(pl);
|
t.setPayload(pl);
|
||||||
copy = assertCopyIsEqual(t);
|
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
|
||||||
assertEquals(pl, copy.getPayload());
|
assertEquals(pl, copy.getPayload());
|
||||||
assertNotSame(pl, copy.getPayload());
|
assertNotSame(pl, copy.getPayload());
|
||||||
}
|
}
|
||||||
|
@ -244,35 +120,19 @@ public class TestToken extends LuceneTestCase {
|
||||||
public void testAttributeReflection() throws Exception {
|
public void testAttributeReflection() throws Exception {
|
||||||
Token t = new Token("foobar", 6, 22);
|
Token t = new Token("foobar", 6, 22);
|
||||||
t.setFlags(8);
|
t.setFlags(8);
|
||||||
|
t.setPositionIncrement(3);
|
||||||
|
t.setPositionLength(11);
|
||||||
TestUtil.assertAttributeReflection(t,
|
TestUtil.assertAttributeReflection(t,
|
||||||
new HashMap<String, Object>() {{
|
new HashMap<String, Object>() {{
|
||||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||||
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
|
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
|
||||||
put(OffsetAttribute.class.getName() + "#startOffset", 6);
|
put(OffsetAttribute.class.getName() + "#startOffset", 6);
|
||||||
put(OffsetAttribute.class.getName() + "#endOffset", 22);
|
put(OffsetAttribute.class.getName() + "#endOffset", 22);
|
||||||
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
|
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
|
||||||
put(PositionLengthAttribute.class.getName() + "#positionLength", 1);
|
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
|
||||||
put(PayloadAttribute.class.getName() + "#payload", null);
|
put(PayloadAttribute.class.getName() + "#payload", null);
|
||||||
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
|
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
|
||||||
put(FlagsAttribute.class.getName() + "#flags", 8);
|
put(FlagsAttribute.class.getName() + "#flags", 8);
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
T clone = (T) att.clone();
|
|
||||||
assertEquals("Clone must be equal", att, clone);
|
|
||||||
assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
|
|
||||||
return clone;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T extends AttributeImpl> T assertCopyIsEqual(T att) throws Exception {
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
T copy = (T) att.getClass().newInstance();
|
|
||||||
att.copyTo(copy);
|
|
||||||
assertEquals("Copied instance must be equal", att, copy);
|
|
||||||
assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
|
|
||||||
return copy;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ package org.apache.lucene.analysis.tokenattributes;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TestToken;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
@ -95,7 +95,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
|
||||||
char[] content = "hello".toCharArray();
|
char[] content = "hello".toCharArray();
|
||||||
t.copyBuffer(content, 0, 5);
|
t.copyBuffer(content, 0, 5);
|
||||||
char[] buf = t.buffer();
|
char[] buf = t.buffer();
|
||||||
CharTermAttributeImpl copy = TestToken.assertCloneIsEqual(t);
|
CharTermAttributeImpl copy = assertCloneIsEqual(t);
|
||||||
assertEquals(t.toString(), copy.toString());
|
assertEquals(t.toString(), copy.toString());
|
||||||
assertNotSame(buf, copy.buffer());
|
assertNotSame(buf, copy.buffer());
|
||||||
}
|
}
|
||||||
|
@ -117,7 +117,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
|
||||||
|
|
||||||
public void testCopyTo() throws Exception {
|
public void testCopyTo() throws Exception {
|
||||||
CharTermAttributeImpl t = new CharTermAttributeImpl();
|
CharTermAttributeImpl t = new CharTermAttributeImpl();
|
||||||
CharTermAttributeImpl copy = TestToken.assertCopyIsEqual(t);
|
CharTermAttributeImpl copy = assertCopyIsEqual(t);
|
||||||
assertEquals("", t.toString());
|
assertEquals("", t.toString());
|
||||||
assertEquals("", copy.toString());
|
assertEquals("", copy.toString());
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
|
||||||
char[] content = "hello".toCharArray();
|
char[] content = "hello".toCharArray();
|
||||||
t.copyBuffer(content, 0, 5);
|
t.copyBuffer(content, 0, 5);
|
||||||
char[] buf = t.buffer();
|
char[] buf = t.buffer();
|
||||||
copy = TestToken.assertCopyIsEqual(t);
|
copy = assertCopyIsEqual(t);
|
||||||
assertEquals(t.toString(), copy.toString());
|
assertEquals(t.toString(), copy.toString());
|
||||||
assertNotSame(buf, copy.buffer());
|
assertNotSame(buf, copy.buffer());
|
||||||
}
|
}
|
||||||
|
@ -284,6 +284,23 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T extends AttributeImpl> T assertCloneIsEqual(T att) {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
T clone = (T) att.clone();
|
||||||
|
assertEquals("Clone must be equal", att, clone);
|
||||||
|
assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends AttributeImpl> T assertCopyIsEqual(T att) throws Exception {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
T copy = (T) att.getClass().newInstance();
|
||||||
|
att.copyTo(copy);
|
||||||
|
assertEquals("Copied instance must be equal", att, copy);
|
||||||
|
assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
// test speed of the dynamic instanceof checks in append(CharSequence),
|
// test speed of the dynamic instanceof checks in append(CharSequence),
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
package org.apache.lucene.analysis.tokenattributes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
public class TestPackedTokenAttributeImpl extends LuceneTestCase {
|
||||||
|
|
||||||
|
/* the CharTermAttributeStuff is tested by TestCharTermAttributeImpl */
|
||||||
|
|
||||||
|
public void testClone() throws Exception {
|
||||||
|
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
|
||||||
|
t.setOffset(0, 5);
|
||||||
|
char[] content = "hello".toCharArray();
|
||||||
|
t.copyBuffer(content, 0, 5);
|
||||||
|
char[] buf = t.buffer();
|
||||||
|
PackedTokenAttributeImpl copy = TestCharTermAttributeImpl.assertCloneIsEqual(t);
|
||||||
|
assertEquals(t.toString(), copy.toString());
|
||||||
|
assertNotSame(buf, copy.buffer());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCopyTo() throws Exception {
|
||||||
|
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
|
||||||
|
PackedTokenAttributeImpl copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
|
||||||
|
assertEquals("", t.toString());
|
||||||
|
assertEquals("", copy.toString());
|
||||||
|
|
||||||
|
t = new PackedTokenAttributeImpl();
|
||||||
|
t.setOffset(0, 5);
|
||||||
|
char[] content = "hello".toCharArray();
|
||||||
|
t.copyBuffer(content, 0, 5);
|
||||||
|
char[] buf = t.buffer();
|
||||||
|
copy = TestCharTermAttributeImpl.assertCopyIsEqual(t);
|
||||||
|
assertEquals(t.toString(), copy.toString());
|
||||||
|
assertNotSame(buf, copy.buffer());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPackedTokenAttributeFactory() throws Exception {
|
||||||
|
TokenStream ts = new MockTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
|
((Tokenizer)ts).setReader(new StringReader("foo bar"));
|
||||||
|
|
||||||
|
assertTrue("CharTermAttribute is not implemented by Token",
|
||||||
|
ts.addAttribute(CharTermAttribute.class) instanceof PackedTokenAttributeImpl);
|
||||||
|
assertTrue("OffsetAttribute is not implemented by Token",
|
||||||
|
ts.addAttribute(OffsetAttribute.class) instanceof PackedTokenAttributeImpl);
|
||||||
|
assertTrue("PositionIncrementAttribute is not implemented by Token",
|
||||||
|
ts.addAttribute(PositionIncrementAttribute.class) instanceof PackedTokenAttributeImpl);
|
||||||
|
assertTrue("TypeAttribute is not implemented by Token",
|
||||||
|
ts.addAttribute(TypeAttribute.class) instanceof PackedTokenAttributeImpl);
|
||||||
|
|
||||||
|
assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl",
|
||||||
|
ts.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testAttributeReflection() throws Exception {
|
||||||
|
PackedTokenAttributeImpl t = new PackedTokenAttributeImpl();
|
||||||
|
t.append("foobar");
|
||||||
|
t.setOffset(6, 22);
|
||||||
|
t.setPositionIncrement(3);
|
||||||
|
t.setPositionLength(11);
|
||||||
|
t.setType("foobar");
|
||||||
|
TestUtil.assertAttributeReflection(t,
|
||||||
|
new HashMap<String, Object>() {{
|
||||||
|
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||||
|
put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
|
||||||
|
put(OffsetAttribute.class.getName() + "#startOffset", 6);
|
||||||
|
put(OffsetAttribute.class.getName() + "#endOffset", 22);
|
||||||
|
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
|
||||||
|
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
|
||||||
|
put(TypeAttribute.class.getName() + "#type", "foobar");
|
||||||
|
}});
|
||||||
|
}
|
||||||
|
}
|
|
@ -37,15 +37,15 @@ import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase.Monster;
|
import org.apache.lucene.util.LuceneTestCase.Monster;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.TimeUnits;
|
import org.apache.lucene.util.TimeUnits;
|
||||||
import org.junit.Ignore;
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||||
|
|
||||||
// NOTE: SimpleText codec will consume very large amounts of
|
// NOTE: SimpleText codec will consume very large amounts of
|
||||||
|
|
|
@ -22,9 +22,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.spatial.prefix.tree.Cell;
|
import org.apache.lucene.spatial.prefix.tree.Cell;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeReflector;
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -51,10 +51,10 @@ class CellTokenStream extends TokenStream {
|
||||||
}
|
}
|
||||||
|
|
||||||
// just a wrapper to prevent adding CTA
|
// just a wrapper to prevent adding CTA
|
||||||
private static final class CellAttributeFactory extends AttributeSource.AttributeFactory {
|
private static final class CellAttributeFactory extends AttributeFactory {
|
||||||
private final AttributeSource.AttributeFactory delegate;
|
private final AttributeFactory delegate;
|
||||||
|
|
||||||
CellAttributeFactory(AttributeSource.AttributeFactory delegate) {
|
CellAttributeFactory(AttributeFactory delegate) {
|
||||||
this.delegate = delegate;
|
this.delegate = delegate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,9 +39,9 @@ import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
|
||||||
import org.apache.lucene.analysis.CannedBinaryTokenStream;
|
import org.apache.lucene.analysis.CannedBinaryTokenStream;
|
||||||
import org.apache.lucene.analysis.CannedTokenStream;
|
import org.apache.lucene.analysis.CannedTokenStream;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockBytesAttributeFactory;
|
|
||||||
import org.apache.lucene.analysis.MockTokenFilter;
|
import org.apache.lucene.analysis.MockTokenFilter;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.analysis.MockUTF16TermAttributeImpl;
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -52,6 +52,7 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||||
import org.apache.lucene.search.suggest.Input;
|
import org.apache.lucene.search.suggest.Input;
|
||||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LineFileDocs;
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -621,8 +622,6 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
||||||
private int numStopChars;
|
private int numStopChars;
|
||||||
private boolean preserveHoles;
|
private boolean preserveHoles;
|
||||||
|
|
||||||
private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
|
|
||||||
|
|
||||||
public MockTokenEatingAnalyzer(int numStopChars, boolean preserveHoles) {
|
public MockTokenEatingAnalyzer(int numStopChars, boolean preserveHoles) {
|
||||||
this.preserveHoles = preserveHoles;
|
this.preserveHoles = preserveHoles;
|
||||||
this.numStopChars = numStopChars;
|
this.numStopChars = numStopChars;
|
||||||
|
@ -630,7 +629,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStreamComponents createComponents(String fieldName) {
|
public TokenStreamComponents createComponents(String fieldName) {
|
||||||
MockTokenizer tokenizer = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
|
MockTokenizer tokenizer = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY,
|
||||||
|
MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
tokenizer.setEnableChecks(true);
|
tokenizer.setEnableChecks(true);
|
||||||
TokenStream next;
|
TokenStream next;
|
||||||
if (numStopChars != 0) {
|
if (numStopChars != 0) {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.CountDownLatch;
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
@ -38,8 +39,8 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.LineFileDocs;
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -935,16 +936,41 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
|
||||||
return mockTokenizer;
|
return mockTokenizer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a new AttributeFactory impl */
|
/**
|
||||||
public static AttributeFactory newAttributeFactory(Random random) {
|
* This provides the default AttributeFactory in reflective-only mode (package private)
|
||||||
if (random.nextBoolean()) {
|
* so we can test it.
|
||||||
return Token.TOKEN_ATTRIBUTE_FACTORY;
|
*/
|
||||||
} else {
|
private final static AttributeFactory REFLECTIVE_ATTRIBUTE_FACTORY;
|
||||||
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
|
static {
|
||||||
|
try {
|
||||||
|
final Constructor<? extends AttributeFactory> constr = Class
|
||||||
|
.forName(AttributeFactory.class.getName() + "$DefaultAttributeFactory")
|
||||||
|
.asSubclass(AttributeFactory.class)
|
||||||
|
.getDeclaredConstructor(boolean.class);
|
||||||
|
constr.setAccessible(true);
|
||||||
|
REFLECTIVE_ATTRIBUTE_FACTORY = constr.newInstance(false);
|
||||||
|
} catch (ReflectiveOperationException e) {
|
||||||
|
throw new Error("Cannot initantiate a reflective-only DefaultAttributeFactory", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a new AttributeFactory impl */
|
/** Returns a random AttributeFactory impl */
|
||||||
|
public static AttributeFactory newAttributeFactory(Random random) {
|
||||||
|
switch (random.nextInt(4)) {
|
||||||
|
case 0:
|
||||||
|
return TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
|
||||||
|
case 1:
|
||||||
|
return Token.TOKEN_ATTRIBUTE_FACTORY;
|
||||||
|
case 2:
|
||||||
|
return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
|
||||||
|
case 3:
|
||||||
|
return REFLECTIVE_ATTRIBUTE_FACTORY;
|
||||||
|
default:
|
||||||
|
throw new AssertionError("Please fix the Random.nextInt() call above");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns a random AttributeFactory impl */
|
||||||
public static AttributeFactory newAttributeFactory() {
|
public static AttributeFactory newAttributeFactory() {
|
||||||
return newAttributeFactory(random());
|
return newAttributeFactory(random());
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,12 +20,11 @@ package org.apache.lucene.analysis;
|
||||||
/**
|
/**
|
||||||
* Analyzer for testing that encodes terms as UTF-16 bytes.
|
* Analyzer for testing that encodes terms as UTF-16 bytes.
|
||||||
*/
|
*/
|
||||||
public class MockBytesAnalyzer extends Analyzer {
|
public final class MockBytesAnalyzer extends Analyzer {
|
||||||
private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String fieldName) {
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
Tokenizer t = new MockTokenizer(factory, MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
|
Tokenizer t = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY,
|
||||||
|
MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
return new TokenStreamComponents(t);
|
return new TokenStreamComponents(t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
package org.apache.lucene.analysis;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.util.Attribute;
|
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
|
||||||
import org.apache.lucene.util.AttributeSource;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Attribute factory that implements CharTermAttribute with
|
|
||||||
* {@link MockUTF16TermAttributeImpl}
|
|
||||||
*/
|
|
||||||
public class MockBytesAttributeFactory extends AttributeSource.AttributeFactory {
|
|
||||||
private final AttributeSource.AttributeFactory delegate =
|
|
||||||
AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AttributeImpl createAttributeInstance(
|
|
||||||
Class<? extends Attribute> attClass) {
|
|
||||||
return attClass.isAssignableFrom(MockUTF16TermAttributeImpl.class)
|
|
||||||
? new MockUTF16TermAttributeImpl()
|
|
||||||
: delegate.createAttributeInstance(attClass);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -18,12 +18,12 @@ package org.apache.lucene.analysis;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.RegExp;
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
|
|
||||||
|
@ -113,7 +113,7 @@ public class MockTokenizer extends Tokenizer {
|
||||||
this(factory, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
|
this(factory, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Calls {@link #MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory,CharacterRunAutomaton,boolean)
|
/** Calls {@link #MockTokenizer(AttributeFactory,CharacterRunAutomaton,boolean)
|
||||||
* MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */
|
* MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */
|
||||||
public MockTokenizer(AttributeFactory factory) {
|
public MockTokenizer(AttributeFactory factory) {
|
||||||
this(factory, WHITESPACE, true);
|
this(factory, WHITESPACE, true);
|
||||||
|
|
|
@ -17,9 +17,10 @@ package org.apache.lucene.analysis;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -27,12 +28,15 @@ import org.apache.lucene.util.BytesRef;
|
||||||
* text as UTF-16 bytes instead of as UTF-8 bytes.
|
* text as UTF-16 bytes instead of as UTF-8 bytes.
|
||||||
*/
|
*/
|
||||||
public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
|
public class MockUTF16TermAttributeImpl extends CharTermAttributeImpl {
|
||||||
static final Charset charset = Charset.forName("UTF-16LE");
|
|
||||||
|
/** Factory that returns an instance of this class for CharTermAttribute */
|
||||||
|
public static final AttributeFactory UTF16_TERM_ATTRIBUTE_FACTORY =
|
||||||
|
AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockUTF16TermAttributeImpl.class);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fillBytesRef() {
|
public void fillBytesRef() {
|
||||||
BytesRef bytes = getBytesRef();
|
BytesRef bytes = getBytesRef();
|
||||||
byte[] utf16 = toString().getBytes(charset);
|
byte[] utf16 = toString().getBytes(StandardCharsets.UTF_16LE);
|
||||||
bytes.bytes = utf16;
|
bytes.bytes = utf16;
|
||||||
bytes.offset = 0;
|
bytes.offset = 0;
|
||||||
bytes.length = utf16.length;
|
bytes.length = utf16.length;
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.StorableField;
|
import org.apache.lucene.index.StorableField;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.AttributeSource.State;
|
import org.apache.lucene.util.AttributeSource.State;
|
||||||
import org.apache.solr.analysis.SolrAnalyzer;
|
import org.apache.solr.analysis.SolrAnalyzer;
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue