mirror of https://github.com/apache/lucene.git
LUCENE-5638: pack the core attributes into one impl by default
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1592353 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
94c513ad5b
commit
6e9cbf3986
|
@ -137,6 +137,10 @@ Optimizations
|
|||
* LUCENE-5634: IndexWriter reuses TokenStream instances for String and Numeric
|
||||
fields by default. (Uwe Schindler, Shay Banon, Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-5638: TokenStream uses a more performant AttributeFactory by default,
|
||||
that packs the core attributes into one impl, for faster clearAttributes(),
|
||||
saveState(), and restoreState(). (Uwe Schindler, Robert Muir)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-5600: HttpClientBase did not properly consume a connection if a server
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.path;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -68,7 +69,7 @@ public class PathHierarchyTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Reader;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -81,7 +82,7 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer( int bufferSize, char delimiter, char replacement, int skip) {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
|
||||
}
|
||||
public ReversePathHierarchyTokenizer
|
||||
(AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
|
|
|
@ -21,6 +21,8 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -65,7 +67,7 @@ public final class PatternTokenizer extends Tokenizer {
|
|||
|
||||
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
||||
public PatternTokenizer(Pattern pattern, int group) {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, pattern, group);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, pattern, group);
|
||||
}
|
||||
|
||||
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.th;
|
|||
import java.text.BreakIterator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArrayIterator;
|
||||
|
@ -59,7 +60,7 @@ public class ThaiTokenizer extends SegmentingTokenizerBase {
|
|||
|
||||
/** Creates a new ThaiTokenizer */
|
||||
public ThaiTokenizer() {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.Reader;
|
|||
|
||||
import java.text.BreakIterator;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
||||
|
@ -62,7 +63,7 @@ public abstract class SegmentingTokenizerBase extends Tokenizer {
|
|||
* be provided to this constructor.
|
||||
*/
|
||||
public SegmentingTokenizerBase(BreakIterator iterator) {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, iterator);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, iterator);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
|
@ -72,7 +73,7 @@ public abstract class TokenizerFactory extends AbstractAnalysisFactory {
|
|||
|
||||
/** Creates a Tokenizer using the default attribute factory. */
|
||||
public final Tokenizer create() {
|
||||
return create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||
return create(Token.TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
/** Creates a TokenStream of the specified input using the given AttributeFactory */
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.collation;
|
|||
|
||||
import java.text.Collator;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
|
@ -74,12 +75,12 @@ public class CollationAttributeFactory extends AttributeSource.AttributeFactory
|
|||
|
||||
/**
|
||||
* Create a CollationAttributeFactory, using
|
||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
|
||||
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
|
||||
* factory for all other attributes.
|
||||
* @param collator CollationKey generator
|
||||
*/
|
||||
public CollationAttributeFactory(Collator collator) {
|
||||
this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.segmentation;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -79,7 +80,7 @@ public final class ICUTokenizer extends Tokenizer {
|
|||
* @param config Tailored BreakIterator configuration
|
||||
*/
|
||||
public ICUTokenizer(ICUTokenizerConfig config) {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, config);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, config);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.collation;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
|
@ -68,12 +69,12 @@ public class ICUCollationAttributeFactory extends AttributeSource.AttributeFacto
|
|||
|
||||
/**
|
||||
* Create an ICUCollationAttributeFactory, using
|
||||
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
|
||||
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
|
||||
* factory for all other attributes.
|
||||
* @param collator CollationKey generator
|
||||
*/
|
||||
public ICUCollationAttributeFactory(Collator collator) {
|
||||
this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -195,7 +195,7 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
* @param mode tokenization mode.
|
||||
*/
|
||||
public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
|
||||
this(org.apache.lucene.analysis.Token.TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.text.BreakIterator;
|
|||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -47,7 +48,7 @@ public class HMMChineseTokenizer extends SegmentingTokenizerBase {
|
|||
|
||||
/** Creates a new HMMChineseTokenizer */
|
||||
public HMMChineseTokenizer() {
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||
this(Token.TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
/** Creates a new HMMChineseTokenizer, supplying the AttributeFactory */
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis.uima;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -43,7 +44,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
private int finalOffset = 0;
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
|
||||
this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||
this(descriptorPath, tokenType, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis.uima;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
@ -53,7 +54,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
private int finalOffset = 0;
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
|
||||
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
|
||||
}
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
|
||||
|
|
|
@ -89,7 +89,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
|
|||
* A TokenStream using the default attribute factory.
|
||||
*/
|
||||
protected TokenStream() {
|
||||
super();
|
||||
super(Token.TOKEN_ATTRIBUTE_FACTORY);
|
||||
assert assertFinal();
|
||||
}
|
||||
|
||||
|
|
|
@ -255,6 +255,8 @@ public class PreAnalyzedField extends FieldType {
|
|||
private PreAnalyzedParser parser;
|
||||
|
||||
public PreAnalyzedTokenizer(PreAnalyzedParser parser) {
|
||||
// we don't pack attributes, since we are used for (de)serialization and don't want bloat.
|
||||
super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue