LUCENE-5638: pack the core attributes into one impl by default

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1592353 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-05-04 14:36:27 +00:00
parent 94c513ad5b
commit 6e9cbf3986
16 changed files with 35 additions and 16 deletions

View File

@ -137,6 +137,10 @@ Optimizations
* LUCENE-5634: IndexWriter reuses TokenStream instances for String and Numeric
fields by default. (Uwe Schindler, Shay Banon, Mike McCandless, Robert Muir)
* LUCENE-5638: TokenStream uses a more performant AttributeFactory by default,
that packs the core attributes into one impl, for faster clearAttributes(),
saveState(), and restoreState(). (Uwe Schindler, Robert Muir)
Bug fixes
* LUCENE-5600: HttpClientBase did not properly consume a connection if a server

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.path;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -68,7 +69,7 @@ public class PathHierarchyTokenizer extends Tokenizer {
}
public PathHierarchyTokenizer(int bufferSize, char delimiter, char replacement, int skip) {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
}
public PathHierarchyTokenizer

View File

@ -21,6 +21,7 @@ import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -81,7 +82,7 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
}
public ReversePathHierarchyTokenizer( int bufferSize, char delimiter, char replacement, int skip) {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
this(Token.TOKEN_ATTRIBUTE_FACTORY, bufferSize, delimiter, replacement, skip);
}
public ReversePathHierarchyTokenizer
(AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {

View File

@ -21,6 +21,8 @@ import java.io.IOException;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -65,7 +67,7 @@ public final class PatternTokenizer extends Tokenizer {
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
public PatternTokenizer(Pattern pattern, int group) {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, pattern, group);
this(Token.TOKEN_ATTRIBUTE_FACTORY, pattern, group);
}
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.th;
import java.text.BreakIterator;
import java.util.Locale;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArrayIterator;
@ -59,7 +60,7 @@ public class ThaiTokenizer extends SegmentingTokenizerBase {
/** Creates a new ThaiTokenizer */
public ThaiTokenizer() {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
this(Token.TOKEN_ATTRIBUTE_FACTORY);
}
/** Creates a new ThaiTokenizer, supplying the AttributeFactory */

View File

@ -22,6 +22,7 @@ import java.io.Reader;
import java.text.BreakIterator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -62,7 +63,7 @@ public abstract class SegmentingTokenizerBase extends Tokenizer {
* be provided to this constructor.
*/
public SegmentingTokenizerBase(BreakIterator iterator) {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, iterator);
this(Token.TOKEN_ATTRIBUTE_FACTORY, iterator);
}
/**

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
@ -72,7 +73,7 @@ public abstract class TokenizerFactory extends AbstractAnalysisFactory {
/** Creates a TokenStream of the specified input using the default attribute factory. */
public final Tokenizer create() {
return create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
return create(Token.TOKEN_ATTRIBUTE_FACTORY);
}
/** Creates a TokenStream of the specified input using the given AttributeFactory */

View File

@ -19,6 +19,7 @@ package org.apache.lucene.collation;
import java.text.Collator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
@ -74,12 +75,12 @@ public class CollationAttributeFactory extends AttributeSource.AttributeFactory
/**
* Create a CollationAttributeFactory, using
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
* factory for all other attributes.
* @param collator CollationKey generator
*/
public CollationAttributeFactory(Collator collator) {
this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
}
/**

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.icu.segmentation;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.tokenattributes.ScriptAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -79,7 +80,7 @@ public final class ICUTokenizer extends Tokenizer {
* @param config Tailored BreakIterator configuration
*/
public ICUTokenizer(ICUTokenizerConfig config) {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, config);
this(Token.TOKEN_ATTRIBUTE_FACTORY, config);
}
/**

View File

@ -17,6 +17,7 @@ package org.apache.lucene.collation;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
@ -68,12 +69,12 @@ public class ICUCollationAttributeFactory extends AttributeSource.AttributeFacto
/**
* Create an ICUCollationAttributeFactory, using
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the
* {@link org.apache.lucene.analysis.Token#TOKEN_ATTRIBUTE_FACTORY} as the
* factory for all other attributes.
* @param collator CollationKey generator
*/
public ICUCollationAttributeFactory(Collator collator) {
this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator);
this(Token.TOKEN_ATTRIBUTE_FACTORY, collator);
}
/**

View File

@ -195,7 +195,7 @@ public final class JapaneseTokenizer extends Tokenizer {
* @param mode tokenization mode.
*/
public JapaneseTokenizer(UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
this(org.apache.lucene.analysis.Token.TOKEN_ATTRIBUTE_FACTORY, userDictionary, discardPunctuation, mode);
}
/**

View File

@ -22,6 +22,7 @@ import java.text.BreakIterator;
import java.util.Iterator;
import java.util.Locale;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -47,7 +48,7 @@ public class HMMChineseTokenizer extends SegmentingTokenizerBase {
/** Creates a new HMMChineseTokenizer */
public HMMChineseTokenizer() {
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
this(Token.TOKEN_ATTRIBUTE_FACTORY);
}
/** Creates a new HMMChineseTokenizer, supplying the AttributeFactory */

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.uima;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -43,7 +44,7 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
private int finalOffset = 0;
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
this(descriptorPath, tokenType, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
}
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.uima;
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -53,7 +54,7 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
private int finalOffset = 0;
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters) {
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, Token.TOKEN_ATTRIBUTE_FACTORY);
}
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,

View File

@ -89,7 +89,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
* A TokenStream using the default attribute factory.
*/
protected TokenStream() {
super();
super(Token.TOKEN_ATTRIBUTE_FACTORY);
assert assertFinal();
}

View File

@ -255,6 +255,8 @@ public class PreAnalyzedField extends FieldType {
private PreAnalyzedParser parser;
public PreAnalyzedTokenizer(PreAnalyzedParser parser) {
// we don't pack attributes, since we are used for (de)serialization and don't want bloat.
super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
this.parser = parser;
}