mirror of https://github.com/apache/lucene.git
LUCENE-4642:
* TokenizerFactory.create(Reader) is made final, and calls the AttributeFactory-accepting version with AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
* TokenizerFactory.create(AttributeFactory, Reader) is made abstract
* Added AttributeFactory-accepting constructors to all Tokenizers with existing TokenizerFactory subclasses that didn't already have them
* Removed create(Reader) from all TokenizerFactory subclasses

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1456768 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c14131d4e7
commit
67083534ff
|
@ -33,11 +33,6 @@ import java.io.Reader;
|
|||
*
|
||||
*/
|
||||
public class KeywordTokenizerFactory extends TokenizerFactory {
|
||||
@Override
|
||||
public KeywordTokenizer create(Reader input) {
|
||||
return new KeywordTokenizer(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public KeywordTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new KeywordTokenizer(factory, input, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
|
||||
|
|
|
@ -41,11 +41,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {
|
|||
assureMatchVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LetterTokenizer create(Reader input) {
|
||||
return new LetterTokenizer(luceneMatchVersion, input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LetterTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new LetterTokenizer(luceneMatchVersion, factory, input);
|
||||
|
|
|
@ -42,11 +42,6 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
|
|||
assureMatchVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LowerCaseTokenizer create(Reader input) {
|
||||
return new LowerCaseTokenizer(luceneMatchVersion,input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LowerCaseTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new LowerCaseTokenizer(luceneMatchVersion, factory, input);
|
||||
|
|
|
@ -40,11 +40,6 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
|
|||
assureMatchVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public WhitespaceTokenizer create(Reader input) {
|
||||
return new WhitespaceTokenizer(luceneMatchVersion,input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public WhitespaceTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
|
||||
|
|
|
@ -55,11 +55,6 @@ public class EdgeNGramTokenizerFactory extends TokenizerFactory {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public EdgeNGramTokenizer create(Reader input) {
|
||||
return new EdgeNGramTokenizer(input, side, minGramSize, maxGramSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public EdgeNGramTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new EdgeNGramTokenizer(factory, input, side, minGramSize, maxGramSize);
|
||||
|
|
|
@ -49,12 +49,7 @@ public class NGramTokenizerFactory extends TokenizerFactory {
|
|||
minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
}
|
||||
|
||||
/** Creates the {@link TokenStream} of n-grams from the given {@link Reader}. */
|
||||
@Override
|
||||
public NGramTokenizer create(Reader input) {
|
||||
return new NGramTokenizer(input, minGramSize, maxGramSize);
|
||||
}
|
||||
|
||||
/** Creates the {@link TokenStream} of n-grams from the given {@link Reader} and {@link AttributeFactory}. */
|
||||
@Override
|
||||
public NGramTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new NGramTokenizer(factory, input, minGramSize, maxGramSize);
|
||||
|
|
|
@ -63,8 +63,17 @@ public class PathHierarchyTokenizer extends Tokenizer {
|
|||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) {
|
||||
this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
super(input);
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer
|
||||
(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
super(factory, input);
|
||||
if (bufferSize < 0) {
|
||||
throw new IllegalArgumentException("bufferSize cannot be negative");
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link PathHierarchyTokenizer}.
|
||||
|
@ -119,11 +120,11 @@ public class PathHierarchyTokenizerFactory extends TokenizerFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Tokenizer create(Reader input) {
|
||||
public Tokenizer create(AttributeFactory factory, Reader input) {
|
||||
if( reverse ) {
|
||||
return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip);
|
||||
return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
|
||||
}
|
||||
return new PathHierarchyTokenizer(input, delimiter, replacement, skip);
|
||||
return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -75,8 +75,17 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
|
|||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer
|
||||
(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) {
|
||||
this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
super(input);
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip);
|
||||
}
|
||||
public ReversePathHierarchyTokenizer
|
||||
(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
super(factory, input);
|
||||
if (bufferSize < 0) {
|
||||
throw new IllegalArgumentException("bufferSize cannot be negative");
|
||||
}
|
||||
|
|
|
@ -66,7 +66,12 @@ public final class PatternTokenizer extends Tokenizer {
|
|||
|
||||
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
||||
public PatternTokenizer(Reader input, Pattern pattern, int group) throws IOException {
|
||||
super(input);
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, pattern, group);
|
||||
}
|
||||
|
||||
/** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */
|
||||
public PatternTokenizer(AttributeFactory factory, Reader input, Pattern pattern, int group) throws IOException {
|
||||
super(factory, input);
|
||||
this.pattern = pattern;
|
||||
this.group = group;
|
||||
|
||||
|
|
|
@ -22,9 +22,8 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.pattern.PatternTokenizer;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link PatternTokenizer}.
|
||||
|
@ -91,9 +90,9 @@ public class PatternTokenizerFactory extends TokenizerFactory
|
|||
* Split the input using configured pattern
|
||||
*/
|
||||
@Override
|
||||
public PatternTokenizer create(final Reader in) {
|
||||
public PatternTokenizer create(final AttributeFactory factory, final Reader in) {
|
||||
try {
|
||||
return new PatternTokenizer(in, pattern, group);
|
||||
return new PatternTokenizer(factory, in, pattern, group);
|
||||
} catch( IOException ex ) {
|
||||
throw new RuntimeException("IOException thrown creating PatternTokenizer instance", ex);
|
||||
}
|
||||
|
|
|
@ -47,13 +47,6 @@ public class ClassicTokenizerFactory extends TokenizerFactory {
|
|||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ClassicTokenizer create(Reader input) {
|
||||
ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, input);
|
||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ClassicTokenizer create(AttributeFactory factory, Reader input) {
|
||||
ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input);
|
||||
|
|
|
@ -42,22 +42,12 @@ public class StandardTokenizerFactory extends TokenizerFactory {
|
|||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
assureMatchVersion();
|
||||
maxTokenLength = getInt("maxTokenLength",
|
||||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StandardTokenizer create(Reader input) {
|
||||
StandardTokenizer tokenizer
|
||||
= new StandardTokenizer(luceneMatchVersion, input);
|
||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||
return tokenizer;
|
||||
maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StandardTokenizer create(AttributeFactory factory, Reader input) {
|
||||
StandardTokenizer tokenizer
|
||||
= new StandardTokenizer(luceneMatchVersion, factory, input);
|
||||
StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
|
||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||
return tokenizer;
|
||||
}
|
||||
|
|
|
@ -47,13 +47,6 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
|
|||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UAX29URLEmailTokenizer create(Reader input) {
|
||||
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input);
|
||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input) {
|
||||
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input);
|
||||
|
|
|
@ -62,11 +62,11 @@ public abstract class TokenizerFactory extends AbstractAnalysisFactory {
|
|||
loader.reload(classloader);
|
||||
}
|
||||
|
||||
/** Creates a TokenStream of the specified input */
|
||||
public abstract Tokenizer create(Reader input);
|
||||
/** Creates a TokenStream of the specified input using the default attribute factory. */
|
||||
public final Tokenizer create(Reader input) {
|
||||
return create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
|
||||
}
|
||||
|
||||
/** Creates a TokenStream of the specified input using the given AttributeFactory */
|
||||
public Tokenizer create(AttributeFactory factory, Reader input) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
abstract public Tokenizer create(AttributeFactory factory, Reader input);
|
||||
}
|
||||
|
|
|
@ -35,11 +35,6 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
*/
|
||||
public class WikipediaTokenizerFactory extends TokenizerFactory {
|
||||
// TODO: add support for WikipediaTokenizer's advanced options.
|
||||
@Override
|
||||
public WikipediaTokenizer create(Reader input) {
|
||||
return new WikipediaTokenizer(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public WikipediaTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new WikipediaTokenizer(factory, input, WikipediaTokenizer.TOKENS_ONLY,
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
|||
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
/**
|
||||
* Sanity check some things about all factories,
|
||||
|
@ -146,8 +147,8 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
// some silly classes just so we can use checkRandomData
|
||||
private TokenizerFactory assertingTokenizer = new TokenizerFactory() {
|
||||
@Override
|
||||
public MockTokenizer create(Reader input) {
|
||||
return new MockTokenizer(input);
|
||||
public MockTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new MockTokenizer(factory, input);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -62,6 +62,8 @@ public final class ICUTokenizer extends Tokenizer {
|
|||
* Reader.
|
||||
* <p>
|
||||
* The default script-specific handling is used.
|
||||
* <p>
|
||||
* The default attribute factory is used.
|
||||
*
|
||||
* @param input Reader containing text to tokenize.
|
||||
* @see DefaultICUTokenizerConfig
|
||||
|
@ -73,12 +75,26 @@ public final class ICUTokenizer extends Tokenizer {
|
|||
/**
|
||||
* Construct a new ICUTokenizer that breaks text into words from the given
|
||||
* Reader, using a tailored BreakIterator configuration.
|
||||
* <p>
|
||||
* The default attribute factory is used.
|
||||
*
|
||||
* @param input Reader containing text to tokenize.
|
||||
* @param config Tailored BreakIterator configuration
|
||||
*/
|
||||
public ICUTokenizer(Reader input, ICUTokenizerConfig config) {
|
||||
super(input);
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new ICUTokenizer that breaks text into words from the given
|
||||
* Reader, using a tailored BreakIterator configuration.
|
||||
*
|
||||
* @param factory AttributeFactory to use
|
||||
* @param input Reader containing text to tokenize.
|
||||
* @param config Tailored BreakIterator configuration
|
||||
*/
|
||||
public ICUTokenizer(AttributeFactory factory, Reader input, ICUTokenizerConfig config) {
|
||||
super(factory, input);
|
||||
this.config = config;
|
||||
breaker = new CompositeBreakIterator(config);
|
||||
}
|
||||
|
|
|
@ -25,11 +25,11 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
@ -144,8 +144,8 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
|
|||
}
|
||||
|
||||
@Override
|
||||
public ICUTokenizer create(Reader input) {
|
||||
public ICUTokenizer create(AttributeFactory factory, Reader input) {
|
||||
assert config != null : "inform must be called first!";
|
||||
return new ICUTokenizer(input, config);
|
||||
return new ICUTokenizer(factory, input, config);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -187,6 +187,8 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
|
||||
/**
|
||||
* Create a new JapaneseTokenizer.
|
||||
* <p>
|
||||
* Uses the default AttributeFactory.
|
||||
*
|
||||
* @param input Reader containing text
|
||||
* @param userDictionary Optional: if non-null, user dictionary.
|
||||
|
@ -194,7 +196,21 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
* @param mode tokenization mode.
|
||||
*/
|
||||
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
|
||||
super(input);
|
||||
this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, userDictionary, discardPunctuation, mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new JapaneseTokenizer.
|
||||
*
|
||||
* @param factory the AttributeFactory to use
|
||||
* @param input Reader containing text
|
||||
* @param userDictionary Optional: if non-null, user dictionary.
|
||||
* @param discardPunctuation true if punctuation tokens should be dropped from the output.
|
||||
* @param mode tokenization mode.
|
||||
*/
|
||||
public JapaneseTokenizer
|
||||
(AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
|
||||
super(factory, input);
|
||||
dictionary = TokenInfoDictionary.getInstance();
|
||||
fst = dictionary.getFST();
|
||||
unkDictionary = UnknownDictionary.getInstance();
|
||||
|
|
|
@ -27,11 +27,10 @@ import java.nio.charset.CodingErrorAction;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ja.JapaneseTokenizer;
|
||||
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
||||
import org.apache.lucene.analysis.ja.dict.UserDictionary;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
|
@ -89,8 +88,8 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
|
|||
}
|
||||
|
||||
@Override
|
||||
public JapaneseTokenizer create(Reader input) {
|
||||
return new JapaneseTokenizer(input, userDictionary, discardPunctuation, mode);
|
||||
public JapaneseTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
|
||||
}
|
||||
|
||||
private Mode getMode(Map<String, String> args) {
|
||||
|
|
|
@ -27,11 +27,6 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class SmartChineseSentenceTokenizerFactory extends TokenizerFactory {
|
||||
@Override
|
||||
public SentenceTokenizer create(Reader input) {
|
||||
return new SentenceTokenizer(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SentenceTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new SentenceTokenizer(factory, input);
|
||||
|
|
|
@ -44,8 +44,9 @@ public abstract class BaseUIMATokenizer extends Tokenizer {
|
|||
protected AnalysisEngine ae;
|
||||
protected CAS cas;
|
||||
|
||||
protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
|
||||
super(reader);
|
||||
protected BaseUIMATokenizer
|
||||
(AttributeFactory factory, Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
|
||||
super(factory, reader);
|
||||
this.descriptorPath = descriptorPath;
|
||||
this.configurationParameters = configurationParameters;
|
||||
}
|
||||
|
|
|
@ -43,7 +43,12 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
private int finalOffset = 0;
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
|
||||
super(input, descriptorPath, configurationParameters);
|
||||
this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
|
||||
}
|
||||
|
||||
public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters,
|
||||
AttributeFactory factory, Reader input) {
|
||||
super(factory, input, descriptorPath, configurationParameters);
|
||||
this.tokenTypeString = tokenType;
|
||||
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||
this.offsetAttr = addAttribute(OffsetAttribute.class);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.uima;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
|
@ -52,7 +53,7 @@ public class UIMAAnnotationsTokenizerFactory extends TokenizerFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
public UIMAAnnotationsTokenizer create(Reader input) {
|
||||
return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input);
|
||||
public UIMAAnnotationsTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory, input);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,7 +53,12 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer {
|
|||
private int finalOffset = 0;
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
|
||||
super(input, descriptorPath, configurationParameters);
|
||||
this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
|
||||
}
|
||||
|
||||
public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath,
|
||||
Map<String, Object> configurationParameters, AttributeFactory factory, Reader input) {
|
||||
super(factory, input, descriptorPath, configurationParameters);
|
||||
this.tokenTypeString = tokenType;
|
||||
this.termAttr = addAttribute(CharTermAttribute.class);
|
||||
this.typeAttr = addAttribute(TypeAttribute.class);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.uima;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
|
@ -54,7 +55,8 @@ public class UIMATypeAwareAnnotationsTokenizerFactory extends TokenizerFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
public UIMATypeAwareAnnotationsTokenizer create(Reader input) {
|
||||
return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input);
|
||||
public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new UIMATypeAwareAnnotationsTokenizer
|
||||
(descriptorPath, tokenType, featurePath, configurationParameters, factory, input);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -100,12 +100,21 @@ public class MockTokenizer extends Tokenizer {
|
|||
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
|
||||
this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
/** Calls {@link #MockTokenizer(Reader, CharacterRunAutomaton, boolean) MockTokenizer(Reader, WHITESPACE, true)} */
|
||||
public MockTokenizer(Reader input) {
|
||||
this(input, WHITESPACE, true);
|
||||
}
|
||||
|
||||
|
||||
public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
|
||||
this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
/**
 * Calls {@link #MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory,Reader,CharacterRunAutomaton,boolean)
 * MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)}.
 *
 * @param factory attribute factory forwarded to the delegated constructor
 * @param input reader forwarded to the delegated constructor
 */
public MockTokenizer(AttributeFactory factory, Reader input) {
  // Forward the factory: delegating to the Reader-only overload would silently discard it.
  this(factory, input, WHITESPACE, true);
}
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT)
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.schema.DateField;
|
||||
import static org.apache.solr.schema.TrieField.TrieTypes;
|
||||
|
@ -54,8 +55,8 @@ public class TrieTokenizerFactory extends TokenizerFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TrieTokenizer create(Reader input) {
|
||||
return new TrieTokenizer(input, type, TrieTokenizer.getNumericTokenStream(precisionStep));
|
||||
public TrieTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new TrieTokenizer(input, type, TrieTokenizer.getNumericTokenStream(factory, precisionStep));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,8 +71,8 @@ final class TrieTokenizer extends Tokenizer {
|
|||
protected int startOfs, endOfs;
|
||||
protected boolean hasValue;
|
||||
|
||||
static NumericTokenStream getNumericTokenStream(int precisionStep) {
|
||||
return new NumericTokenStream(precisionStep);
|
||||
static NumericTokenStream getNumericTokenStream(AttributeFactory factory, int precisionStep) {
|
||||
return new NumericTokenStream(factory, precisionStep);
|
||||
}
|
||||
|
||||
public TrieTokenizer(Reader input, TrieTypes type, final NumericTokenStream ts) {
|
||||
|
|
|
@ -21,8 +21,8 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
|
||||
/**
|
||||
|
@ -53,10 +53,9 @@ public class MockTokenizerFactory extends TokenizerFactory {
|
|||
enableChecks = getBoolean("enableChecks", true);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public MockTokenizer create(Reader input) {
|
||||
MockTokenizer t = new MockTokenizer(input, pattern, false);
|
||||
public MockTokenizer create(AttributeFactory factory, Reader input) {
|
||||
MockTokenizer t = new MockTokenizer(factory, input, pattern, false);
|
||||
t.setEnableChecks(enableChecks);
|
||||
return t;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue