mirror of https://github.com/apache/lucene.git
LUCENE-4877: more param parsing verbs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1463335 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
932923ee4b
commit
6de43dea0a
|
@ -17,14 +17,11 @@ package org.apache.lucene.analysis.charfilter;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
|
@ -44,16 +41,7 @@ public class HTMLStripCharFilterFactory extends CharFilterFactory {
|
|||
/** Creates a new HTMLStripCharFilterFactory */
|
||||
public HTMLStripCharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String escapedTagsArg = args.remove("escapedTags");
|
||||
if (escapedTagsArg == null) {
|
||||
escapedTags = null;
|
||||
} else {
|
||||
escapedTags = new HashSet<String>();
|
||||
Matcher matcher = TAG_NAME_PATTERN.matcher(escapedTagsArg);
|
||||
while (matcher.find()) {
|
||||
escapedTags.add(matcher.group(0));
|
||||
}
|
||||
}
|
||||
escapedTags = getSet(args, "escapedTags");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -26,9 +26,11 @@ import java.util.Map;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Factory for {@link MappingCharFilter}.
|
||||
|
@ -51,7 +53,7 @@ public class MappingCharFilterFactory extends CharFilterFactory implements
|
|||
/** Creates a new MappingCharFilterFactory */
|
||||
public MappingCharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
mapping = args.remove("mapping");
|
||||
mapping = get(args, "mapping");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
|
||||
|
@ -46,8 +45,8 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
|
|||
/** Creates a new CommonGramsFilterFactory */
|
||||
public CommonGramsFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
commonWordFiles = args.remove("words");
|
||||
format = args.remove("format");
|
||||
commonWordFiles = get(args, "words");
|
||||
format = get(args, "format");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -17,8 +17,11 @@ package org.apache.lucene.analysis.compound;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
@ -34,7 +37,7 @@ import java.io.IOException;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArraySet dictionary;
|
||||
private final String dictFile;
|
||||
private final int minWordSize;
|
||||
|
@ -46,11 +49,7 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
|
|||
public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
dictFile = args.remove("dictionary");
|
||||
if (null == dictFile) {
|
||||
throw new IllegalArgumentException("Missing required parameter: dictionary");
|
||||
}
|
||||
|
||||
dictFile = require(args, "dictionary");
|
||||
minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
|
||||
|
@ -68,7 +67,8 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
|
|||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
// if the dictionary is null, it means it was empty
|
||||
return dictionary == null ? input : new DictionaryCompoundWordTokenFilter(luceneMatchVersion,input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
|
||||
return dictionary == null ? input : new DictionaryCompoundWordTokenFilter
|
||||
(luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,10 +18,11 @@ package org.apache.lucene.analysis.compound;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
|
||||
import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.util.Map;
|
||||
|
@ -71,13 +72,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactor
|
|||
public HyphenationCompoundWordTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
dictFile = args.remove("dictionary");
|
||||
encoding = args.remove("encoding");
|
||||
hypFile = args.remove("hyphenator");
|
||||
if (null == hypFile) {
|
||||
throw new IllegalArgumentException("Missing required parameter: hyphenator");
|
||||
}
|
||||
|
||||
dictFile = get(args, "dictionary");
|
||||
encoding = get(args, "encoding");
|
||||
hypFile = require(args, "hyphenator");
|
||||
minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
|
||||
|
|
|
@ -17,10 +17,11 @@ package org.apache.lucene.analysis.core;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
@ -47,8 +48,8 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
|
|||
public StopFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
stopWordFiles = args.remove("words");
|
||||
format = args.remove("format");
|
||||
stopWordFiles = get(args, "words");
|
||||
format = get(args, "format");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.core;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.TypeTokenFilter;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -49,10 +48,7 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour
|
|||
/** Creates a new TypeTokenFilterFactory */
|
||||
public TypeTokenFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
stopTypesFiles = args.remove("types");
|
||||
if (stopTypesFiles == null) {
|
||||
throw new IllegalArgumentException("Missing required parameter: types.");
|
||||
}
|
||||
stopTypesFiles = require(args, "types");
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
useWhitelist = getBoolean(args, "useWhitelist", false);
|
||||
if (!args.isEmpty()) {
|
||||
|
|
|
@ -25,8 +25,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
|
||||
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -67,11 +65,8 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
|
|||
public HunspellStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
dictionaryArg = args.remove(PARAM_DICTIONARY);
|
||||
if (dictionaryArg == null) {
|
||||
throw new IllegalArgumentException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
|
||||
}
|
||||
affixFile = args.remove(PARAM_AFFIX);
|
||||
dictionaryArg = require(args, PARAM_DICTIONARY);
|
||||
affixFile = get(args, PARAM_AFFIX);
|
||||
ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
|
||||
strictAffixParsing = getBoolean(args, PARAM_STRICT_AFFIX_PARSING, true);
|
||||
if (!args.isEmpty()) {
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
|
@ -26,7 +25,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Factory for {@link CapitalizationFilter}.
|
||||
|
@ -81,22 +80,17 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
|
|||
super(args);
|
||||
assureMatchVersion();
|
||||
boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
|
||||
String k = args.remove(KEEP);
|
||||
Set<String> k = getSet(args, KEEP);
|
||||
if (k != null) {
|
||||
StringTokenizer st = new StringTokenizer(k);
|
||||
keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
|
||||
while (st.hasMoreTokens()) {
|
||||
k = st.nextToken().trim();
|
||||
keep.add(k.toCharArray());
|
||||
}
|
||||
keep.addAll(k);
|
||||
}
|
||||
|
||||
k = args.remove(OK_PREFIX);
|
||||
k = getSet(args, OK_PREFIX);
|
||||
if (k != null) {
|
||||
okPrefix = new ArrayList<char[]>();
|
||||
StringTokenizer st = new StringTokenizer(k);
|
||||
while (st.hasMoreTokens()) {
|
||||
okPrefix.add(st.nextToken().trim().toCharArray());
|
||||
for (String item : k) {
|
||||
okPrefix.add(item.toCharArray());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
|
@ -47,7 +46,7 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
|
|||
public KeepWordFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
wordFiles = args.remove("words");
|
||||
wordFiles = get(args, "words");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
|
|
|
@ -21,9 +21,11 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeywordMarkerFilter}.
|
||||
|
@ -47,8 +49,8 @@ public class KeywordMarkerFilterFactory extends TokenFilterFactory implements Re
|
|||
/** Creates a new KeywordMarkerFilterFactory */
|
||||
public KeywordMarkerFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
wordFiles = args.remove(PROTECTED_TOKENS);
|
||||
stringPattern = args.remove(PATTERN);
|
||||
wordFiles = get(args, PROTECTED_TOKENS);
|
||||
stringPattern = get(args, PATTERN);
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Map;
|
||||
|
@ -43,8 +42,8 @@ public class LengthFilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new LengthFilterFactory */
|
||||
public LengthFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
min = getInt(args, MIN_KEY, 0, false);
|
||||
max = getInt(args, MAX_KEY, 0, false);
|
||||
min = requireInt(args, MIN_KEY);
|
||||
max = requireInt(args, MAX_KEY);
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
|
@ -46,7 +45,7 @@ public class LimitTokenCountFilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new LimitTokenCountFilterFactory */
|
||||
public LimitTokenCountFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
maxTokenCount = getInt(args, MAX_TOKEN_COUNT_KEY);
|
||||
maxTokenCount = requireInt(args, MAX_TOKEN_COUNT_KEY);
|
||||
consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -44,7 +44,7 @@ public class LimitTokenPositionFilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new LimitTokenPositionFilterFactory */
|
||||
public LimitTokenPositionFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
maxTokenPosition = getInt(args, MAX_TOKEN_POSITION_KEY);
|
||||
maxTokenPosition = requireInt(args, MAX_TOKEN_POSITION_KEY);
|
||||
consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
|
@ -46,7 +45,7 @@ public class StemmerOverrideFilterFactory extends TokenFilterFactory implements
|
|||
/** Creates a new StemmerOverrideFilterFactory */
|
||||
public StemmerOverrideFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
dictionaryFiles = args.remove("dictionary");
|
||||
dictionaryFiles = get(args, "dictionary");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -18,9 +18,10 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -88,8 +89,8 @@ public class WordDelimiterFilterFactory extends TokenFilterFactory implements Re
|
|||
if (getInt(args, "stemEnglishPossessive", 1) != 0) {
|
||||
flags |= STEM_ENGLISH_POSSESSIVE;
|
||||
}
|
||||
wordFiles = args.remove(PROTECTED_TOKENS);
|
||||
types = args.remove(TYPES);
|
||||
wordFiles = get(args, PROTECTED_TOKENS);
|
||||
types = get(args, TYPES);
|
||||
this.flags = flags;
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ngram;
|
|||
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
|
@ -42,13 +41,7 @@ public class EdgeNGramFilterFactory extends TokenFilterFactory {
|
|||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
|
||||
String sideArg = args.remove("side");
|
||||
if (sideArg == null) {
|
||||
side = EdgeNGramTokenFilter.Side.FRONT.getLabel();
|
||||
} else {
|
||||
side = sideArg;
|
||||
}
|
||||
side = get(args, "side", EdgeNGramTokenFilter.Side.FRONT.getLabel());
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -42,13 +42,7 @@ public class EdgeNGramTokenizerFactory extends TokenizerFactory {
|
|||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||
|
||||
String sideArg = args.remove("side");
|
||||
if (sideArg == null) {
|
||||
side = EdgeNGramTokenFilter.Side.FRONT.getLabel();
|
||||
} else {
|
||||
side = sideArg;
|
||||
}
|
||||
side = get(args, "side", EdgeNGramTokenFilter.Side.FRONT.getLabel());
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ngram;
|
|||
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
|
@ -41,7 +40,6 @@ public class NGramFilterFactory extends TokenFilterFactory {
|
|||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -42,7 +42,6 @@ public class NGramTokenizerFactory extends TokenizerFactory {
|
|||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -86,19 +86,6 @@ public class PathHierarchyTokenizerFactory extends TokenizerFactory {
|
|||
}
|
||||
}
|
||||
|
||||
private char getChar(Map<String,String> args, String name, char defaultValue) {
|
||||
String v = args.remove(name);
|
||||
if (v != null) {
|
||||
if (v.length() != 1) {
|
||||
throw new IllegalArgumentException(name + " should be a char. \"" + v + "\" is invalid");
|
||||
} else {
|
||||
return v.charAt(0);
|
||||
}
|
||||
} else {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tokenizer create(AttributeFactory factory, Reader input) {
|
||||
if (reverse) {
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Map;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
|
||||
/**
|
||||
|
@ -46,12 +45,7 @@ public class PatternReplaceCharFilterFactory extends CharFilterFactory {
|
|||
public PatternReplaceCharFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
pattern = getPattern(args, "pattern");
|
||||
String v = args.remove("replacement");
|
||||
if (v == null) {
|
||||
replacement = "";
|
||||
} else {
|
||||
replacement = v;
|
||||
}
|
||||
replacement = get(args, "replacement", "");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -18,9 +18,9 @@ package org.apache.lucene.analysis.pattern;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -46,17 +46,8 @@ public class PatternReplaceFilterFactory extends TokenFilterFactory {
|
|||
public PatternReplaceFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
pattern = getPattern(args, "pattern");
|
||||
replacement = args.remove("replacement");
|
||||
|
||||
String v = args.remove("replace");
|
||||
if (v == null || v.equals("all")) {
|
||||
replaceAll = true;
|
||||
} else if (v.equals("first")) {
|
||||
replaceAll = false;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Configuration Error: " +
|
||||
"'replace' must be 'first' or 'all' in " + getClass().getName());
|
||||
}
|
||||
replacement = get(args, "replacement");
|
||||
replaceAll = "all".equals(get(args, "replace", Arrays.asList("all", "first"), "all"));
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -72,7 +72,6 @@ public class PatternTokenizerFactory extends TokenizerFactory {
|
|||
super(args);
|
||||
pattern = getPattern(args, PATTERN);
|
||||
group = getInt(args, GROUP, -1);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -18,11 +18,6 @@ package org.apache.lucene.analysis.payloads;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
||||
import org.apache.lucene.analysis.payloads.FloatEncoder;
|
||||
import org.apache.lucene.analysis.payloads.IntegerEncoder;
|
||||
import org.apache.lucene.analysis.payloads.IdentityEncoder;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -51,18 +46,8 @@ public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory imple
|
|||
/** Creates a new DelimitedPayloadTokenFilterFactory */
|
||||
public DelimitedPayloadTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
encoderClass = args.remove(ENCODER_ATTR);
|
||||
if (encoderClass == null) {
|
||||
throw new IllegalArgumentException("Parameter " + ENCODER_ATTR + " is mandatory");
|
||||
}
|
||||
String delim = args.remove(DELIMITER_ATTR);
|
||||
if (delim == null) {
|
||||
delimiter = '|';
|
||||
} else if (delim.length() == 1) {
|
||||
delimiter = delim.charAt(0);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Delimiter must be one character only");
|
||||
}
|
||||
encoderClass = require(args, ENCODER_ATTR);
|
||||
delimiter = getChar(args, DELIMITER_ATTR, '|');
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.analysis.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import java.util.Map;
|
||||
|
@ -39,12 +38,8 @@ public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new NumericPayloadTokenFilterFactory */
|
||||
public NumericPayloadTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
String payloadArg = args.remove("payload");
|
||||
typeMatch = args.remove("typeMatch");
|
||||
if (payloadArg == null || typeMatch == null) {
|
||||
throw new IllegalArgumentException("Both payload and typeMatch are required");
|
||||
}
|
||||
payload = Float.parseFloat(payloadArg);
|
||||
payload = requireFloat(args, "payload");
|
||||
typeMatch = require(args, "typeMatch");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.analysis.shingle;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
|
@ -44,28 +43,21 @@ public class ShingleFilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new ShingleFilterFactory */
|
||||
public ShingleFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
maxShingleSize = getInt(args, "maxShingleSize",
|
||||
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
|
||||
maxShingleSize = getInt(args, "maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
|
||||
if (maxShingleSize < 2) {
|
||||
throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize
|
||||
+ ") - must be at least 2");
|
||||
throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize + ") - must be at least 2");
|
||||
}
|
||||
minShingleSize = getInt(args, "minShingleSize",
|
||||
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
|
||||
minShingleSize = getInt(args, "minShingleSize", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
|
||||
if (minShingleSize < 2) {
|
||||
throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
|
||||
+ ") - must be at least 2");
|
||||
throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be at least 2");
|
||||
}
|
||||
if (minShingleSize > maxShingleSize) {
|
||||
throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
|
||||
+ ") - must be no greater than maxShingleSize ("
|
||||
+ maxShingleSize + ")");
|
||||
throw new IllegalArgumentException
|
||||
("Invalid minShingleSize (" + minShingleSize + ") - must be no greater than maxShingleSize (" + maxShingleSize + ")");
|
||||
}
|
||||
outputUnigrams = getBoolean(args, "outputUnigrams", true);
|
||||
outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
|
||||
tokenSeparator = args.containsKey("tokenSeparator")
|
||||
? args.remove("tokenSeparator")
|
||||
: ShingleFilter.TOKEN_SEPARATOR;
|
||||
tokenSeparator = get(args, "tokenSeparator", ShingleFilter.TOKEN_SEPARATOR);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -23,8 +23,10 @@ import java.io.IOException;
|
|||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
/**
|
||||
|
@ -51,13 +53,8 @@ public class SnowballPorterFilterFactory extends TokenFilterFactory implements R
|
|||
/** Creates a new SnowballPorterFilterFactory */
|
||||
public SnowballPorterFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String cfgLanguage = args.remove("language");
|
||||
if (cfgLanguage == null) {
|
||||
language = "English";
|
||||
} else {
|
||||
language = cfgLanguage;
|
||||
}
|
||||
wordFiles = args.remove(PROTECTED_TOKENS);
|
||||
language = get(args, "language", "English");
|
||||
wordFiles = get(args, PROTECTED_TOKENS);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -39,8 +39,7 @@ public class ClassicTokenizerFactory extends TokenizerFactory {
|
|||
public ClassicTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
maxTokenLength = getInt(args, "maxTokenLength",
|
||||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -39,8 +39,7 @@ public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
|
|||
public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
maxTokenLength = getInt(args, "maxTokenLength",
|
||||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -34,11 +34,10 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.synonym.SynonymFilter;
|
||||
import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
|
||||
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -65,17 +64,13 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
public SynonymFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
tokenizerFactory = args.remove("tokenizerFactory");
|
||||
tokenizerFactory = get(args, "tokenizerFactory");
|
||||
if (tokenizerFactory != null) {
|
||||
assureMatchVersion();
|
||||
}
|
||||
synonyms = args.remove("synonyms");
|
||||
if (synonyms == null) {
|
||||
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
|
||||
}
|
||||
format = args.remove("format");
|
||||
synonyms = require(args, "synonyms");
|
||||
format = get(args, "format");
|
||||
expand = getBoolean(args, "expand", true);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -28,10 +28,14 @@ import java.io.Reader;
|
|||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
|
@ -59,7 +63,7 @@ public abstract class AbstractAnalysisFactory {
|
|||
*/
|
||||
protected AbstractAnalysisFactory(Map<String,String> args) {
|
||||
originalArgs = Collections.unmodifiableMap(new HashMap<String,String>(args));
|
||||
String version = args.remove("luceneMatchVersion");
|
||||
String version = get(args, "luceneMatchVersion");
|
||||
luceneMatchVersion = version == null ? null : Version.parseLeniently(version);
|
||||
}
|
||||
|
||||
|
@ -80,37 +84,128 @@ public abstract class AbstractAnalysisFactory {
|
|||
public final Version getLuceneMatchVersion() {
|
||||
return this.luceneMatchVersion;
|
||||
}
|
||||
|
||||
protected final int getInt(Map<String,String> args, String name) {
|
||||
return getInt(args, name, -1, false);
|
||||
}
|
||||
|
||||
protected final int getInt(Map<String,String> args, String name, int defaultVal) {
|
||||
return getInt(args, name, defaultVal, true);
|
||||
}
|
||||
|
||||
protected final int getInt(Map<String,String> args, String name, int defaultVal, boolean useDefault) {
|
||||
|
||||
public String require(Map<String,String> args, String name) {
|
||||
String s = args.remove(name);
|
||||
if (s == null) {
|
||||
if (useDefault) {
|
||||
return defaultVal;
|
||||
}
|
||||
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
|
||||
}
|
||||
return Integer.parseInt(s);
|
||||
return s;
|
||||
}
|
||||
|
||||
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
|
||||
return getBoolean(args, name, defaultVal, true);
|
||||
public String require(Map<String,String> args, String name, Collection<String> allowedValues) {
|
||||
return require(args, name, allowedValues, true);
|
||||
}
|
||||
|
||||
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal, boolean useDefault) {
|
||||
public String require(Map<String,String> args, String name, Collection<String> allowedValues, boolean caseSensitive) {
|
||||
String s = args.remove(name);
|
||||
if (s==null) {
|
||||
if (useDefault) return defaultVal;
|
||||
if (s == null) {
|
||||
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
|
||||
} else {
|
||||
for (String allowedValue : allowedValues) {
|
||||
if (caseSensitive) {
|
||||
if (s.equals(allowedValue)) {
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
if (s.equalsIgnoreCase(allowedValue)) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
|
||||
}
|
||||
}
|
||||
public String get(Map<String,String> args, String name) {
|
||||
return args.remove(name); // defaultVal = null
|
||||
}
|
||||
public String get(Map<String,String> args, String name, String defaultVal) {
|
||||
String s = args.remove(name);
|
||||
return s == null ? defaultVal : s;
|
||||
}
|
||||
public String get(Map<String,String> args, String name, Collection<String> allowedValues) {
|
||||
return get(args, name, allowedValues, null); // defaultVal = null
|
||||
}
|
||||
public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal) {
|
||||
return get(args, name, allowedValues, defaultVal, true);
|
||||
}
|
||||
public String get(Map<String,String> args, String name, Collection<String> allowedValues, String defaultVal, boolean caseSensitive) {
|
||||
String s = args.remove(name);
|
||||
if (s == null) {
|
||||
return defaultVal;
|
||||
} else {
|
||||
for (String allowedValue : allowedValues) {
|
||||
if (caseSensitive) {
|
||||
if (s.equals(allowedValue)) {
|
||||
return s;
|
||||
}
|
||||
} else {
|
||||
if (s.equalsIgnoreCase(allowedValue)) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
|
||||
}
|
||||
}
|
||||
|
||||
protected final int requireInt(Map<String,String> args, String name) {
|
||||
return Integer.parseInt(require(args, name));
|
||||
}
|
||||
protected final int getInt(Map<String,String> args, String name, int defaultVal) {
|
||||
String s = args.remove(name);
|
||||
return s == null ? defaultVal : Integer.parseInt(s);
|
||||
}
|
||||
|
||||
protected final boolean requireBoolean(Map<String,String> args, String name) {
|
||||
return Boolean.parseBoolean(require(args, name));
|
||||
}
|
||||
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
|
||||
String s = args.remove(name);
|
||||
return s == null ? defaultVal : Boolean.parseBoolean(s);
|
||||
}
|
||||
|
||||
protected final float requireFloat(Map<String,String> args, String name) {
|
||||
return Float.parseFloat(require(args, name));
|
||||
}
|
||||
protected final float getFloat(Map<String,String> args, String name, float defaultVal) {
|
||||
String s = args.remove(name);
|
||||
return s == null ? defaultVal : Float.parseFloat(s);
|
||||
}
|
||||
|
||||
public char requireChar(Map<String,String> args, String name) {
|
||||
return require(args, name).charAt(0);
|
||||
}
|
||||
public char getChar(Map<String,String> args, String name, char defaultValue) {
|
||||
String s = args.remove(name);
|
||||
if (s == null) {
|
||||
return defaultValue;
|
||||
} else {
|
||||
if (s.length() != 1) {
|
||||
throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
|
||||
} else {
|
||||
return s.charAt(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Matches one item: a maximal run of characters that are neither commas nor whitespace.
private static final Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");
|
||||
|
||||
/** Returns whitespace- and/or comma-separated set of values, or null if none are found */
|
||||
public Set<String> getSet(Map<String,String> args, String name) {
|
||||
String s = args.remove(name);
|
||||
if (s == null) {
|
||||
return null;
|
||||
} else {
|
||||
Set<String> set = null;
|
||||
Matcher matcher = ITEM_PATTERN.matcher(s);
|
||||
if (matcher.find()) {
|
||||
set = new HashSet<String>();
|
||||
set.add(matcher.group(0));
|
||||
while (matcher.find()) {
|
||||
set.add(matcher.group(0));
|
||||
}
|
||||
}
|
||||
return set;
|
||||
}
|
||||
return Boolean.parseBoolean(s);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -118,11 +213,7 @@ public abstract class AbstractAnalysisFactory {
|
|||
*/
|
||||
protected final Pattern getPattern(Map<String,String> args, String name) {
|
||||
try {
|
||||
String pat = args.remove(name);
|
||||
if (null == pat) {
|
||||
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
|
||||
}
|
||||
return Pattern.compile(pat);
|
||||
return Pattern.compile(require(args, name));
|
||||
} catch (PatternSyntaxException e) {
|
||||
throw new IllegalArgumentException
|
||||
("Configuration Error: '" + name + "' can not be parsed in " +
|
||||
|
|
|
@ -43,7 +43,7 @@ public class ElisionFilterFactory extends TokenFilterFactory implements Resource
|
|||
/** Creates a new ElisionFilterFactory */
|
||||
public ElisionFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
articlesFile = args.remove("articles");
|
||||
articlesFile = get(args, "articles");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -17,10 +17,10 @@ package org.apache.lucene.analysis.icu;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -51,22 +51,12 @@ public class ICUNormalizer2FilterFactory extends TokenFilterFactory implements M
|
|||
/** Creates a new ICUNormalizer2FilterFactory */
|
||||
public ICUNormalizer2FilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String name = args.remove("name");
|
||||
if (name == null)
|
||||
name = "nfkc_cf";
|
||||
String mode = args.remove("mode");
|
||||
if (mode == null)
|
||||
mode = "compose";
|
||||
String name = get(args, "name", "nfkc_cf");
|
||||
String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");
|
||||
Normalizer2 normalizer = Normalizer2.getInstance
|
||||
(null, name, "compose".equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
|
||||
|
||||
Normalizer2 normalizer;
|
||||
if (mode.equals("compose"))
|
||||
normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.COMPOSE);
|
||||
else if (mode.equals("decompose"))
|
||||
normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE);
|
||||
else
|
||||
throw new IllegalArgumentException("Invalid mode: " + mode);
|
||||
|
||||
String filter = args.remove("filter");
|
||||
String filter = get(args, "filter");
|
||||
if (filter != null) {
|
||||
UnicodeSet set = new UnicodeSet(filter);
|
||||
if (!set.isEmpty()) {
|
||||
|
|
|
@ -17,10 +17,10 @@ package org.apache.lucene.analysis.icu;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.icu.ICUTransformFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -44,20 +44,9 @@ public class ICUTransformFilterFactory extends TokenFilterFactory implements Mul
|
|||
/** Creates a new ICUTransformFilterFactory */
|
||||
public ICUTransformFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String id = args.remove("id");
|
||||
if (id == null) {
|
||||
throw new IllegalArgumentException("id is required.");
|
||||
}
|
||||
|
||||
int dir;
|
||||
String direction = args.remove("direction");
|
||||
if (direction == null || direction.equalsIgnoreCase("forward"))
|
||||
dir = Transliterator.FORWARD;
|
||||
else if (direction.equalsIgnoreCase("reverse"))
|
||||
dir = Transliterator.REVERSE;
|
||||
else
|
||||
throw new IllegalArgumentException("invalid direction: " + direction);
|
||||
|
||||
String id = require(args, "id");
|
||||
String direction = get(args, "direction", Arrays.asList("forward", "reverse"), "forward", false);
|
||||
int dir = "forward".equals(direction) ? Transliterator.FORWARD : Transliterator.REVERSE;
|
||||
transliterator = Transliterator.getInstance(id, dir);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -84,7 +84,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa
|
|||
public ICUTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
tailored = new HashMap<Integer,String>();
|
||||
String rulefilesArg = args.remove(RULEFILES);
|
||||
String rulefilesArg = get(args, RULEFILES);
|
||||
if (rulefilesArg != null) {
|
||||
List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
|
||||
for (String scriptAndResourcePath : scriptAndResourcePaths) {
|
||||
|
|
|
@ -23,8 +23,10 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter}.
|
||||
|
@ -39,7 +41,7 @@ import org.apache.lucene.analysis.util.*;
|
|||
* </fieldType>
|
||||
* </pre>
|
||||
*/
|
||||
public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private final String stopTagFiles;
|
||||
private final boolean enablePositionIncrements;
|
||||
private Set<String> stopTags;
|
||||
|
@ -47,7 +49,7 @@ public class JapanesePartOfSpeechStopFilterFactory extends TokenFilterFactory im
|
|||
/** Creates a new JapanesePartOfSpeechStopFilterFactory */
|
||||
public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
stopTagFiles = args.remove("tags");
|
||||
stopTagFiles = get(args, "tags");
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
|
|
|
@ -70,7 +70,7 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
|
|||
/** Creates a new JapaneseTokenizerFactory */
|
||||
public JapaneseTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
mode = getMode(args);
|
||||
mode = Mode.valueOf(get(args, MODE, JapaneseTokenizer.DEFAULT_MODE.toString()).toUpperCase(Locale.ROOT));
|
||||
userDictionaryPath = args.remove(USER_DICT_PATH);
|
||||
userDictionaryEncoding = args.remove(USER_DICT_ENCODING);
|
||||
discardPunctuation = getBoolean(args, DISCARD_PUNCTUATION, true);
|
||||
|
@ -101,13 +101,4 @@ public class JapaneseTokenizerFactory extends TokenizerFactory implements Resour
|
|||
public JapaneseTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode);
|
||||
}
|
||||
|
||||
private Mode getMode(Map<String,String> args) {
|
||||
String modeArg = args.remove(MODE);
|
||||
if (modeArg != null) {
|
||||
return Mode.valueOf(modeArg.toUpperCase(Locale.ROOT));
|
||||
} else {
|
||||
return JapaneseTokenizer.DEFAULT_MODE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Map;
|
|||
import morfologik.stemming.PolishStemmer.DICTIONARY;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.morfologik.MorfologikFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
|
@ -54,7 +53,7 @@ public class MorfologikFilterFactory extends TokenFilterFactory {
|
|||
/** Creates a new MorfologikFilterFactory */
|
||||
public MorfologikFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String dictionaryName = args.remove(DICTIONARY_SCHEMA_ATTRIBUTE);
|
||||
String dictionaryName = get(args, DICTIONARY_SCHEMA_ATTRIBUTE);
|
||||
if (dictionaryName != null && !dictionaryName.isEmpty()) {
|
||||
try {
|
||||
DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT));
|
||||
|
|
|
@ -17,16 +17,14 @@ package org.apache.lucene.analysis.phonetic;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
|
||||
import org.apache.commons.codec.language.bm.NameType;
|
||||
import org.apache.commons.codec.language.bm.PhoneticEngine;
|
||||
import org.apache.commons.codec.language.bm.RuleType;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.phonetic.BeiderMorseFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
|
@ -51,22 +49,15 @@ public class BeiderMorseFilterFactory extends TokenFilterFactory {
|
|||
super(args);
|
||||
// PhoneticEngine = NameType + RuleType + concat
|
||||
// we use common-codec's defaults: GENERIC + APPROX + true
|
||||
String nameTypeArg = args.remove("nameType");
|
||||
NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg);
|
||||
|
||||
String ruleTypeArg = args.remove("ruleType");
|
||||
RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);
|
||||
NameType nameType = NameType.valueOf(get(args, "nameType", NameType.GENERIC.toString()));
|
||||
RuleType ruleType = RuleType.valueOf(get(args, "ruleType", RuleType.APPROX.toString()));
|
||||
|
||||
boolean concat = getBoolean(args, "concat", true);
|
||||
engine = new PhoneticEngine(nameType, ruleType, concat);
|
||||
|
||||
// LanguageSet: defaults to automagic, otherwise a comma-separated list.
|
||||
String languageSetArg = args.remove("languageSet");
|
||||
if (languageSetArg == null || languageSetArg.equals("auto")) {
|
||||
languageSet = null;
|
||||
} else {
|
||||
languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(","))));
|
||||
}
|
||||
Set<String> langs = getSet(args, "languageSet");
|
||||
languageSet = (null == langs || (1 == langs.size() && langs.contains("auto"))) ? null : LanguageSet.from(langs);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,12 @@ import java.util.Locale;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.codec.Encoder;
|
||||
import org.apache.commons.codec.language.*;
|
||||
import org.apache.commons.codec.language.Caverphone2;
|
||||
import org.apache.commons.codec.language.ColognePhonetic;
|
||||
import org.apache.commons.codec.language.DoubleMetaphone;
|
||||
import org.apache.commons.codec.language.Metaphone;
|
||||
import org.apache.commons.codec.language.RefinedSoundex;
|
||||
import org.apache.commons.codec.language.Soundex;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
|
@ -89,12 +94,8 @@ public class PhoneticFilterFactory extends TokenFilterFactory implements Resourc
|
|||
public PhoneticFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
inject = getBoolean(args, INJECT, true);
|
||||
name = args.remove(ENCODER);
|
||||
if (name == null) {
|
||||
throw new IllegalArgumentException("Missing required parameter: " + ENCODER
|
||||
+ " [" + registry.keySet() + "]");
|
||||
}
|
||||
String v = args.remove(MAX_CODE_LENGTH);
|
||||
name = require(args, ENCODER);
|
||||
String v = get(args, MAX_CODE_LENGTH);
|
||||
if (v != null) {
|
||||
maxCodeLength = Integer.valueOf(v);
|
||||
} else {
|
||||
|
|
|
@ -70,7 +70,7 @@ public class TestPhoneticFilterFactory extends BaseTokenStreamTestCase {
|
|||
new PhoneticFilterFactory(new HashMap<String,String>());
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("Missing required parameter"));
|
||||
assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'encoder'"));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,20 +34,11 @@ public class UIMAAnnotationsTokenizerFactory extends TokenizerFactory {
|
|||
private final Map<String,Object> configurationParameters = new HashMap<String,Object>();
|
||||
|
||||
/** Creates a new UIMAAnnotationsTokenizerFactory */
|
||||
public UIMAAnnotationsTokenizerFactory(Map<String, String> args) {
|
||||
public UIMAAnnotationsTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
for (String k : args.keySet()) {
|
||||
if (k.equals("tokenType")) {
|
||||
tokenType = args.get("tokenType");
|
||||
} else if (k.equals("descriptorPath")) {
|
||||
descriptorPath = args.get("descriptorPath");
|
||||
} else {
|
||||
configurationParameters.put(k, args.get(k));
|
||||
}
|
||||
}
|
||||
if (descriptorPath == null || tokenType == null ) {
|
||||
throw new IllegalArgumentException("descriptorPath and tokenType are mandatory");
|
||||
}
|
||||
tokenType = require(args, "tokenType");
|
||||
descriptorPath = require(args, "descriptorPath");
|
||||
configurationParameters.putAll(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -35,22 +35,12 @@ public class UIMATypeAwareAnnotationsTokenizerFactory extends TokenizerFactory {
|
|||
private final Map<String,Object> configurationParameters = new HashMap<String,Object>();
|
||||
|
||||
/** Creates a new UIMATypeAwareAnnotationsTokenizerFactory */
|
||||
public UIMATypeAwareAnnotationsTokenizerFactory(Map<String, String> args) {
|
||||
public UIMATypeAwareAnnotationsTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
for (String k : args.keySet()) {
|
||||
if (k.equals("featurePath")) {
|
||||
featurePath = args.get("featurePath");
|
||||
} else if (k.equals("tokenType")) {
|
||||
tokenType = args.get("tokenType");
|
||||
} else if (k.equals("descriptorPath")) {
|
||||
descriptorPath = args.get("descriptorPath");
|
||||
} else {
|
||||
configurationParameters.put(k, args.get(k));
|
||||
}
|
||||
}
|
||||
if (descriptorPath == null || tokenType == null || featurePath == null) {
|
||||
throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory");
|
||||
}
|
||||
featurePath = require(args, "featurePath");
|
||||
tokenType = require(args, "tokenType");
|
||||
descriptorPath = require(args, "descriptorPath");
|
||||
configurationParameters.putAll(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -136,13 +136,4 @@ public class ReversedWildcardFilterFactory extends TokenFilterFactory {
|
|||
public char getMarkerChar() {
|
||||
return markerChar;
|
||||
}
|
||||
|
||||
protected final float getFloat(Map<String,String> args, String name, float defValue) {
|
||||
String val = args.remove(name);
|
||||
if (val == null) {
|
||||
return defValue;
|
||||
} else {
|
||||
return Float.parseFloat(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ public class MockCharFilterFactory extends CharFilterFactory {
|
|||
/** Creates a new MockCharFilterFactory */
|
||||
public MockCharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
remainder = getInt(args, "remainder", 0, false);
|
||||
remainder = requireInt(args, "remainder");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.analysis;
|
|||
*/
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
|
@ -35,19 +36,13 @@ public class MockTokenizerFactory extends TokenizerFactory {
|
|||
/** Creates a new MockTokenizerFactory */
|
||||
public MockTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String patternArg = args.remove("pattern");
|
||||
if (patternArg == null) {
|
||||
patternArg = "whitespace";
|
||||
}
|
||||
|
||||
if ("whitespace".equalsIgnoreCase(patternArg)) {
|
||||
pattern = MockTokenizer.WHITESPACE;
|
||||
} else if ("keyword".equalsIgnoreCase(patternArg)) {
|
||||
String patternArg = get(args, "pattern", Arrays.asList("keyword", "simple", "whitespace"));
|
||||
if ("keyword".equalsIgnoreCase(patternArg)) {
|
||||
pattern = MockTokenizer.KEYWORD;
|
||||
} else if ("simple".equalsIgnoreCase(patternArg)) {
|
||||
pattern = MockTokenizer.SIMPLE;
|
||||
} else {
|
||||
throw new RuntimeException("invalid pattern!");
|
||||
pattern = MockTokenizer.WHITESPACE;
|
||||
}
|
||||
|
||||
enableChecks = getBoolean(args, "enableChecks", true);
|
||||
|
|
Loading…
Reference in New Issue