LUCENE-4877: throw exception for invalid arguments in analysis factories

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1463191 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-04-01 16:30:00 +00:00
parent b1e4ba6843
commit 29b5142e79
237 changed files with 4072 additions and 2911 deletions

View File

@ -31,6 +31,11 @@ Optimizations
on Windows if NIOFSDirectory is used, mmapped files are still locked.
(Michael Poindexter, Robert Muir, Uwe Schindler)
Bug Fixes
* LUCENE-4877: Throw exception for invalid arguments in analysis factories.
(Steve Rowe, Uwe Schindler, Robert Muir)
======================= Lucene 4.3.0 =======================
Changes in backwards compatibility policy

View File

@ -17,26 +17,34 @@ package org.apache.lucene.analysis.ar;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link ArabicNormalizationFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ArabicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new ArabicNormalizationFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public ArabicNormalizationFilterFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: this factory accepts no parameters of its own, so any
  // entries still left in the map are unrecognized -- fail fast rather
  // than silently ignoring a misconfiguration.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public ArabicNormalizationFilter create(TokenStream input) {
return new ArabicNormalizationFilter(input);

View File

@ -17,14 +17,15 @@ package org.apache.lucene.analysis.ar;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link ArabicStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -32,10 +33,16 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ArabicStemFilterFactory extends TokenFilterFactory {
/** Creates a new ArabicStemFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public ArabicStemFilterFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: no factory-specific parameters exist; leftovers in the
  // map mean the configuration named an unknown parameter.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public ArabicStemFilter create(TokenStream input) {

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.bg;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link BulgarianStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.BulgarianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class BulgarianStemFilterFactory extends TokenFilterFactory {
/** Creates a new BulgarianStemFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public BulgarianStemFilterFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public TokenStream create(TokenStream input) {
return new BulgarianStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.br;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link BrazilianStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class BrazilianStemFilterFactory extends TokenFilterFactory {
/** Creates a new BrazilianStemFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public BrazilianStemFilterFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public BrazilianStemFilter create(TokenStream in) {
return new BrazilianStemFilter(in);

View File

@ -28,20 +28,36 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Factory for {@link HTMLStripCharFilter}.
* <pre class="prettyprint" >
* Factory for {@link HTMLStripCharFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_html" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" /&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class HTMLStripCharFilterFactory extends CharFilterFactory {
public class HTMLStripCharFilterFactory extends CharFilterFactory {
final Set<String> escapedTags;
static final Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
Set<String> escapedTags = null;
Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
/**
 * Creates a new HTMLStripCharFilterFactory.
 * Supported parameter: {@code escapedTags} -- a comma/whitespace-separated
 * list of tag names that should be left unescaped by the filter.
 */
public HTMLStripCharFilterFactory(Map<String,String> args) {
  super(args);
  // remove() both reads the value and consumes the key, so the
  // leftover-args check below only sees unrecognized parameters.
  String escapedTagsArg = args.remove("escapedTags");
  if (escapedTagsArg == null) {
    // no escapedTags configured; null signals "none" to create()
    escapedTags = null;
  } else {
    escapedTags = new HashSet<String>();
    // TAG_NAME_PATTERN splits on whitespace and commas ("[^\\s,]+"),
    // so "a, title" yields the tags "a" and "title".
    Matcher matcher = TAG_NAME_PATTERN.matcher(escapedTagsArg);
    while (matcher.find()) {
      escapedTags.add(matcher.group(0));
    }
  }
  // LUCENE-4877: anything still in the map is an unknown parameter.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public HTMLStripCharFilter create(Reader input) {
@ -53,19 +69,4 @@ import java.util.regex.Pattern;
}
return charFilter;
}
@Override
public void init(Map<String,String> args) {
super.init(args);
String escapedTagsArg = args.get("escapedTags");
if (null != escapedTagsArg) {
Matcher matcher = TAG_NAME_PATTERN.matcher(escapedTagsArg);
while (matcher.find()) {
if (null == escapedTags) {
escapedTags = new HashSet<String>();
}
escapedTags.add(matcher.group(0));
}
}
}
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -31,7 +32,7 @@ import org.apache.lucene.analysis.util.*;
/**
* Factory for {@link MappingCharFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_map" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/&gt;
@ -39,21 +40,26 @@ import org.apache.lucene.analysis.util.*;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
* @since Solr 1.4
*
*/
public class MappingCharFilterFactory extends CharFilterFactory implements
ResourceLoaderAware, MultiTermAwareComponent {
protected NormalizeCharMap normMap;
private String mapping;
private final String mapping;
/**
 * Creates a new MappingCharFilterFactory.
 * Supported parameter: {@code mapping} -- name of the mapping file; may be
 * absent (the filter then passes input through unchanged, per inform/create).
 */
public MappingCharFilterFactory(Map<String,String> args) {
  super(args);
  // consume the key so the leftover check below only sees unknown params
  mapping = args.remove("mapping");
  // LUCENE-4877: fail fast on unrecognized parameters.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
// TODO: this should use inputstreams from the loader, not File!
@Override
public void inform(ResourceLoader loader) throws IOException {
mapping = args.get("mapping");
if (mapping != null) {
List<String> wlist = null;
File mappingFile = new File(mapping);

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link CJKBigramFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_cjk" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -38,26 +38,30 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*/
public class CJKBigramFilterFactory extends TokenFilterFactory {
int flags;
boolean outputUnigrams;
final int flags;
final boolean outputUnigrams;
@Override
public void init(Map<String,String> args) {
super.init(args);
flags = 0;
if (getBoolean("han", true)) {
/** Creates a new CJKBigramFilterFactory */
public CJKBigramFilterFactory(Map<String,String> args) {
super(args);
int flags = 0;
if (getBoolean(args, "han", true)) {
flags |= CJKBigramFilter.HAN;
}
if (getBoolean("hiragana", true)) {
if (getBoolean(args, "hiragana", true)) {
flags |= CJKBigramFilter.HIRAGANA;
}
if (getBoolean("katakana", true)) {
if (getBoolean(args, "katakana", true)) {
flags |= CJKBigramFilter.KATAKANA;
}
if (getBoolean("hangul", true)) {
if (getBoolean(args, "hangul", true)) {
flags |= CJKBigramFilter.HANGUL;
}
outputUnigrams = getBoolean("outputUnigrams", false);
this.flags = flags;
this.outputUnigrams = getBoolean(args, "outputUnigrams", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cjk;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link CJKWidthFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_cjk" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -35,9 +37,16 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class CJKWidthFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new CJKWidthFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public CJKWidthFilterFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public TokenStream create(TokenStream input) {
return new CJKWidthFilter(input);

View File

@ -18,7 +18,9 @@ package org.apache.lucene.analysis.commongrams;
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
@ -26,29 +28,36 @@ import org.apache.lucene.analysis.util.*;
/**
* Constructs a {@link CommonGramsFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
/*
* This is pretty close to a straight copy from StopFilterFactory
*/
public class CommonGramsFilterFactory extends TokenFilterFactory implements
ResourceLoaderAware {
public class CommonGramsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
// TODO: shared base class for Stop/Keep/CommonGrams?
private CharArraySet commonWords;
private final String commonWordFiles;
private final String format;
private final boolean ignoreCase;
/**
 * Creates a new CommonGramsFilterFactory.
 * Supported parameters:
 * <ul>
 *   <li>{@code words} -- common-words file(s); optional (defaults applied in inform())</li>
 *   <li>{@code format} -- word-file format, e.g. "snowball"; optional</li>
 *   <li>{@code ignoreCase} -- case-insensitive matching, defaults to false</li>
 * </ul>
 */
public CommonGramsFilterFactory(Map<String,String> args) {
  super(args);
  // remove()/getBoolean(args, ...) consume their keys so the leftover
  // check below only sees parameters nobody recognized (LUCENE-4877).
  commonWordFiles = args.remove("words");
  format = args.remove("format");
  ignoreCase = getBoolean(args, "ignoreCase", false);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String commonWordFiles = args.get("words");
ignoreCase = getBoolean("ignoreCase", false);
if (commonWordFiles != null) {
if ("snowball".equalsIgnoreCase(args.get("format"))) {
if ("snowball".equalsIgnoreCase(format)) {
commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
} else {
commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
@ -57,10 +66,6 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements
commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
}
}
//Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
private CharArraySet commonWords;
private boolean ignoreCase;
public boolean isIgnoreCase() {
return ignoreCase;
@ -71,7 +76,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements
}
@Override
public CommonGramsFilter create(TokenStream input) {
public TokenFilter create(TokenStream input) {
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
return commonGrams;
}

View File

@ -17,77 +17,37 @@ package org.apache.lucene.analysis.commongrams;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.util.*;
/**
* Construct {@link CommonGramsQueryFilter}.
*
* This is pretty close to a straight copy from {@link StopFilterFactory}.
*
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class CommonGramsQueryFilterFactory extends TokenFilterFactory
implements ResourceLoaderAware {
public class CommonGramsQueryFilterFactory extends CommonGramsFilterFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
assureMatchVersion();
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String commonWordFiles = args.get("words");
ignoreCase = getBoolean("ignoreCase", false);
if (commonWordFiles != null) {
if ("snowball".equalsIgnoreCase(args.get("format"))) {
commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
} else {
commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
}
} else {
commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
}
}
// Force the use of a char array set, as it is the most performant, although
// this may break things if Lucene ever goes away from it. See SOLR-1095
private CharArraySet commonWords;
private boolean ignoreCase;
public boolean isIgnoreCase() {
return ignoreCase;
}
public CharArraySet getCommonWords() {
return commonWords;
/**
 * Creates a new CommonGramsQueryFilterFactory.
 * All parameter parsing and validation (words/format/ignoreCase and the
 * unknown-parameter check) is inherited from CommonGramsFilterFactory.
 */
public CommonGramsQueryFilterFactory(Map<String,String> args) {
  super(args);
}
/**
* Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
*/
@Override
public CommonGramsQueryFilter create(TokenStream input) {
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
CommonGramsQueryFilter commonGramsQuery = new CommonGramsQueryFilter(
commonGrams);
return commonGramsQuery;
public TokenFilter create(TokenStream input) {
CommonGramsFilter commonGrams = (CommonGramsFilter) super.create(input);
return new CommonGramsQueryFilter(commonGrams);
}
}

View File

@ -25,7 +25,7 @@ import java.io.IOException;
/**
* Factory for {@link DictionaryCompoundWordTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -33,33 +33,38 @@ import java.io.IOException;
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
private String dictFile;
private int minWordSize;
private int minSubwordSize;
private int maxSubwordSize;
private boolean onlyLongestMatch;
@Override
public void init(Map<String, String> args) {
super.init(args);
private final String dictFile;
private final int minWordSize;
private final int minSubwordSize;
private final int maxSubwordSize;
private final boolean onlyLongestMatch;
/** Creates a new DictionaryCompoundWordTokenFilterFactory */
public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
super(args);
assureMatchVersion();
dictFile = args.get("dictionary");
dictFile = args.remove("dictionary");
if (null == dictFile) {
throw new IllegalArgumentException("Missing required parameter: dictionary");
}
minWordSize= getInt("minWordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
minSubwordSize= getInt("minSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize= getInt("maxSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean("onlyLongestMatch",true);
minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
  // Load the compound-word dictionary file named by the required
  // "dictionary" parameter; ignoreCase=false, i.e. case-sensitive entries.
  dictionary = super.getWordSet(loader, dictFile, false);
}
@Override
public TokenStream create(TokenStream input) {
// if the dictionary is null, it means it was empty

View File

@ -45,7 +45,7 @@ import org.xml.sax.InputSource;
* to the stream. defaults to false.
* </ul>
* <p>
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -59,30 +59,32 @@ import org.xml.sax.InputSource;
public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
private HyphenationTree hyphenator;
private String dictFile;
private String hypFile;
private String encoding;
private int minWordSize;
private int minSubwordSize;
private int maxSubwordSize;
private boolean onlyLongestMatch;
private final String dictFile;
private final String hypFile;
private final String encoding;
private final int minWordSize;
private final int minSubwordSize;
private final int maxSubwordSize;
private final boolean onlyLongestMatch;
@Override
public void init(Map<String, String> args) {
super.init(args);
/** Creates a new HyphenationCompoundWordTokenFilterFactory */
public HyphenationCompoundWordTokenFilterFactory(Map<String, String> args) {
super(args);
assureMatchVersion();
dictFile = args.get("dictionary");
if (args.containsKey("encoding"))
encoding = args.get("encoding");
hypFile = args.get("hyphenator");
dictFile = args.remove("dictionary");
encoding = args.remove("encoding");
hypFile = args.remove("hyphenator");
if (null == hypFile) {
throw new IllegalArgumentException("Missing required parameter: hyphenator");
}
minWordSize = getInt("minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
minSubwordSize = getInt("minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize = getInt("maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean("onlyLongestMatch", false);
minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -21,18 +21,27 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link KeywordTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class KeywordTokenizerFactory extends TokenizerFactory {
/** Creates a new KeywordTokenizerFactory. Takes no parameters beyond those consumed by the superclass. */
public KeywordTokenizerFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public KeywordTokenizer create(AttributeFactory factory, Reader input) {
return new KeywordTokenizer(factory, input, KeywordTokenizer.DEFAULT_BUFFER_SIZE);

View File

@ -25,20 +25,22 @@ import java.util.Map;
/**
* Factory for {@link LetterTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.LetterTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class LetterTokenizerFactory extends TokenizerFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new LetterTokenizerFactory. Takes no parameters beyond those consumed by the superclass. */
public LetterTokenizerFactory(Map<String,String> args) {
  super(args);
  // this tokenizer's behavior is version-dependent, so a luceneMatchVersion
  // must have been supplied
  assureMatchVersion();
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override

View File

@ -27,20 +27,23 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link LowerCaseFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new LowerCaseFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public LowerCaseFilterFactory(Map<String,String> args) {
  super(args);
  // lowercasing is version-dependent, so a luceneMatchVersion is required
  assureMatchVersion();
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override

View File

@ -23,23 +23,27 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
/**
* Factory for {@link LowerCaseTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.LowerCaseTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new LowerCaseTokenizerFactory. Takes no parameters beyond those consumed by the superclass. */
public LowerCaseTokenizerFactory(Map<String,String> args) {
  super(args);
  // tokenization/lowercasing is version-dependent, so a luceneMatchVersion
  // is required
  assureMatchVersion();
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
@ -49,9 +53,6 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
LowerCaseFilterFactory filt = new LowerCaseFilterFactory();
filt.setLuceneMatchVersion(luceneMatchVersion);
filt.init(args);
return filt;
return new LowerCaseFilterFactory(new HashMap<String,String>(getOriginalArgs()));
}
}

View File

@ -27,7 +27,7 @@ import java.io.IOException;
/**
* Factory for {@link StopFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -35,24 +35,31 @@ import java.io.IOException;
* words="stopwords.txt" enablePositionIncrements="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
@Override
public void init(Map<String,String> args) {
super.init(args);
private CharArraySet stopWords;
private final String stopWordFiles;
private final String format;
private final boolean ignoreCase;
private final boolean enablePositionIncrements;
/**
 * Creates a new StopFilterFactory.
 * Supported parameters:
 * <ul>
 *   <li>{@code words} -- stopword file(s); optional (inform() falls back to a default set)</li>
 *   <li>{@code format} -- word-file format, e.g. "snowball"; optional</li>
 *   <li>{@code ignoreCase} -- defaults to false</li>
 *   <li>{@code enablePositionIncrements} -- defaults to false</li>
 * </ul>
 */
public StopFilterFactory(Map<String,String> args) {
  super(args);
  // stopping is version-dependent, so a luceneMatchVersion is required
  assureMatchVersion();
  // each accessor consumes its key from the map, so the final check only
  // sees parameters nobody recognized (LUCENE-4877)
  stopWordFiles = args.remove("words");
  format = args.remove("format");
  ignoreCase = getBoolean(args, "ignoreCase", false);
  enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String stopWordFiles = args.get("words");
ignoreCase = getBoolean("ignoreCase",false);
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
if (stopWordFiles != null) {
if ("snowball".equalsIgnoreCase(args.get("format"))) {
if ("snowball".equalsIgnoreCase(format)) {
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
} else {
stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
@ -62,10 +69,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
}
}
private CharArraySet stopWords;
private boolean ignoreCase;
private boolean enablePositionIncrements;
public boolean isEnablePositionIncrements() {
return enablePositionIncrements;
}

View File

@ -26,11 +26,12 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Factory class for {@link TypeTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="chars" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -40,29 +41,36 @@ import java.util.Set;
* &lt;/fieldType&gt;</pre>
*/
public class TypeTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
@Override
public void inform(ResourceLoader loader) throws IOException {
String stopTypesFiles = args.get("types");
enablePositionIncrements = getBoolean("enablePositionIncrements", false);
useWhitelist = getBoolean("useWhitelist", false);
if (stopTypesFiles != null) {
List<String> files = splitFileNames(stopTypesFiles);
if (files.size() > 0) {
stopTypes = new HashSet<String>();
for (String file : files) {
List<String> typesLines = getLines(loader, file.trim());
stopTypes.addAll(typesLines);
}
}
} else {
private final boolean useWhitelist;
private final boolean enablePositionIncrements;
private final String stopTypesFiles;
private Set<String> stopTypes;
/**
 * Creates a new TypeTokenFilterFactory.
 * Supported parameters:
 * <ul>
 *   <li>{@code types} -- file(s) listing token types; <b>required</b></li>
 *   <li>{@code enablePositionIncrements} -- defaults to false</li>
 *   <li>{@code useWhitelist} -- defaults to false</li>
 * </ul>
 */
public TypeTokenFilterFactory(Map<String,String> args) {
  super(args);
  stopTypesFiles = args.remove("types");
  // "types" has no sensible default -- its absence is a config error
  if (stopTypesFiles == null) {
    throw new IllegalArgumentException("Missing required parameter: types.");
  }
  enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
  useWhitelist = getBoolean(args, "useWhitelist", false);
  // LUCENE-4877: anything left in the map is an unknown parameter.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public void inform(ResourceLoader loader) throws IOException {
  // "types" may name several files; split on the factory's file-list syntax
  List<String> files = splitFileNames(stopTypesFiles);
  if (files.size() > 0) {
    // union of every line of every listed file becomes the type set
    stopTypes = new HashSet<String>();
    for (String file : files) {
      List<String> typesLines = getLines(loader, file.trim());
      stopTypes.addAll(typesLines);
    }
  }
  // NOTE(review): if the list is empty, stopTypes is left null -- callers
  // of getStopTypes()/create() presumably handle that; confirm downstream.
}
private boolean useWhitelist;
private Set<String> stopTypes;
private boolean enablePositionIncrements;
public boolean isEnablePositionIncrements() {
return enablePositionIncrements;

View File

@ -25,19 +25,22 @@ import java.util.Map;
/**
* Factory for {@link WhitespaceTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class WhitespaceTokenizerFactory extends TokenizerFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new WhitespaceTokenizerFactory. Takes no parameters beyond those consumed by the superclass. */
public WhitespaceTokenizerFactory(Map<String,String> args) {
  super(args);
  // tokenization is version-dependent, so a luceneMatchVersion is required
  assureMatchVersion();
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.cz;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link CzechStemFilter}.
* <pre class="prettyprint" >
* Factory for {@link CzechStemFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*/
public class CzechStemFilterFactory extends TokenFilterFactory {
/** Creates a new CzechStemFilterFactory. Takes no parameters beyond those consumed by the superclass. */
public CzechStemFilterFactory(Map<String,String> args) {
  super(args); // superclass consumes common args (e.g. luceneMatchVersion)
  // LUCENE-4877: reject any parameter this factory did not consume.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
@Override
public TokenStream create(TokenStream input) {
return new CzechStemFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.de;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GermanLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.GermanLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class GermanLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new GermanLightStemFilterFactory */
public GermanLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new GermanLightStemFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.de;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GermanMinimalStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.GermanMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class GermanMinimalStemFilterFactory extends TokenFilterFactory {
/** Creates a new GermanMinimalStemFilterFactory */
public GermanMinimalStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new GermanMinimalStemFilter(input);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.de;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GermanNormalizationFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_denorm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -36,6 +38,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
*/
public class GermanNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new GermanNormalizationFilterFactory */
public GermanNormalizationFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new GermanNormalizationFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.de;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GermanStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.GermanStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class GermanStemFilterFactory extends TokenFilterFactory {
/** Creates a new GermanStemFilterFactory */
public GermanStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public GermanStemFilter create(TokenStream in) {
return new GermanStemFilter(in);

View File

@ -27,25 +27,23 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GreekLowerCaseFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
@Override
public void init(Map<String, String> args) {
super.init(args);
/** Creates a new GreekLowerCaseFilterFactory */
public GreekLowerCaseFilterFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
if (args.containsKey("charset"))
throw new IllegalArgumentException(
"The charset parameter is no longer supported. "
+ "Please process your documents as Unicode instead.");
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.el;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.el.GreekStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GreekStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,13 +33,19 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.GreekStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class GreekStemFilterFactory extends TokenFilterFactory {
/** Creates a new GreekStemFilterFactory */
public GreekStemFilterFactory(Map<String,String> args) {
super(args);
// This factory accepts no configuration parameters; any entries left in the
// map after super(args) are unrecognized, so fail fast instead of silently
// ignoring misspelled attributes (the point of LUCENE-4877).
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
// Wraps the incoming stream with the Greek stemmer; stateless, so no
// per-invocation configuration is consulted here.
@Override
public TokenStream create(TokenStream input) {
return new GreekStemFilter(input);
}
}

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.en;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link EnglishMinimalStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.EnglishMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class EnglishMinimalStemFilterFactory extends TokenFilterFactory {
/** Creates a new EnglishMinimalStemFilterFactory */
public EnglishMinimalStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new EnglishMinimalStemFilter(input);

View File

@ -25,22 +25,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link EnglishPossessiveFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.EnglishPossessiveFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new EnglishPossessiveFilterFactory */
public EnglishPossessiveFilterFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,16 +17,34 @@ package org.apache.lucene.analysis.en;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link KStemFilter}
* Factory for {@link KStemFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_kstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.KStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class KStemFilterFactory extends TokenFilterFactory {
/** Creates a new KStemFilterFactory */
public KStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenFilter create(TokenStream input) {
return new KStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.en;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PorterStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PorterStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class PorterStemFilterFactory extends TokenFilterFactory {
/** Creates a new PorterStemFilterFactory */
public PorterStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public PorterStemFilter create(TokenStream input) {
return new PorterStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.es;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link SpanishLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.SpanishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class SpanishLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new SpanishLightStemFilterFactory */
public SpanishLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new SpanishLightStemFilter(input);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.fa;
*/
import java.io.Reader;
import java.util.Map;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.fa.PersianCharFilter;
@ -27,17 +28,24 @@ import org.apache.lucene.analysis.util.MultiTermAwareComponent;
/**
* Factory for {@link PersianCharFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class PersianCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
/** Creates a new PersianCharFilterFactory */
public PersianCharFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public CharFilter create(Reader input) {
return new PersianCharFilter(input);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.fa;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PersianNormalizationFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
@ -33,9 +35,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PersianNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class PersianNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new PersianNormalizationFilterFactory */
public PersianNormalizationFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public PersianNormalizationFilter create(TokenStream input) {
return new PersianNormalizationFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.fi;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link FinnishLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.FinnishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class FinnishLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new FinnishLightStemFilterFactory */
public FinnishLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new FinnishLightStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.fr;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link FrenchLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,10 +33,18 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.ElisionFilterFactory"/&gt;
* &lt;filter class="solr.FrenchLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class FrenchLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new FrenchLightStemFilterFactory */
public FrenchLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new FrenchLightStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.fr;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link FrenchMinimalStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,10 +33,18 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.ElisionFilterFactory"/&gt;
* &lt;filter class="solr.FrenchMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class FrenchMinimalStemFilterFactory extends TokenFilterFactory {
/** Creates a new FrenchMinimalStemFilterFactory */
public FrenchMinimalStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new FrenchMinimalStemFilter(input);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.ga;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,17 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link IrishLowerCaseFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.IrishLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class IrishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new IrishLowerCaseFilterFactory */
public IrishLowerCaseFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new IrishLowerCaseFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.gl;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gl.GalicianMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GalicianMinimalStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_glplural" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.GalicianMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class GalicianMinimalStemFilterFactory extends TokenFilterFactory {
/** Creates a new GalicianMinimalStemFilterFactory */
public GalicianMinimalStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new GalicianMinimalStemFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.gl;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gl.GalicianStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link GalicianStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.GalicianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class GalicianStemFilterFactory extends TokenFilterFactory {
/** Creates a new GalicianStemFilterFactory */
public GalicianStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new GalicianStemFilter(input);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hi;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link HindiNormalizationFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.HindiNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class HindiNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new HindiNormalizationFilterFactory */
public HindiNormalizationFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new HindiNormalizationFilter(input);

View File

@ -17,22 +17,32 @@ package org.apache.lucene.analysis.hi;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link HindiStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.HindiStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class HindiStemFilterFactory extends TokenFilterFactory {
/** Creates a new HindiStemFilterFactory */
public HindiStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new HindiStemFilter(input);

View File

@ -17,23 +17,33 @@ package org.apache.lucene.analysis.hu;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link HungarianLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class HungarianLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new HungarianLightStemFilterFactory */
public HungarianLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new HungarianLightStemFilter(input);

View File

@ -22,6 +22,7 @@ import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
@ -34,7 +35,7 @@ import org.apache.lucene.util.IOUtils;
/**
* TokenFilterFactory that creates instances of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}.
* Example config for British English including a custom dictionary, case insensitive matching:
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
* dictionary=&quot;en_GB.dic,my_custom.dic&quot;
* affix=&quot;en_GB.aff&quot;
@ -51,16 +52,32 @@ import org.apache.lucene.util.IOUtils;
* See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
*/
public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private static final String PARAM_DICTIONARY = "dictionary";
private static final String PARAM_AFFIX = "affix";
private static final String PARAM_IGNORE_CASE = "ignoreCase";
private static final String PARAM_STRICT_AFFIX_PARSING = "strictAffixParsing";
private static final String TRUE = "true";
private static final String FALSE = "false";
private final String dictionaryArg;
private final String affixFile;
private final boolean ignoreCase;
private final boolean strictAffixParsing;
private HunspellDictionary dictionary;
private boolean ignoreCase = false;
/** Creates a new HunspellStemFilterFactory */
public HunspellStemFilterFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
dictionaryArg = args.remove(PARAM_DICTIONARY);
if (dictionaryArg == null) {
throw new IllegalArgumentException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
}
affixFile = args.remove(PARAM_AFFIX);
ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
strictAffixParsing = getBoolean(args, PARAM_STRICT_AFFIX_PARSING, true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
/**
* Loads the hunspell dictionary and affix files defined in the configuration
@ -69,27 +86,7 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
*/
@Override
public void inform(ResourceLoader loader) throws IOException {
assureMatchVersion();
String dictionaryArg = args.get(PARAM_DICTIONARY);
if (dictionaryArg == null) {
throw new IllegalArgumentException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
}
String dictionaryFiles[] = args.get(PARAM_DICTIONARY).split(",");
String affixFile = args.get(PARAM_AFFIX);
String pic = args.get(PARAM_IGNORE_CASE);
if(pic != null) {
if(pic.equalsIgnoreCase(TRUE)) ignoreCase = true;
else if(pic.equalsIgnoreCase(FALSE)) ignoreCase = false;
else throw new IllegalArgumentException("Unknown value for " + PARAM_IGNORE_CASE + ": " + pic + ". Must be true or false");
}
String strictAffixParsingParam = args.get(PARAM_STRICT_AFFIX_PARSING);
boolean strictAffixParsing = true;
if(strictAffixParsingParam != null) {
if(strictAffixParsingParam.equalsIgnoreCase(FALSE)) strictAffixParsing = false;
else if(strictAffixParsingParam.equalsIgnoreCase(TRUE)) strictAffixParsing = true;
else throw new IllegalArgumentException("Unknown value for " + PARAM_STRICT_AFFIX_PARSING + ": " + strictAffixParsingParam + ". Must be true or false");
}
String dictionaryFiles[] = dictionaryArg.split(",");
InputStream affix = null;
List<InputStream> dictionaries = new ArrayList<InputStream>();
@ -103,7 +100,7 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
this.dictionary = new HunspellDictionary(affix, dictionaries, luceneMatchVersion, ignoreCase, strictAffixParsing);
} catch (ParseException e) {
throw new IOException("Unable to load hunspell data! [dictionary=" + args.get("dictionary") + ",affix=" + affixFile + "]", e);
throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaryArg + ",affix=" + affixFile + "]", e);
} finally {
IOUtils.closeWhileHandlingException(affix);
IOUtils.closeWhileHandlingException(dictionaries);

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link IndonesianStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -33,15 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class IndonesianStemFilterFactory extends TokenFilterFactory {
private boolean stemDerivational = true;
private final boolean stemDerivational;
@Override
public void init(Map<String, String> args) {
super.init(args);
stemDerivational = getBoolean("stemDerivational", true);
/** Creates a new IndonesianStemFilterFactory */
public IndonesianStemFilterFactory(Map<String,String> args) {
super(args);
stemDerivational = getBoolean(args, "stemDerivational", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.in;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link IndicNormalizationFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.IndicNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class IndicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new IndicNormalizationFilterFactory */
public IndicNormalizationFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new IndicNormalizationFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.it;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link ItalianLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.ItalianLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ItalianLightStemFilterFactory extends TokenFilterFactory {
/**
 * Creates a new ItalianLightStemFilterFactory.
 * Takes no factory-specific parameters; rejects anything left in {@code args}.
 */
public ItalianLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new ItalianLightStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.lv;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.lv.LatvianStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link LatvianStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_lvstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*/
public class LatvianStemFilterFactory extends TokenFilterFactory {
/**
 * Creates a new LatvianStemFilterFactory.
 * Takes no factory-specific parameters; rejects anything left in {@code args}.
 */
public LatvianStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new LatvianStemFilter(input);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.TokenStream;
/**
* Factory for {@link ASCIIFoldingFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ASCIIFoldingFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/**
 * Creates a new ASCIIFoldingFilterFactory.
 * Takes no factory-specific parameters; rejects anything left in {@code args}.
 */
public ASCIIFoldingFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public ASCIIFoldingFilter create(TokenStream input) {
return new ASCIIFoldingFilter(input);

View File

@ -44,7 +44,7 @@ import java.util.StringTokenizer;
* "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
* assumed to be correct.<br/>
*
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -54,7 +54,6 @@ import java.util.StringTokenizer;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
* @since solr 1.3
*/
public class CapitalizationFilterFactory extends TokenFilterFactory {
@ -67,30 +66,24 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
public static final String ONLY_FIRST_WORD = "onlyFirstWord";
public static final String FORCE_FIRST_LETTER = "forceFirstLetter";
//Map<String,String> keep = new HashMap<String, String>(); // not synchronized because it is only initialized once
CharArraySet keep;
Collection<char[]> okPrefix = Collections.emptyList(); // for Example: McK
int minWordLength = 0; // don't modify capitalization for words shorter then this
int maxWordCount = CapitalizationFilter.DEFAULT_MAX_WORD_COUNT;
int maxTokenLength = CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH;
boolean onlyFirstWord = true;
boolean forceFirstLetter = true; // make sure the first letter is capitol even if it is in the keep list
final int minWordLength; // don't modify capitalization for words shorter then this
final int maxWordCount;
final int maxTokenLength;
final boolean onlyFirstWord;
final boolean forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
@Override
public void init(Map<String, String> args) {
super.init(args);
/** Creates a new CapitalizationFilterFactory */
public CapitalizationFilterFactory(Map<String, String> args) {
super(args);
assureMatchVersion();
String k = args.get(KEEP);
boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
String k = args.remove(KEEP);
if (k != null) {
StringTokenizer st = new StringTokenizer(k);
boolean ignoreCase = false;
String ignoreStr = args.get(KEEP_IGNORE_CASE);
if ("true".equalsIgnoreCase(ignoreStr)) {
ignoreCase = true;
}
keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
while (st.hasMoreTokens()) {
k = st.nextToken().trim();
@ -98,7 +91,7 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
}
}
k = args.get(OK_PREFIX);
k = args.remove(OK_PREFIX);
if (k != null) {
okPrefix = new ArrayList<char[]>();
StringTokenizer st = new StringTokenizer(k);
@ -107,29 +100,13 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
}
}
k = args.get(MIN_WORD_LENGTH);
if (k != null) {
minWordLength = Integer.valueOf(k);
}
k = args.get(MAX_WORD_COUNT);
if (k != null) {
maxWordCount = Integer.valueOf(k);
}
k = args.get(MAX_TOKEN_LENGTH);
if (k != null) {
maxTokenLength = Integer.valueOf(k);
}
k = args.get(ONLY_FIRST_WORD);
if (k != null) {
onlyFirstWord = Boolean.valueOf(k);
}
k = args.get(FORCE_FIRST_LETTER);
if (k != null) {
forceFirstLetter = Boolean.valueOf(k);
minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}

View File

@ -17,22 +17,32 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link HyphenatedWordsFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class HyphenatedWordsFilterFactory extends TokenFilterFactory {
/**
 * Creates a new HyphenatedWordsFilterFactory.
 * Takes no factory-specific parameters; rejects anything left in {@code args}.
 */
public HyphenatedWordsFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public HyphenatedWordsFilter create(TokenStream input) {
return new HyphenatedWordsFilter(input);

View File

@ -17,63 +17,51 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
import org.apache.lucene.analysis.util.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.util.Map;
import java.util.Set;
import java.io.IOException;
/**
* Factory for {@link KeepWordFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
@Override
public void init(Map<String,String> args) {
super.init(args);
private final boolean ignoreCase;
private final boolean enablePositionIncrements;
private final String wordFiles;
private CharArraySet words;
/**
 * Creates a new KeepWordFilterFactory.
 * Consumes "words" (resource file name, loaded later in inform()),
 * "ignoreCase" (default false) and "enablePositionIncrements" (default false);
 * any remaining entry in {@code args} is unrecognized and rejected.
 */
public KeepWordFilterFactory(Map<String,String> args) {
super(args);
// requires an explicit luceneMatchVersion before the word set can be built
assureMatchVersion();
// remove() consumes the key so the isEmpty() check below is accurate
wordFiles = args.remove("words");
ignoreCase = getBoolean(args, "ignoreCase", false);
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String wordFiles = args.get("words");
ignoreCase = getBoolean("ignoreCase", false);
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
if (wordFiles != null) {
words = getWordSet(loader, wordFiles, ignoreCase);
}
}
private CharArraySet words;
private boolean ignoreCase;
private boolean enablePositionIncrements;
/**
* Set the keep word list.
* NOTE: if ignoreCase==true, the words are expected to be lowercase
*/
public void setWords(Set<String> words) {
this.words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
}
public void setIgnoreCase(boolean ignoreCase) {
if (words != null && this.ignoreCase != ignoreCase) {
words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
}
this.ignoreCase = ignoreCase;
}
public boolean isEnablePositionIncrements() {
return enablePositionIncrements;
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
*/
import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
@ -26,34 +27,42 @@ import org.apache.lucene.analysis.TokenStream;
/**
* Factory for {@link KeywordMarkerFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class KeywordMarkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
public static final String PATTERN = "pattern";
private CharArraySet protectedWords;
private boolean ignoreCase;
private final String wordFiles;
private final String stringPattern;
private final boolean ignoreCase;
private Pattern pattern;
private CharArraySet protectedWords;
/**
 * Creates a new KeywordMarkerFilterFactory.
 * Consumes "protected" (word-list file), "pattern" (regex source) and
 * "ignoreCase" (default false); the word set and compiled pattern are built
 * later in inform(). Any remaining entry in {@code args} is rejected.
 */
public KeywordMarkerFilterFactory(Map<String,String> args) {
super(args);
// remove() consumes each key so the isEmpty() check below is accurate
wordFiles = args.remove(PROTECTED_TOKENS);
stringPattern = args.remove(PATTERN);
ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String wordFiles = args.get(PROTECTED_TOKENS);
String stringPattern = args.get(PATTERN);
ignoreCase = getBoolean("ignoreCase", false);
if (wordFiles != null) {
protectedWords = getWordSet(loader, wordFiles, ignoreCase);
}
if (stringPattern != null) {
pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern);
}
}
public boolean isIgnoreCase() {

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -28,6 +30,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* {@link RemoveDuplicatesTokenFilterFactory} later in the analysis chain.
*/
public final class KeywordRepeatFilterFactory extends TokenFilterFactory {
/**
 * Creates a new KeywordRepeatFilterFactory.
 * Takes no factory-specific parameters; rejects anything left in {@code args}.
 */
public KeywordRepeatFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new KeywordRepeatFilter(input);

View File

@ -25,32 +25,30 @@ import java.util.Map;
/**
* Factory for {@link LengthFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.LengthFilterFactory" min="0" max="1" enablePositionIncrements="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class LengthFilterFactory extends TokenFilterFactory {
int min,max;
boolean enablePositionIncrements;
final int min;
final int max;
final boolean enablePositionIncrements;
public static final String MIN_KEY = "min";
public static final String MAX_KEY = "max";
@Override
public void init(Map<String, String> args) {
super.init(args);
String minKey = args.get(MIN_KEY);
String maxKey = args.get(MAX_KEY);
if (minKey == null || maxKey == null) {
throw new IllegalArgumentException("Both " + MIN_KEY + " and " + MAX_KEY + " are mandatory");
/** Creates a new LengthFilterFactory */
public LengthFilterFactory(Map<String, String> args) {
super(args);
min = getInt(args, MIN_KEY, 0, false);
max = getInt(args, MAX_KEY, 0, false);
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
min=Integer.parseInt(minKey);
max=Integer.parseInt(maxKey);
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
}
@Override

View File

@ -25,28 +25,32 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link LimitTokenCountFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10" consumeAllTokens="false" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* &lt;/fieldType&gt;</pre>
* <p>
* The {@code consumeAllTokens} property is optional and defaults to {@code false}. See {@link LimitTokenCountFilter} for an explanation of its use.
* The {@code consumeAllTokens} property is optional and defaults to {@code false}.
* See {@link LimitTokenCountFilter} for an explanation of its use.
*/
public class LimitTokenCountFilterFactory extends TokenFilterFactory {
public static final String MAX_TOKEN_COUNT_KEY = "maxTokenCount";
public static final String CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
int maxTokenCount;
boolean consumeAllTokens;
final int maxTokenCount;
final boolean consumeAllTokens;
@Override
public void init(Map<String, String> args) {
super.init( args );
maxTokenCount = getInt(MAX_TOKEN_COUNT_KEY);
consumeAllTokens = getBoolean(CONSUME_ALL_TOKENS_KEY, false);
/**
 * Creates a new LimitTokenCountFilterFactory.
 * Requires "maxTokenCount" (no default — getInt without a default presumably
 * throws if absent; confirm against AbstractAnalysisFactory) and consumes the
 * optional "consumeAllTokens" (default false). Leftover entries are rejected.
 */
public LimitTokenCountFilterFactory(Map<String, String> args) {
super(args);
maxTokenCount = getInt(args, MAX_TOKEN_COUNT_KEY);
consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -23,13 +23,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link LimitTokenPositionFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_limit_pos" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.LimitTokenPositionFilterFactory" maxTokenPosition="3" consumeAllTokens="false" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* &lt;/fieldType&gt;</pre>
* <p>
* The {@code consumeAllTokens} property is optional and defaults to {@code false}.
* See {@link LimitTokenPositionFilter} for an explanation of its use.
@ -38,14 +38,17 @@ public class LimitTokenPositionFilterFactory extends TokenFilterFactory {
public static final String MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
public static final String CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
int maxTokenPosition;
boolean consumeAllTokens;
final int maxTokenPosition;
final boolean consumeAllTokens;
@Override
public void init(Map<String,String> args) {
super.init(args);
maxTokenPosition = getInt(MAX_TOKEN_POSITION_KEY);
consumeAllTokens = getBoolean(CONSUME_ALL_TOKENS_KEY, false);
/**
 * Creates a new LimitTokenPositionFilterFactory.
 * Requires "maxTokenPosition" and consumes the optional "consumeAllTokens"
 * (default false); any remaining entry in {@code args} is rejected.
 */
public LimitTokenPositionFilterFactory(Map<String,String> args) {
super(args);
maxTokenPosition = getInt(args, MAX_TOKEN_POSITION_KEY);
consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,22 +17,32 @@ package org.apache.lucene.analysis.miscellaneous;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link RemoveDuplicatesTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class RemoveDuplicatesTokenFilterFactory extends TokenFilterFactory {
/**
 * Creates a new RemoveDuplicatesTokenFilterFactory.
 * Takes no factory-specific parameters; rejects anything left in {@code args}.
 */
public RemoveDuplicatesTokenFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public RemoveDuplicatesTokenFilter create(TokenStream input) {
return new RemoveDuplicatesTokenFilter(input);

View File

@ -19,35 +19,42 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
import org.apache.lucene.analysis.util.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link StemmerOverrideFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class StemmerOverrideFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private StemmerOverrideMap dictionary;
private boolean ignoreCase;
private final String dictionaryFiles;
private final boolean ignoreCase;
/**
 * Creates a new StemmerOverrideFilterFactory.
 * Consumes "dictionary" (file name(s), loaded later in inform()) and
 * "ignoreCase" (default false); any remaining entry in {@code args} is rejected.
 */
public StemmerOverrideFilterFactory(Map<String,String> args) {
super(args);
// remove() consumes the key so the isEmpty() check below is accurate
dictionaryFiles = args.remove("dictionary");
ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String dictionaryFiles = args.get("dictionary");
ignoreCase = getBoolean("ignoreCase", false);
if (dictionaryFiles != null) {
assureMatchVersion();
List<String> files = splitFileNames(dictionaryFiles);

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link TrimFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.NGramTokenizerFactory"/&gt;
@ -37,15 +37,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
*/
public class TrimFilterFactory extends TokenFilterFactory {
protected boolean updateOffsets = false;
protected final boolean updateOffsets;
@Override
public void init(Map<String,String> args) {
super.init( args );
String v = args.get( "updateOffsets" );
if (v != null) {
updateOffsets = Boolean.valueOf( v );
/**
 * Creates a new TrimFilterFactory.
 * Consumes the optional "updateOffsets" argument (default false);
 * any remaining entry in {@code args} is rejected.
 */
public TrimFilterFactory(Map<String,String> args) {
super(args);
updateOffsets = getBoolean(args, "updateOffsets", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}

View File

@ -33,10 +33,9 @@ import java.io.IOException;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
/**
* Factory for {@link WordDelimiterFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -46,20 +45,62 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
* types="wdfftypes.txt" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
public static final String TYPES = "types";
private final String wordFiles;
private final String types;
private final int flags;
byte[] typeTable = null;
private CharArraySet protectedWords = null;
/**
 * Creates a new WordDelimiterFilterFactory.
 * Builds the WordDelimiterFilter flag bitset from the integer arguments
 * (each treated as a boolean: non-zero sets the flag), then consumes the
 * "protected" and "types" file-name arguments (resolved later in inform()).
 * Any entry left in {@code args} afterwards is unrecognized and rejected.
 */
public WordDelimiterFilterFactory(Map<String, String> args) {
super(args);
// accumulate flags locally, assign to the final field once at the end
int flags = 0;
// defaults mirror the documented factory defaults: word/number parts,
// case-change and numeric splits, and English possessive stemming are on
if (getInt(args, "generateWordParts", 1) != 0) {
flags |= GENERATE_WORD_PARTS;
}
if (getInt(args, "generateNumberParts", 1) != 0) {
flags |= GENERATE_NUMBER_PARTS;
}
if (getInt(args, "catenateWords", 0) != 0) {
flags |= CATENATE_WORDS;
}
if (getInt(args, "catenateNumbers", 0) != 0) {
flags |= CATENATE_NUMBERS;
}
if (getInt(args, "catenateAll", 0) != 0) {
flags |= CATENATE_ALL;
}
if (getInt(args, "splitOnCaseChange", 1) != 0) {
flags |= SPLIT_ON_CASE_CHANGE;
}
if (getInt(args, "splitOnNumerics", 1) != 0) {
flags |= SPLIT_ON_NUMERICS;
}
if (getInt(args, "preserveOriginal", 0) != 0) {
flags |= PRESERVE_ORIGINAL;
}
if (getInt(args, "stemEnglishPossessive", 1) != 0) {
flags |= STEM_ENGLISH_POSSESSIVE;
}
// remove() consumes each key so the isEmpty() check below is accurate
wordFiles = args.remove(PROTECTED_TOKENS);
types = args.remove(TYPES);
this.flags = flags;
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String wordFiles = args.get(PROTECTED_TOKENS);
if (wordFiles != null) {
protectedWords = getWordSet(loader, wordFiles, false);
}
String types = args.get(TYPES);
if (types != null) {
List<String> files = splitFileNames( types );
List<String> wlist = new ArrayList<String>();
@ -71,42 +112,6 @@ public class WordDelimiterFilterFactory extends TokenFilterFactory implements Re
}
}
private CharArraySet protectedWords = null;
private int flags;
byte[] typeTable = null;
@Override
public void init(Map<String, String> args) {
super.init(args);
if (getInt("generateWordParts", 1) != 0) {
flags |= GENERATE_WORD_PARTS;
}
if (getInt("generateNumberParts", 1) != 0) {
flags |= GENERATE_NUMBER_PARTS;
}
if (getInt("catenateWords", 0) != 0) {
flags |= CATENATE_WORDS;
}
if (getInt("catenateNumbers", 0) != 0) {
flags |= CATENATE_NUMBERS;
}
if (getInt("catenateAll", 0) != 0) {
flags |= CATENATE_ALL;
}
if (getInt("splitOnCaseChange", 1) != 0) {
flags |= SPLIT_ON_CASE_CHANGE;
}
if (getInt("splitOnNumerics", 1) != 0) {
flags |= SPLIT_ON_NUMERICS;
}
if (getInt("preserveOriginal", 0) != 0) {
flags |= PRESERVE_ORIGINAL;
}
if (getInt("stemEnglishPossessive", 1) != 0) {
flags |= STEM_ENGLISH_POSSESSIVE;
}
}
@Override
public WordDelimiterFilter create(TokenStream input) {
return new WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,

View File

@ -24,36 +24,33 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Creates new instances of {@link EdgeNGramTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class EdgeNGramFilterFactory extends TokenFilterFactory {
private int maxGramSize = 0;
private final int maxGramSize;
private final int minGramSize;
private final String side;
private int minGramSize = 0;
/** Creates a new EdgeNGramFilterFactory */
public EdgeNGramFilterFactory(Map<String, String> args) {
super(args);
minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
private String side;
@Override
public void init(Map<String, String> args) {
super.init(args);
String maxArg = args.get("maxGramSize");
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg)
: EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
String minArg = args.get("minGramSize");
minGramSize = (minArg != null ? Integer.parseInt(minArg)
: EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
side = args.get("side");
if (side == null) {
String sideArg = args.remove("side");
if (sideArg == null) {
side = EdgeNGramTokenFilter.Side.FRONT.getLabel();
} else {
side = sideArg;
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}

View File

@ -25,33 +25,32 @@ import java.util.Map;
/**
* Creates new instances of {@link EdgeNGramTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class EdgeNGramTokenizerFactory extends TokenizerFactory {
private int maxGramSize = 0;
private int minGramSize = 0;
private String side;
@Override
public void init(Map<String, String> args) {
super.init(args);
String maxArg = args.get("maxGramSize");
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg) : EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
String minArg = args.get("minGramSize");
minGramSize = (minArg != null ? Integer.parseInt(minArg) : EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
side = args.get("side");
if (side == null) {
side = EdgeNGramTokenizer.Side.FRONT.getLabel();
private final int maxGramSize;
private final int minGramSize;
private final String side;
/**
 * Creates a new EdgeNGramTokenizerFactory.
 * Consumes "minGramSize", "maxGramSize" and "side" (default: front);
 * any remaining entry in {@code args} is rejected.
 */
public EdgeNGramTokenizerFactory(Map<String, String> args) {
super(args);
// NOTE(review): this Tokenizer factory reads defaults and Side from
// EdgeNGramTokenFilter, whereas the replaced code used EdgeNGramTokenizer
// constants (see removed lines above) — presumably the values are identical,
// but confirm this was intentional.
minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
String sideArg = args.remove("side");
if (sideArg == null) {
side = EdgeNGramTokenFilter.Side.FRONT.getLabel();
} else {
side = sideArg;
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}

View File

@ -24,31 +24,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link NGramTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class NGramFilterFactory extends TokenFilterFactory {
private int maxGramSize = 0;
private final int maxGramSize;
private final int minGramSize;
private int minGramSize = 0;
/** Creates a new NGramFilterFactory */
public NGramFilterFactory(Map<String, String> args) {
super(args);
minGramSize = getInt(args, "minGramSize", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
maxGramSize = getInt(args, "maxGramSize", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
/** Initialize the n-gram min and max sizes and the side from which one should start tokenizing. */
@Override
public void init(Map<String, String> args) {
super.init(args);
String maxArg = args.get("maxGramSize");
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg)
: NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
String minArg = args.get("minGramSize");
minGramSize = (minArg != null ? Integer.parseInt(minArg)
: NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -26,27 +26,26 @@ import java.util.Map;
/**
* Factory for {@link NGramTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class NGramTokenizerFactory extends TokenizerFactory {
private int maxGramSize = 0;
private int minGramSize = 0;
/** Initializes the n-gram min and max sizes and the side from which one should start tokenizing. */
@Override
public void init(Map<String, String> args) {
super.init(args);
String maxArg = args.get("maxGramSize");
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg) : NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
String minArg = args.get("minGramSize");
minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
private final int maxGramSize;
private final int minGramSize;
/**
 * Creates a new NGramTokenizerFactory.
 * Consumes "minGramSize" and "maxGramSize" (falling back to the
 * NGramTokenizer defaults); any remaining entry in {@code args} is rejected.
 */
public NGramTokenizerFactory(Map<String, String> args) {
super(args);
minGramSize = getInt(args, "minGramSize", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
maxGramSize = getInt(args, "maxGramSize", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
/** Creates the {@link TokenStream} of n-grams from the given {@link Reader} and {@link AttributeFactory}. */

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.no;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link NorwegianLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*/
public class NorwegianLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new NorwegianLightStemFilterFactory */
public NorwegianLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new NorwegianLightStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.no;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link NorwegianMinimalStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*/
public class NorwegianMinimalStemFilterFactory extends TokenFilterFactory {
/** Creates a new NorwegianMinimalStemFilterFactory */
public NorwegianMinimalStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new NorwegianMinimalStemFilter(input);

View File

@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
* <code>Books/Fic</code>...
* </p>
*
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
* &lt;analyzer type="index"&gt;
* &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
@ -57,7 +57,7 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
* <code>Books/NonFic/Science/Physics/Theory</code> or
* <code>Books/NonFic/Law</code>.
* </p>
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
* &lt;analyzer type="index"&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
@ -69,59 +69,39 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
* </pre>
*/
public class PathHierarchyTokenizerFactory extends TokenizerFactory {
private final char delimiter;
private final char replacement;
private final boolean reverse;
private final int skip;
private char delimiter;
private char replacement;
private boolean reverse = false;
private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
/** Creates a new PathHierarchyTokenizerFactory */
public PathHierarchyTokenizerFactory(Map<String,String> args) {
super(args);
delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER);
replacement = getChar(args, "replace", delimiter);
reverse = getBoolean(args, "reverse", false);
skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
/**
* Require a configured pattern
*/
@Override
public void init(Map<String,String> args){
super.init( args );
String v = args.get( "delimiter" );
if( v != null ){
if( v.length() != 1 ){
throw new IllegalArgumentException("delimiter should be a char. \"" + v + "\" is invalid");
private char getChar(Map<String,String> args, String name, char defaultValue) {
String v = args.remove(name);
if (v != null) {
if (v.length() != 1) {
throw new IllegalArgumentException(name + " should be a char. \"" + v + "\" is invalid");
} else {
return v.charAt(0);
}
else{
delimiter = v.charAt(0);
}
}
else{
delimiter = PathHierarchyTokenizer.DEFAULT_DELIMITER;
}
v = args.get( "replace" );
if( v != null ){
if( v.length() != 1 ){
throw new IllegalArgumentException("replace should be a char. \"" + v + "\" is invalid");
}
else{
replacement = v.charAt(0);
}
}
else{
replacement = delimiter;
}
v = args.get( "reverse" );
if( v != null ){
reverse = "true".equals( v );
}
v = args.get( "skip" );
if( v != null ){
skip = Integer.parseInt( v );
} else {
return defaultValue;
}
}
@Override
public Tokenizer create(AttributeFactory factory, Reader input) {
if( reverse ) {
if (reverse) {
return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
}
return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
/**
* Factory for {@link PatternReplaceCharFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.PatternReplaceCharFilterFactory"
@ -36,26 +36,29 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
* @since Solr 3.1
*/
public class PatternReplaceCharFilterFactory extends CharFilterFactory {
private Pattern p;
private String replacement;
private final Pattern pattern;
private final String replacement;
@Override
public void init(Map<String, String> args) {
super.init( args );
p = getPattern("pattern");
replacement = args.get( "replacement" );
if( replacement == null )
/** Creates a new PatternReplaceCharFilterFactory */
public PatternReplaceCharFilterFactory(Map<String, String> args) {
super(args);
pattern = getPattern(args, "pattern");
String v = args.remove("replacement");
if (v == null) {
replacement = "";
// TODO: throw exception if you set maxBlockChars or blockDelimiters ?
} else {
replacement = v;
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public CharFilter create(Reader input) {
return new PatternReplaceCharFilter( p, replacement, input );
return new PatternReplaceCharFilter(pattern, replacement, input);
}
}

View File

@ -23,11 +23,10 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
* Factory for {@link PatternReplaceFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
@ -39,34 +38,32 @@ import java.util.regex.PatternSyntaxException;
* @see PatternReplaceFilter
*/
public class PatternReplaceFilterFactory extends TokenFilterFactory {
Pattern p;
String replacement;
boolean all = true;
final Pattern pattern;
final String replacement;
final boolean replaceAll;
@Override
public void init(Map<String, String> args) {
super.init(args);
p = getPattern("pattern");
replacement = args.get("replacement");
/** Creates a new PatternReplaceFilterFactory */
public PatternReplaceFilterFactory(Map<String, String> args) {
super(args);
pattern = getPattern(args, "pattern");
replacement = args.remove("replacement");
String r = args.get("replace");
if (null != r) {
if (r.equals("all")) {
all = true;
} else {
if (r.equals("first")) {
all = false;
} else {
throw new IllegalArgumentException
("Configuration Error: 'replace' must be 'first' or 'all' in "
+ this.getClass().getName());
}
}
String v = args.remove("replace");
if (v == null || v.equals("all")) {
replaceAll = true;
} else if (v.equals("first")) {
replaceAll = false;
} else {
throw new IllegalArgumentException("Configuration Error: " +
"'replace' must be 'first' or 'all' in " + getClass().getName());
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public PatternReplaceFilter create(TokenStream input) {
return new PatternReplaceFilter(input, p, replacement, all);
return new PatternReplaceFilter(input, pattern, replacement, replaceAll);
}
}

View File

@ -17,7 +17,6 @@ package org.apache.lucene.analysis.pattern;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
import java.util.regex.Pattern;
@ -45,13 +44,13 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
* pattern = \'([^\']+)\'
* group = 0
* input = aaa 'bbb' 'ccc'
*</pre>
* </pre>
* the output will be two tokens: 'bbb' and 'ccc' (including the ' marks). With the same input
* but using group=1, the output would be: bbb and ccc (no ' marks)
* </p>
* <p>NOTE: This Tokenizer does not output tokens that are of zero length.</p>
*
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/&gt;
@ -60,29 +59,27 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
*
* @see PatternTokenizer
* @since solr1.2
*
*/
public class PatternTokenizerFactory extends TokenizerFactory
{
public class PatternTokenizerFactory extends TokenizerFactory {
public static final String PATTERN = "pattern";
public static final String GROUP = "group";
protected Pattern pattern;
protected int group;
protected final Pattern pattern;
protected final int group;
/**
* Require a configured pattern
*/
@Override
public void init(Map<String,String> args)
{
super.init(args);
pattern = getPattern( PATTERN );
/** Creates a new PatternTokenizerFactory */
public PatternTokenizerFactory(Map<String,String> args) {
super(args);
pattern = getPattern(args, PATTERN);
group = -1; // use 'split'
String g = args.get( GROUP );
if( g != null ) {
group = Integer.parseInt( g );
String v = args.remove(GROUP);
if (v == null) {
group = -1; // use 'split'
} else {
group = Integer.parseInt(v);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}

View File

@ -30,41 +30,51 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
import java.util.Map;
/**
*
* Factory for {@link DelimitedPayloadTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
*/
public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String ENCODER_ATTR = "encoder";
public static final String DELIMITER_ATTR = "delimiter";
private final String encoderClass;
private final char delimiter;
private PayloadEncoder encoder;
private char delimiter = '|';
/** Creates a new DelimitedPayloadTokenFilterFactory */
public DelimitedPayloadTokenFilterFactory(Map<String, String> args) {
super(args);
encoderClass = args.remove(ENCODER_ATTR);
if (encoderClass == null) {
throw new IllegalArgumentException("Parameter " + ENCODER_ATTR + " is mandatory");
}
String delim = args.remove(DELIMITER_ATTR);
if (delim == null) {
delimiter = '|';
} else if (delim.length() == 1) {
delimiter = delim.charAt(0);
} else {
throw new IllegalArgumentException("Delimiter must be one character only");
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public DelimitedPayloadTokenFilter create(TokenStream input) {
return new DelimitedPayloadTokenFilter(input, delimiter, encoder);
}
@Override
public void init(Map<String, String> args) {
super.init(args);
}
@Override
public void inform(ResourceLoader loader) {
String encoderClass = args.get(ENCODER_ATTR);
if (encoderClass == null) {
throw new IllegalArgumentException("Parameter " + ENCODER_ATTR + " is mandatory");
}
if (encoderClass.equals("float")){
encoder = new FloatEncoder();
} else if (encoderClass.equals("integer")){
@ -74,14 +84,5 @@ public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory imple
} else {
encoder = loader.newInstance(encoderClass, PayloadEncoder.class);
}
String delim = args.get(DELIMITER_ATTR);
if (delim != null){
if (delim.length() == 1) {
delimiter = delim.charAt(0);
} else{
throw new IllegalArgumentException("Delimiter must be one character only");
}
}
}
}

View File

@ -24,28 +24,32 @@ import java.util.Map;
/**
* Factory for {@link NumericPayloadTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_numpayload" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
private float payload;
private String typeMatch;
@Override
public void init(Map<String, String> args) {
super.init(args);
String payloadArg = args.get("payload");
typeMatch = args.get("typeMatch");
private final float payload;
private final String typeMatch;
/** Creates a new NumericPayloadTokenFilterFactory */
public NumericPayloadTokenFilterFactory(Map<String, String> args) {
super(args);
String payloadArg = args.remove("payload");
typeMatch = args.remove("typeMatch");
if (payloadArg == null || typeMatch == null) {
throw new IllegalArgumentException("Both payload and typeMatch are required");
}
payload = Float.parseFloat(payloadArg);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public NumericPayloadTokenFilter create(TokenStream input) {
return new NumericPayloadTokenFilter(input,payload,typeMatch);

View File

@ -17,22 +17,32 @@ package org.apache.lucene.analysis.payloads;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link TokenOffsetPayloadTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TokenOffsetPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class TokenOffsetPayloadTokenFilterFactory extends TokenFilterFactory {
/** Creates a new TokenOffsetPayloadTokenFilterFactory */
public TokenOffsetPayloadTokenFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenOffsetPayloadTokenFilter create(TokenStream input) {
return new TokenOffsetPayloadTokenFilter(input);

View File

@ -17,22 +17,32 @@ package org.apache.lucene.analysis.payloads;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link TypeAsPayloadTokenFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TypeAsPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class TypeAsPayloadTokenFilterFactory extends TokenFilterFactory {
/** Creates a new TypeAsPayloadTokenFilterFactory */
public TypeAsPayloadTokenFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TypeAsPayloadTokenFilter create(TokenStream input) {
return new TypeAsPayloadTokenFilter(input);

View File

@ -27,7 +27,7 @@ import java.util.Map;
* Factory for {@link PositionFilter}.
* Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its
* original positionIncrement value. The default positionIncrement value is zero.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_position" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -35,17 +35,19 @@ import java.util.Map;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
* @see org.apache.lucene.analysis.position.PositionFilter
* @since solr 1.4
*/
public class PositionFilterFactory extends TokenFilterFactory {
private int positionIncrement;
private final int positionIncrement;
@Override
public void init(Map<String, String> args) {
super.init(args);
positionIncrement = getInt("positionIncrement", 0);
/** Creates a new PositionFilterFactory */
public PositionFilterFactory(Map<String,String> args) {
super(args);
positionIncrement = getInt(args, "positionIncrement", 0);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PortugueseLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PortugueseLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class PortugueseLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new PortugueseLightStemFilterFactory */
public PortugueseLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new PortugueseLightStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PortugueseMinimalStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PortugueseMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class PortugueseMinimalStemFilterFactory extends TokenFilterFactory {
/** Creates a new PortugueseMinimalStemFilterFactory */
public PortugueseMinimalStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new PortugueseMinimalStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link PortugueseStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PortugueseStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class PortugueseStemFilterFactory extends TokenFilterFactory {
/** Creates a new PortugueseStemFilterFactory */
public PortugueseStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new PortugueseStemFilter(input);

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.reverse;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link ReverseStringFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -31,13 +33,21 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
* @since solr 1.4
*/
public class ReverseStringFilterFactory extends TokenFilterFactory {
/** Creates a new ReverseStringFilterFactory */
public ReverseStringFilterFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public ReverseStringFilter create(TokenStream in) {
assureMatchVersion();
return new ReverseStringFilter(luceneMatchVersion,in);
}
}

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.ru;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link RussianLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.RussianLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class RussianLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new RussianLightStemFilterFactory */
public RussianLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new RussianLightStemFilter(input);

View File

@ -25,7 +25,7 @@ import java.util.Map;
/**
* Factory for {@link ShingleFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -33,25 +33,24 @@ import java.util.Map;
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ShingleFilterFactory extends TokenFilterFactory {
private int minShingleSize;
private int maxShingleSize;
private boolean outputUnigrams;
private boolean outputUnigramsIfNoShingles;
private String tokenSeparator;
private final int minShingleSize;
private final int maxShingleSize;
private final boolean outputUnigrams;
private final boolean outputUnigramsIfNoShingles;
private final String tokenSeparator;
@Override
public void init(Map<String, String> args) {
super.init(args);
maxShingleSize = getInt("maxShingleSize",
/** Creates a new ShingleFilterFactory */
public ShingleFilterFactory(Map<String, String> args) {
super(args);
maxShingleSize = getInt(args, "maxShingleSize",
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
if (maxShingleSize < 2) {
throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize
+ ") - must be at least 2");
}
minShingleSize = getInt("minShingleSize",
minShingleSize = getInt(args, "minShingleSize",
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
if (minShingleSize < 2) {
throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
@ -62,12 +61,16 @@ public class ShingleFilterFactory extends TokenFilterFactory {
+ ") - must be no greater than maxShingleSize ("
+ maxShingleSize + ")");
}
outputUnigrams = getBoolean("outputUnigrams", true);
outputUnigramsIfNoShingles = getBoolean("outputUnigramsIfNoShingles", false);
outputUnigrams = getBoolean(args, "outputUnigrams", true);
outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
tokenSeparator = args.containsKey("tokenSeparator")
? args.get("tokenSeparator")
? args.remove("tokenSeparator")
: ShingleFilter.TOKEN_SEPARATOR;
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public ShingleFilter create(TokenStream input) {
ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);

View File

@ -31,7 +31,7 @@ import org.tartarus.snowball.SnowballProgram;
* Factory for {@link SnowballFilter}, with configurable language
* <p>
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -39,26 +39,35 @@ import org.tartarus.snowball.SnowballProgram;
* &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
*/
public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";
private String language = "English";
private final String language;
private final String wordFiles;
private Class<? extends SnowballProgram> stemClass;
private CharArraySet protectedWords = null;
/** Creates a new SnowballPorterFilterFactory */
public SnowballPorterFilterFactory(Map<String,String> args) {
super(args);
String cfgLanguage = args.remove("language");
if (cfgLanguage == null) {
language = "English";
} else {
language = cfgLanguage;
}
wordFiles = args.remove(PROTECTED_TOKENS);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String cfgLanguage = args.get("language");
if (cfgLanguage != null)
language = cfgLanguage;
String className = "org.tartarus.snowball.ext." + language + "Stemmer";
stemClass = loader.newInstance(className, SnowballProgram.class).getClass();
String wordFiles = args.get(PROTECTED_TOKENS);
if (wordFiles != null) {
protectedWords = getWordSet(loader, wordFiles, false);
}

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.standard;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@ -24,17 +26,24 @@ import org.apache.lucene.analysis.standard.ClassicFilter;
/**
* Factory for {@link ClassicFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
* &lt;filter class="solr.ClassicFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
*/
public class ClassicFilterFactory extends TokenFilterFactory {
/** Creates a new ClassicFilterFactory */
public ClassicFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenFilter create(TokenStream input) {
return new ClassicFilter(input);

View File

@ -25,26 +25,25 @@ import java.util.Map;
/**
* Factory for {@link ClassicTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
*/
public class ClassicTokenizerFactory extends TokenizerFactory {
private final int maxTokenLength;
private int maxTokenLength;
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new ClassicTokenizerFactory */
public ClassicTokenizerFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
maxTokenLength = getInt("maxTokenLength",
maxTokenLength = getInt(args, "maxTokenLength",
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -25,20 +25,23 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link StandardFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.StandardFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class StandardFilterFactory extends TokenFilterFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new StandardFilterFactory */
public StandardFilterFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -25,24 +25,24 @@ import java.util.Map;
/**
* Factory for {@link StandardTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class StandardTokenizerFactory extends TokenizerFactory {
private final int maxTokenLength;
private int maxTokenLength;
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new StandardTokenizerFactory */
public StandardTokenizerFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -25,26 +25,25 @@ import java.util.Map;
/**
* Factory for {@link UAX29URLEmailTokenizer}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*
*/
public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
private final int maxTokenLength;
private int maxTokenLength;
@Override
public void init(Map<String,String> args) {
super.init(args);
/** Creates a new UAX29URLEmailTokenizerFactory */
public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
maxTokenLength = getInt("maxTokenLength",
maxTokenLength = getInt(args, "maxTokenLength",
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override

View File

@ -17,13 +17,15 @@ package org.apache.lucene.analysis.sv;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link SwedishLightStemFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class SwedishLightStemFilterFactory extends TokenFilterFactory {
/** Creates a new SwedishLightStemFilterFactory */
public SwedishLightStemFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new SwedishLightStemFilter(input);

View File

@ -25,7 +25,9 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -41,7 +43,7 @@ import org.apache.lucene.util.Version;
/**
* Factory for {@link SynonymFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
@ -52,8 +54,32 @@ import org.apache.lucene.util.Version;
* &lt;/fieldType&gt;</pre>
*/
public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private final boolean ignoreCase;
private final String tokenizerFactory;
private final String synonyms;
private final String format;
private final boolean expand;
private SynonymMap map;
private boolean ignoreCase;
public SynonymFilterFactory(Map<String,String> args) {
super(args);
ignoreCase = getBoolean(args, "ignoreCase", false);
tokenizerFactory = args.remove("tokenizerFactory");
if (tokenizerFactory != null) {
assureMatchVersion();
}
synonyms = args.remove("synonyms");
if (synonyms == null) {
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
}
format = args.remove("format");
expand = getBoolean(args, "expand", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
@ -64,12 +90,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
@Override
public void inform(ResourceLoader loader) throws IOException {
final boolean ignoreCase = getBoolean("ignoreCase", false);
this.ignoreCase = ignoreCase;
String tf = args.get("tokenizerFactory");
final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
Analyzer analyzer = new Analyzer() {
@Override
@ -80,7 +101,6 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
}
};
String format = args.get("format");
try {
if (format == null || format.equals("solr")) {
// TODO: expose dedup as a parameter?
@ -99,12 +119,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
/**
* Load synonyms from the solr format, "format=solr".
*/
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
final boolean expand = getBoolean("expand", true);
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
@ -128,11 +143,6 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
* Load synonyms from the wordnet format, "format=wordnet".
*/
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
final boolean expand = getBoolean("expand", true);
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
@ -154,12 +164,17 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
// (there are no tests for this functionality)
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
tokFactory.setLuceneMatchVersion(luceneMatchVersion);
tokFactory.init(args);
if (tokFactory instanceof ResourceLoaderAware) {
((ResourceLoaderAware) tokFactory).inform(loader);
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", getLuceneMatchVersion().toString());
Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class);
try {
TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(args);
if (tokFactory instanceof ResourceLoaderAware) {
((ResourceLoaderAware) tokFactory).inform(loader);
}
return tokFactory;
} catch (Exception e) {
throw new RuntimeException(e);
}
return tokFactory;
}
}

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.th;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.th.ThaiWordFilter;
import org.apache.lucene.analysis.TokenStream;
@ -24,19 +26,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link ThaiWordFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.ThaiWordFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ThaiWordFilterFactory extends TokenFilterFactory {
/** Creates a new ThaiWordFilterFactory */
public ThaiWordFilterFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public ThaiWordFilter create(TokenStream input) {
assureMatchVersion();
return new ThaiWordFilter(luceneMatchVersion, input);
}
}

View File

@ -17,6 +17,8 @@ package org.apache.lucene.analysis.tr;
* limitations under the License.
*/
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link TurkishLowerCaseFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* &lt;/fieldType&gt;</pre>
*/
public class TurkishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
/** Creates a new TurkishLowerCaseFilterFactory */
public TurkishLowerCaseFilterFactory(Map<String,String> args) {
super(args);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input);

View File

@ -41,37 +41,29 @@ import java.util.regex.PatternSyntaxException;
* <p>
* The typical lifecycle for a factory consumer is:
* <ol>
* <li>Create factory via its a no-arg constructor
* <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
* <li>Calls {@link #init(Map)} passing arguments as key-value mappings.
* <li>Create factory via its constructor (or via XXXFactory.forName)
* <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
* <li>Consumer calls create() to obtain instances.
* </ol>
*/
public abstract class AbstractAnalysisFactory {
/** The original args, before init() processes them */
private Map<String,String> originalArgs;
/** The init args */
protected Map<String,String> args;
/** The original args, before any processing */
private final Map<String,String> originalArgs;
/** the luceneVersion arg */
protected Version luceneMatchVersion = null;
protected final Version luceneMatchVersion;
/**
* Initialize this factory via a set of key-value pairs.
*/
public void init(Map<String,String> args) {
originalArgs = Collections.unmodifiableMap(args);
this.args = new HashMap<String,String>(args);
}
public Map<String,String> getArgs() {
return args;
protected AbstractAnalysisFactory(Map<String,String> args) {
originalArgs = Collections.unmodifiableMap(new HashMap<String,String>(args));
String version = args.remove("luceneMatchVersion");
luceneMatchVersion = version == null ? null : Version.parseLeniently(version);
}
public Map<String,String> getOriginalArgs() {
public final Map<String,String> getOriginalArgs() {
return originalArgs;
}
@ -85,24 +77,20 @@ public abstract class AbstractAnalysisFactory {
}
}
public void setLuceneMatchVersion(Version luceneMatchVersion) {
this.luceneMatchVersion = luceneMatchVersion;
}
public Version getLuceneMatchVersion() {
public final Version getLuceneMatchVersion() {
return this.luceneMatchVersion;
}
protected int getInt(String name) {
return getInt(name, -1, false);
protected final int getInt(Map<String,String> args, String name) {
return getInt(args, name, -1, false);
}
protected int getInt(String name, int defaultVal) {
return getInt(name, defaultVal, true);
protected final int getInt(Map<String,String> args, String name, int defaultVal) {
return getInt(args, name, defaultVal, true);
}
protected int getInt(String name, int defaultVal, boolean useDefault) {
String s = args.get(name);
protected final int getInt(Map<String,String> args, String name, int defaultVal, boolean useDefault) {
String s = args.remove(name);
if (s == null) {
if (useDefault) {
return defaultVal;
@ -112,12 +100,12 @@ public abstract class AbstractAnalysisFactory {
return Integer.parseInt(s);
}
protected boolean getBoolean(String name, boolean defaultVal) {
return getBoolean(name, defaultVal, true);
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
return getBoolean(args, name, defaultVal, true);
}
protected boolean getBoolean(String name, boolean defaultVal, boolean useDefault) {
String s = args.get(name);
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal, boolean useDefault) {
String s = args.remove(name);
if (s==null) {
if (useDefault) return defaultVal;
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
@ -128,13 +116,13 @@ public abstract class AbstractAnalysisFactory {
/**
* Compiles a pattern for the value of the specified argument key <code>name</code>
*/
protected Pattern getPattern(String name) {
protected final Pattern getPattern(Map<String,String> args, String name) {
try {
String pat = args.get(name);
String pat = args.remove(name);
if (null == pat) {
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Pattern.compile(args.get(name));
return Pattern.compile(pat);
} catch (PatternSyntaxException e) {
throw new IllegalArgumentException
("Configuration Error: '" + name + "' can not be parsed in " +
@ -146,7 +134,7 @@ public abstract class AbstractAnalysisFactory {
* Returns as {@link CharArraySet} from wordFiles, which
* can be a comma-separated list of filenames
*/
protected CharArraySet getWordSet(ResourceLoader loader,
protected final CharArraySet getWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
List<String> files = splitFileNames(wordFiles);
@ -168,13 +156,13 @@ public abstract class AbstractAnalysisFactory {
/**
* Returns the resource's lines (with content treated as UTF-8)
*/
protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
}
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
* except the input is in snowball format. */
protected CharArraySet getSnowballWordSet(ResourceLoader loader,
protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
List<String> files = splitFileNames(wordFiles);
@ -209,7 +197,7 @@ public abstract class AbstractAnalysisFactory {
* @param fileNames the string containing file names
* @return a list of file names with the escaping backslashed removed
*/
protected List<String> splitFileNames(String fileNames) {
protected final List<String> splitFileNames(String fileNames) {
if (fileNames == null)
return Collections.<String>emptyList();

View File

@ -104,10 +104,10 @@ final class AnalysisSPILoader<S extends AbstractAnalysisFactory> {
this.services = Collections.unmodifiableMap(services);
}
public S newInstance(String name) {
public S newInstance(String name, Map<String,String> args) {
final Class<? extends S> service = lookupClass(name);
try {
return service.newInstance();
return service.getConstructor(Map.class).newInstance(args);
} catch (Exception e) {
throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name '"+name+"' cannot be instantiated. " +
"This is likely due to a misconfiguration of the java class '" + service.getName() + "': ", e);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
*/
import java.io.Reader;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CharFilter;
@ -32,8 +33,8 @@ public abstract class CharFilterFactory extends AbstractAnalysisFactory {
new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
/** looks up a charfilter by name from context classpath */
public static CharFilterFactory forName(String name) {
return loader.newInstance(name);
public static CharFilterFactory forName(String name, Map<String,String> args) {
return loader.newInstance(name, args);
}
/** looks up a charfilter class by name from context classpath */
@ -61,6 +62,13 @@ public abstract class CharFilterFactory extends AbstractAnalysisFactory {
loader.reload(classloader);
}
/**
* Initialize this factory via a set of key-value pairs.
*/
protected CharFilterFactory(Map<String,String> args) {
super(args);
}
/** Wraps the given Reader with a CharFilter. */
public abstract Reader create(Reader input);
}

View File

@ -67,14 +67,23 @@ public final class ClasspathResourceLoader implements ResourceLoader {
throw new IOException("Resource not found: " + resource);
return stream;
}
@Override
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
try {
return Class.forName(cname, true, loader).asSubclass(expectedType);
} catch (Exception e) {
throw new RuntimeException("Cannot load class: " + cname, e);
}
}
@Override
public <T> T newInstance(String cname, Class<T> expectedType) {
Class<? extends T> clazz = findClass(cname, expectedType);
try {
final Class<? extends T> clazz = Class.forName(cname, true, loader).asSubclass(expectedType);
return clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException("Cannot instantiate class: " + cname, e);
throw new RuntimeException("Cannot create instance: " + cname, e);
}
}
}

View File

@ -18,12 +18,14 @@ package org.apache.lucene.analysis.util;
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
/**
* Factory for {@link ElisionFilter}.
* <pre class="prettyprint" >
* <pre class="prettyprint">
* &lt;fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@ -32,22 +34,28 @@ import org.apache.lucene.analysis.fr.FrenchAnalyzer;
* articles="stopwordarticles.txt" ignoreCase="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware, MultiTermAwareComponent {
private final String articlesFile;
private final boolean ignoreCase;
private CharArraySet articles;
/** Creates a new ElisionFilterFactory */
public ElisionFilterFactory(Map<String,String> args) {
super(args);
articlesFile = args.remove("articles");
ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public void inform(ResourceLoader loader) throws IOException {
String articlesFile = args.get("articles");
boolean ignoreCase = getBoolean("ignoreCase", false);
if (articlesFile != null) {
articles = getWordSet(loader, articlesFile, ignoreCase);
}
if (articles == null) {
if (articlesFile == null) {
articles = FrenchAnalyzer.DEFAULT_ARTICLES;
} else {
articles = getWordSet(loader, articlesFile, ignoreCase);
}
}

View File

@ -91,4 +91,9 @@ public final class FilesystemResourceLoader implements ResourceLoader {
public <T> T newInstance(String cname, Class<T> expectedType) {
return delegate.newInstance(cname, expectedType);
}
@Override
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
return delegate.findClass(cname, expectedType);
}
}

View File

@ -30,8 +30,14 @@ public interface ResourceLoader {
*/
public InputStream openResource(String resource) throws IOException;
/**
* Creates a class of the name and expected type
* Finds class of the name and expected type
*/
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType);
/**
* Creates an instance of the name and expected type
*/
// TODO: fix exception handling
public <T> T newInstance(String cname, Class<T> expectedType);

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
@ -32,8 +33,8 @@ public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
new String[] { "TokenFilterFactory", "FilterFactory" });
/** looks up a tokenfilter by name from context classpath */
public static TokenFilterFactory forName(String name) {
return loader.newInstance(name);
public static TokenFilterFactory forName(String name, Map<String,String> args) {
return loader.newInstance(name, args);
}
/** looks up a tokenfilter class by name from context classpath */
@ -60,6 +61,13 @@ public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
public static void reloadTokenFilters(ClassLoader classloader) {
loader.reload(classloader);
}
/**
* Initialize this factory via a set of key-value pairs.
*/
protected TokenFilterFactory(Map<String,String> args) {
super(args);
}
/** Transform the specified input TokenStream */
public abstract TokenStream create(TokenStream input);

Some files were not shown because too many files have changed in this diff Show More