mirror of https://github.com/apache/lucene.git
LUCENE-4877: throw exception for invalid arguments in analysis factories
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1463191 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b1e4ba6843
commit
29b5142e79
|
@ -31,6 +31,11 @@ Optimizations
|
|||
on Windows if NIOFSDirectory is used, mmapped files are still locked.
|
||||
(Michael Poindexter, Robert Muir, Uwe Schindler)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-4877: Throw exception for invalid arguments in analysis factories.
|
||||
(Steve Rowe, Uwe Schindler, Robert Muir)
|
||||
|
||||
======================= Lucene 4.3.0 =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -17,26 +17,34 @@ package org.apache.lucene.analysis.ar;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
|
||||
/**
|
||||
* Factory for {@link ArabicNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ArabicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new ArabicNormalizationFilterFactory */
|
||||
public ArabicNormalizationFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ArabicNormalizationFilter create(TokenStream input) {
|
||||
return new ArabicNormalizationFilter(input);
|
||||
|
|
|
@ -17,14 +17,15 @@ package org.apache.lucene.analysis.ar;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
|
||||
/**
|
||||
* Factory for {@link ArabicStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -32,10 +33,16 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.ArabicStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ArabicStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new ArabicStemFilterFactory */
|
||||
public ArabicStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ArabicStemFilter create(TokenStream input) {
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.bg;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link BulgarianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.BulgarianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class BulgarianStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new BulgarianStemFilterFactory */
|
||||
public BulgarianStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new BulgarianStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.br;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link BrazilianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.BrazilianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class BrazilianStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new BrazilianStemFilterFactory */
|
||||
public BrazilianStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BrazilianStemFilter create(TokenStream in) {
|
||||
return new BrazilianStemFilter(in);
|
||||
|
|
|
@ -28,20 +28,36 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Factory for {@link HTMLStripCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* Factory for {@link HTMLStripCharFilter}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_html" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" />
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class HTMLStripCharFilterFactory extends CharFilterFactory {
|
||||
public class HTMLStripCharFilterFactory extends CharFilterFactory {
|
||||
final Set<String> escapedTags;
|
||||
static final Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
|
||||
|
||||
Set<String> escapedTags = null;
|
||||
Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
|
||||
/** Creates a new HTMLStripCharFilterFactory */
|
||||
public HTMLStripCharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String escapedTagsArg = args.remove("escapedTags");
|
||||
if (escapedTagsArg == null) {
|
||||
escapedTags = null;
|
||||
} else {
|
||||
escapedTags = new HashSet<String>();
|
||||
Matcher matcher = TAG_NAME_PATTERN.matcher(escapedTagsArg);
|
||||
while (matcher.find()) {
|
||||
escapedTags.add(matcher.group(0));
|
||||
}
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HTMLStripCharFilter create(Reader input) {
|
||||
|
@ -53,19 +69,4 @@ import java.util.regex.Pattern;
|
|||
}
|
||||
return charFilter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
String escapedTagsArg = args.get("escapedTags");
|
||||
if (null != escapedTagsArg) {
|
||||
Matcher matcher = TAG_NAME_PATTERN.matcher(escapedTagsArg);
|
||||
while (matcher.find()) {
|
||||
if (null == escapedTags) {
|
||||
escapedTags = new HashSet<String>();
|
||||
}
|
||||
escapedTags.add(matcher.group(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -31,7 +32,7 @@ import org.apache.lucene.analysis.util.*;
|
|||
|
||||
/**
|
||||
* Factory for {@link MappingCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_map" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
|
||||
|
@ -39,21 +40,26 @@ import org.apache.lucene.analysis.util.*;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
* @since Solr 1.4
|
||||
*
|
||||
*/
|
||||
public class MappingCharFilterFactory extends CharFilterFactory implements
|
||||
ResourceLoaderAware, MultiTermAwareComponent {
|
||||
|
||||
protected NormalizeCharMap normMap;
|
||||
private String mapping;
|
||||
private final String mapping;
|
||||
|
||||
/** Creates a new MappingCharFilterFactory */
|
||||
public MappingCharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
mapping = args.remove("mapping");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this should use inputstreams from the loader, not File!
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
mapping = args.get("mapping");
|
||||
|
||||
if (mapping != null) {
|
||||
List<String> wlist = null;
|
||||
File mappingFile = new File(mapping);
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link CJKBigramFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_cjk" class="solr.TextField">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -38,26 +38,30 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class CJKBigramFilterFactory extends TokenFilterFactory {
|
||||
int flags;
|
||||
boolean outputUnigrams;
|
||||
final int flags;
|
||||
final boolean outputUnigrams;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
flags = 0;
|
||||
if (getBoolean("han", true)) {
|
||||
/** Creates a new CJKBigramFilterFactory */
|
||||
public CJKBigramFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
int flags = 0;
|
||||
if (getBoolean(args, "han", true)) {
|
||||
flags |= CJKBigramFilter.HAN;
|
||||
}
|
||||
if (getBoolean("hiragana", true)) {
|
||||
if (getBoolean(args, "hiragana", true)) {
|
||||
flags |= CJKBigramFilter.HIRAGANA;
|
||||
}
|
||||
if (getBoolean("katakana", true)) {
|
||||
if (getBoolean(args, "katakana", true)) {
|
||||
flags |= CJKBigramFilter.KATAKANA;
|
||||
}
|
||||
if (getBoolean("hangul", true)) {
|
||||
if (getBoolean(args, "hangul", true)) {
|
||||
flags |= CJKBigramFilter.HANGUL;
|
||||
}
|
||||
outputUnigrams = getBoolean("outputUnigrams", false);
|
||||
this.flags = flags;
|
||||
this.outputUnigrams = getBoolean(args, "outputUnigrams", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cjk;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link CJKWidthFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_cjk" class="solr.TextField">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -35,9 +37,16 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
|
||||
public class CJKWidthFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new CJKWidthFilterFactory */
|
||||
public CJKWidthFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new CJKWidthFilter(input);
|
||||
|
|
|
@ -18,7 +18,9 @@ package org.apache.lucene.analysis.commongrams;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
|
@ -26,29 +28,36 @@ import org.apache.lucene.analysis.util.*;
|
|||
|
||||
/**
|
||||
* Constructs a {@link CommonGramsFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is pretty close to a straight copy from StopFilterFactory
|
||||
*/
|
||||
public class CommonGramsFilterFactory extends TokenFilterFactory implements
|
||||
ResourceLoaderAware {
|
||||
public class CommonGramsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
// TODO: shared base class for Stop/Keep/CommonGrams?
|
||||
private CharArraySet commonWords;
|
||||
private final String commonWordFiles;
|
||||
private final String format;
|
||||
private final boolean ignoreCase;
|
||||
|
||||
/** Creates a new CommonGramsFilterFactory */
|
||||
public CommonGramsFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
commonWordFiles = args.remove("words");
|
||||
format = args.remove("format");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String commonWordFiles = args.get("words");
|
||||
ignoreCase = getBoolean("ignoreCase", false);
|
||||
|
||||
if (commonWordFiles != null) {
|
||||
if ("snowball".equalsIgnoreCase(args.get("format"))) {
|
||||
if ("snowball".equalsIgnoreCase(format)) {
|
||||
commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
|
||||
} else {
|
||||
commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
|
||||
|
@ -57,10 +66,6 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements
|
|||
commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
|
||||
}
|
||||
}
|
||||
|
||||
//Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
|
||||
private CharArraySet commonWords;
|
||||
private boolean ignoreCase;
|
||||
|
||||
public boolean isIgnoreCase() {
|
||||
return ignoreCase;
|
||||
|
@ -71,7 +76,7 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements
|
|||
}
|
||||
|
||||
@Override
|
||||
public CommonGramsFilter create(TokenStream input) {
|
||||
public TokenFilter create(TokenStream input) {
|
||||
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
|
||||
return commonGrams;
|
||||
}
|
||||
|
|
|
@ -17,77 +17,37 @@ package org.apache.lucene.analysis.commongrams;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
|
||||
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilterFactory;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
|
||||
/**
|
||||
* Construct {@link CommonGramsQueryFilter}.
|
||||
*
|
||||
* This is pretty close to a straight copy from {@link StopFilterFactory}.
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class CommonGramsQueryFilterFactory extends TokenFilterFactory
|
||||
implements ResourceLoaderAware {
|
||||
public class CommonGramsQueryFilterFactory extends CommonGramsFilterFactory {
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
assureMatchVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String commonWordFiles = args.get("words");
|
||||
ignoreCase = getBoolean("ignoreCase", false);
|
||||
|
||||
if (commonWordFiles != null) {
|
||||
if ("snowball".equalsIgnoreCase(args.get("format"))) {
|
||||
commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
|
||||
} else {
|
||||
commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
|
||||
}
|
||||
} else {
|
||||
commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
|
||||
}
|
||||
}
|
||||
|
||||
// Force the use of a char array set, as it is the most performant, although
|
||||
// this may break things if Lucene ever goes away from it. See SOLR-1095
|
||||
private CharArraySet commonWords;
|
||||
|
||||
private boolean ignoreCase;
|
||||
|
||||
public boolean isIgnoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
|
||||
public CharArraySet getCommonWords() {
|
||||
return commonWords;
|
||||
/** Creates a new CommonGramsQueryFilterFactory */
|
||||
public CommonGramsQueryFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
|
||||
*/
|
||||
@Override
|
||||
public CommonGramsQueryFilter create(TokenStream input) {
|
||||
CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
|
||||
CommonGramsQueryFilter commonGramsQuery = new CommonGramsQueryFilter(
|
||||
commonGrams);
|
||||
return commonGramsQuery;
|
||||
public TokenFilter create(TokenStream input) {
|
||||
CommonGramsFilter commonGrams = (CommonGramsFilter) super.create(input);
|
||||
return new CommonGramsQueryFilter(commonGrams);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.io.IOException;
|
|||
|
||||
/**
|
||||
* Factory for {@link DictionaryCompoundWordTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -33,33 +33,38 @@ import java.io.IOException;
|
|||
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArraySet dictionary;
|
||||
private String dictFile;
|
||||
private int minWordSize;
|
||||
private int minSubwordSize;
|
||||
private int maxSubwordSize;
|
||||
private boolean onlyLongestMatch;
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
private final String dictFile;
|
||||
private final int minWordSize;
|
||||
private final int minSubwordSize;
|
||||
private final int maxSubwordSize;
|
||||
private final boolean onlyLongestMatch;
|
||||
|
||||
/** Creates a new DictionaryCompoundWordTokenFilterFactory */
|
||||
public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
dictFile = args.get("dictionary");
|
||||
dictFile = args.remove("dictionary");
|
||||
if (null == dictFile) {
|
||||
throw new IllegalArgumentException("Missing required parameter: dictionary");
|
||||
}
|
||||
|
||||
minWordSize= getInt("minWordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize= getInt("minSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
maxSubwordSize= getInt("maxSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
|
||||
onlyLongestMatch = getBoolean("onlyLongestMatch",true);
|
||||
minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
|
||||
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
dictionary = super.getWordSet(loader, dictFile, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
// if the dictionary is null, it means it was empty
|
||||
|
|
|
@ -45,7 +45,7 @@ import org.xml.sax.InputSource;
|
|||
* to the stream. defaults to false.
|
||||
* </ul>
|
||||
* <p>
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -59,30 +59,32 @@ import org.xml.sax.InputSource;
|
|||
public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArraySet dictionary;
|
||||
private HyphenationTree hyphenator;
|
||||
private String dictFile;
|
||||
private String hypFile;
|
||||
private String encoding;
|
||||
private int minWordSize;
|
||||
private int minSubwordSize;
|
||||
private int maxSubwordSize;
|
||||
private boolean onlyLongestMatch;
|
||||
private final String dictFile;
|
||||
private final String hypFile;
|
||||
private final String encoding;
|
||||
private final int minWordSize;
|
||||
private final int minSubwordSize;
|
||||
private final int maxSubwordSize;
|
||||
private final boolean onlyLongestMatch;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new HyphenationCompoundWordTokenFilterFactory */
|
||||
public HyphenationCompoundWordTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
dictFile = args.get("dictionary");
|
||||
if (args.containsKey("encoding"))
|
||||
encoding = args.get("encoding");
|
||||
hypFile = args.get("hyphenator");
|
||||
dictFile = args.remove("dictionary");
|
||||
encoding = args.remove("encoding");
|
||||
hypFile = args.remove("hyphenator");
|
||||
if (null == hypFile) {
|
||||
throw new IllegalArgumentException("Missing required parameter: hyphenator");
|
||||
}
|
||||
|
||||
minWordSize = getInt("minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize = getInt("minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
maxSubwordSize = getInt("maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
|
||||
onlyLongestMatch = getBoolean("onlyLongestMatch", false);
|
||||
minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
|
||||
onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,18 +21,27 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
|
|||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeywordTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class KeywordTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
/** Creates a new KeywordTokenizerFactory */
|
||||
public KeywordTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public KeywordTokenizer create(AttributeFactory factory, Reader input) {
|
||||
return new KeywordTokenizer(factory, input, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
|
||||
|
|
|
@ -25,20 +25,22 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link LetterTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LetterTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class LetterTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new LetterTokenizerFactory */
|
||||
public LetterTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,20 +27,23 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link LowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
|
||||
/** Creates a new LowerCaseFilterFactory */
|
||||
public LowerCaseFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,23 +23,27 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
|
|||
import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link LowerCaseTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
|
||||
/** Creates a new LowerCaseTokenizerFactory */
|
||||
public LowerCaseTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -49,9 +53,6 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
|
|||
|
||||
@Override
|
||||
public AbstractAnalysisFactory getMultiTermComponent() {
|
||||
LowerCaseFilterFactory filt = new LowerCaseFilterFactory();
|
||||
filt.setLuceneMatchVersion(luceneMatchVersion);
|
||||
filt.init(args);
|
||||
return filt;
|
||||
return new LowerCaseFilterFactory(new HashMap<String,String>(getOriginalArgs()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import java.io.IOException;
|
|||
|
||||
/**
|
||||
* Factory for {@link StopFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -35,24 +35,31 @@ import java.io.IOException;
|
|||
* words="stopwords.txt" enablePositionIncrements="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
private CharArraySet stopWords;
|
||||
private final String stopWordFiles;
|
||||
private final String format;
|
||||
private final boolean ignoreCase;
|
||||
private final boolean enablePositionIncrements;
|
||||
|
||||
/** Creates a new StopFilterFactory */
|
||||
public StopFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
stopWordFiles = args.remove("words");
|
||||
format = args.remove("format");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String stopWordFiles = args.get("words");
|
||||
ignoreCase = getBoolean("ignoreCase",false);
|
||||
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
|
||||
|
||||
if (stopWordFiles != null) {
|
||||
if ("snowball".equalsIgnoreCase(args.get("format"))) {
|
||||
if ("snowball".equalsIgnoreCase(format)) {
|
||||
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
|
||||
} else {
|
||||
stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
|
||||
|
@ -62,10 +69,6 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
|
|||
}
|
||||
}
|
||||
|
||||
private CharArraySet stopWords;
|
||||
private boolean ignoreCase;
|
||||
private boolean enablePositionIncrements;
|
||||
|
||||
public boolean isEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
|
|
@ -26,11 +26,12 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Factory class for {@link TypeTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="chars" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -40,29 +41,36 @@ import java.util.Set;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class TypeTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String stopTypesFiles = args.get("types");
|
||||
enablePositionIncrements = getBoolean("enablePositionIncrements", false);
|
||||
useWhitelist = getBoolean("useWhitelist", false);
|
||||
if (stopTypesFiles != null) {
|
||||
List<String> files = splitFileNames(stopTypesFiles);
|
||||
if (files.size() > 0) {
|
||||
stopTypes = new HashSet<String>();
|
||||
for (String file : files) {
|
||||
List<String> typesLines = getLines(loader, file.trim());
|
||||
stopTypes.addAll(typesLines);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
private final boolean useWhitelist;
|
||||
private final boolean enablePositionIncrements;
|
||||
private final String stopTypesFiles;
|
||||
private Set<String> stopTypes;
|
||||
|
||||
/** Creates a new TypeTokenFilterFactory */
|
||||
public TypeTokenFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
stopTypesFiles = args.remove("types");
|
||||
if (stopTypesFiles == null) {
|
||||
throw new IllegalArgumentException("Missing required parameter: types.");
|
||||
}
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
useWhitelist = getBoolean(args, "useWhitelist", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
List<String> files = splitFileNames(stopTypesFiles);
|
||||
if (files.size() > 0) {
|
||||
stopTypes = new HashSet<String>();
|
||||
for (String file : files) {
|
||||
List<String> typesLines = getLines(loader, file.trim());
|
||||
stopTypes.addAll(typesLines);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean useWhitelist;
|
||||
private Set<String> stopTypes;
|
||||
private boolean enablePositionIncrements;
|
||||
|
||||
public boolean isEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
|
|
|
@ -25,19 +25,22 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link WhitespaceTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class WhitespaceTokenizerFactory extends TokenizerFactory {
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
|
||||
/** Creates a new WhitespaceTokenizerFactory */
|
||||
public WhitespaceTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.cz;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.cz.CzechStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link CzechStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* Factory for {@link CzechStemFilter}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class CzechStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new CzechStemFilterFactory */
|
||||
public CzechStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new CzechStemFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.de;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GermanLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GermanLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GermanLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new GermanLightStemFilterFactory */
|
||||
public GermanLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GermanLightStemFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.de;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GermanMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GermanMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GermanMinimalStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new GermanMinimalStemFilterFactory */
|
||||
public GermanMinimalStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GermanMinimalStemFilter(input);
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.de;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link GermanNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_denorm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -36,6 +38,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
*/
|
||||
public class GermanNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new GermanNormalizationFilterFactory */
|
||||
public GermanNormalizationFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GermanNormalizationFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.de;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.de.GermanStemFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GermanStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GermanStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GermanStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new GermanStemFilterFactory */
|
||||
public GermanStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public GermanStemFilter create(TokenStream in) {
|
||||
return new GermanStemFilter(in);
|
||||
|
|
|
@ -27,25 +27,23 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link GreekLowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.GreekLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new GreekLowerCaseFilterFactory */
|
||||
public GreekLowerCaseFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (args.containsKey("charset"))
|
||||
throw new IllegalArgumentException(
|
||||
"The charset parameter is no longer supported. "
|
||||
+ "Please process your documents as Unicode instead.");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.el;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.el.GreekStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GreekStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,13 +33,19 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.GreekStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class GreekStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new GreekStemFilterFactory */
|
||||
public GreekStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GreekStemFilter(input);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.en;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link EnglishMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class EnglishMinimalStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new EnglishMinimalStemFilterFactory */
|
||||
public EnglishMinimalStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new EnglishMinimalStemFilter(input);
|
||||
|
|
|
@ -25,22 +25,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link EnglishPossessiveFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class EnglishPossessiveFilterFactory extends TokenFilterFactory {
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new EnglishPossessiveFilterFactory */
|
||||
public EnglishPossessiveFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,16 +17,34 @@ package org.apache.lucene.analysis.en;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.en.KStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link KStemFilter}
|
||||
* Factory for {@link KStemFilter}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_kstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.KStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class KStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new KStemFilterFactory */
|
||||
public KStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenFilter create(TokenStream input) {
|
||||
return new KStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.en;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link PorterStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PorterStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PorterStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new PorterStemFilterFactory */
|
||||
public PorterStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PorterStemFilter create(TokenStream input) {
|
||||
return new PorterStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.es;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link SpanishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.SpanishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class SpanishLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new SpanishLightStemFilterFactory */
|
||||
public SpanishLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new SpanishLightStemFilter(input);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.fa;
|
|||
*/
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.fa.PersianCharFilter;
|
||||
|
@ -27,17 +28,24 @@ import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
|||
|
||||
/**
|
||||
* Factory for {@link PersianCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.PersianCharFilterFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PersianCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new PersianCharFilterFactory */
|
||||
public PersianCharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharFilter create(Reader input) {
|
||||
return new PersianCharFilter(input);
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.fa;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,7 +27,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link PersianNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.PersianCharFilterFactory"/>
|
||||
|
@ -33,9 +35,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PersianNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PersianNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new PersianNormalizationFilterFactory */
|
||||
public PersianNormalizationFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PersianNormalizationFilter create(TokenStream input) {
|
||||
return new PersianNormalizationFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.fi;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link FinnishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.FinnishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class FinnishLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new FinnishLightStemFilterFactory */
|
||||
public FinnishLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new FinnishLightStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.fr;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link FrenchLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,10 +33,18 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.ElisionFilterFactory"/>
|
||||
* <filter class="solr.FrenchLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class FrenchLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new FrenchLightStemFilterFactory */
|
||||
public FrenchLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new FrenchLightStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.fr;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link FrenchMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,10 +33,18 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.ElisionFilterFactory"/>
|
||||
* <filter class="solr.FrenchMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class FrenchMinimalStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new FrenchMinimalStemFilterFactory */
|
||||
public FrenchMinimalStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new FrenchMinimalStemFilter(input);
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.ga;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ga.IrishLowerCaseFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,17 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link IrishLowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.IrishLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class IrishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new IrishLowerCaseFilterFactory */
|
||||
public IrishLowerCaseFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new IrishLowerCaseFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.gl;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.gl.GalicianMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GalicianMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_glplural" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GalicianMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GalicianMinimalStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new GalicianMinimalStemFilterFactory */
|
||||
public GalicianMinimalStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GalicianMinimalStemFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.gl;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.gl.GalicianStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GalicianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GalicianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class GalicianStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new GalicianStemFilterFactory */
|
||||
public GalicianStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new GalicianStemFilter(input);
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hi;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link HindiNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.HindiNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class HindiNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new HindiNormalizationFilterFactory */
|
||||
public HindiNormalizationFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HindiNormalizationFilter(input);
|
||||
|
|
|
@ -17,22 +17,32 @@ package org.apache.lucene.analysis.hi;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hi.HindiStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link HindiStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.HindiStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class HindiStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new HindiStemFilterFactory */
|
||||
public HindiStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HindiStemFilter(input);
|
||||
|
|
|
@ -17,23 +17,33 @@ package org.apache.lucene.analysis.hu;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link HungarianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.HungarianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class HungarianLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new HungarianLightStemFilterFactory */
|
||||
public HungarianLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new HungarianLightStemFilter(input);
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.InputStream;
|
|||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
|
||||
|
@ -34,7 +35,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
/**
|
||||
* TokenFilterFactory that creates instances of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}.
|
||||
* Example config for British English including a custom dictionary, case insensitive matching:
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <filter class="solr.HunspellStemFilterFactory"
|
||||
* dictionary="en_GB.dic,my_custom.dic"
|
||||
* affix="en_GB.aff"
|
||||
|
@ -51,16 +52,32 @@ import org.apache.lucene.util.IOUtils;
|
|||
* See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
|
||||
*/
|
||||
public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
private static final String PARAM_DICTIONARY = "dictionary";
|
||||
private static final String PARAM_AFFIX = "affix";
|
||||
private static final String PARAM_IGNORE_CASE = "ignoreCase";
|
||||
private static final String PARAM_STRICT_AFFIX_PARSING = "strictAffixParsing";
|
||||
private static final String TRUE = "true";
|
||||
private static final String FALSE = "false";
|
||||
|
||||
|
||||
private final String dictionaryArg;
|
||||
private final String affixFile;
|
||||
private final boolean ignoreCase;
|
||||
private final boolean strictAffixParsing;
|
||||
private HunspellDictionary dictionary;
|
||||
private boolean ignoreCase = false;
|
||||
|
||||
/** Creates a new HunspellStemFilterFactory */
|
||||
public HunspellStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
dictionaryArg = args.remove(PARAM_DICTIONARY);
|
||||
if (dictionaryArg == null) {
|
||||
throw new IllegalArgumentException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
|
||||
}
|
||||
affixFile = args.remove(PARAM_AFFIX);
|
||||
ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
|
||||
strictAffixParsing = getBoolean(args, PARAM_STRICT_AFFIX_PARSING, true);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the hunspell dictionary and affix files defined in the configuration
|
||||
|
@ -69,27 +86,7 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
|
|||
*/
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
assureMatchVersion();
|
||||
String dictionaryArg = args.get(PARAM_DICTIONARY);
|
||||
if (dictionaryArg == null) {
|
||||
throw new IllegalArgumentException("Parameter " + PARAM_DICTIONARY + " is mandatory.");
|
||||
}
|
||||
String dictionaryFiles[] = args.get(PARAM_DICTIONARY).split(",");
|
||||
String affixFile = args.get(PARAM_AFFIX);
|
||||
String pic = args.get(PARAM_IGNORE_CASE);
|
||||
if(pic != null) {
|
||||
if(pic.equalsIgnoreCase(TRUE)) ignoreCase = true;
|
||||
else if(pic.equalsIgnoreCase(FALSE)) ignoreCase = false;
|
||||
else throw new IllegalArgumentException("Unknown value for " + PARAM_IGNORE_CASE + ": " + pic + ". Must be true or false");
|
||||
}
|
||||
|
||||
String strictAffixParsingParam = args.get(PARAM_STRICT_AFFIX_PARSING);
|
||||
boolean strictAffixParsing = true;
|
||||
if(strictAffixParsingParam != null) {
|
||||
if(strictAffixParsingParam.equalsIgnoreCase(FALSE)) strictAffixParsing = false;
|
||||
else if(strictAffixParsingParam.equalsIgnoreCase(TRUE)) strictAffixParsing = true;
|
||||
else throw new IllegalArgumentException("Unknown value for " + PARAM_STRICT_AFFIX_PARSING + ": " + strictAffixParsingParam + ". Must be true or false");
|
||||
}
|
||||
String dictionaryFiles[] = dictionaryArg.split(",");
|
||||
|
||||
InputStream affix = null;
|
||||
List<InputStream> dictionaries = new ArrayList<InputStream>();
|
||||
|
@ -103,7 +100,7 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
|
|||
|
||||
this.dictionary = new HunspellDictionary(affix, dictionaries, luceneMatchVersion, ignoreCase, strictAffixParsing);
|
||||
} catch (ParseException e) {
|
||||
throw new IOException("Unable to load hunspell data! [dictionary=" + args.get("dictionary") + ",affix=" + affixFile + "]", e);
|
||||
throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaryArg + ",affix=" + affixFile + "]", e);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(affix);
|
||||
IOUtils.closeWhileHandlingException(dictionaries);
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link IndonesianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -33,15 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class IndonesianStemFilterFactory extends TokenFilterFactory {
|
||||
private boolean stemDerivational = true;
|
||||
private final boolean stemDerivational;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
stemDerivational = getBoolean("stemDerivational", true);
|
||||
/** Creates a new IndonesianStemFilterFactory */
|
||||
public IndonesianStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
stemDerivational = getBoolean(args, "stemDerivational", true);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.in;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link IndicNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.IndicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class IndicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new IndicNormalizationFilterFactory */
|
||||
public IndicNormalizationFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new IndicNormalizationFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.it;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link ItalianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.ItalianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ItalianLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new ItalianLightStemFilterFactory */
|
||||
public ItalianLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new ItalianLightStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.lv;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.lv.LatvianStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link LatvianStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lvstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class LatvianStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new LatvianStemFilterFactory */
|
||||
public LatvianStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new LatvianStemFilter(input);
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
|
||||
/**
|
||||
* Factory for {@link ASCIIFoldingFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new ASCIIFoldingFilterFactory */
|
||||
public ASCIIFoldingFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ASCIIFoldingFilter create(TokenStream input) {
|
||||
return new ASCIIFoldingFilter(input);
|
||||
|
|
|
@ -44,7 +44,7 @@ import java.util.StringTokenizer;
|
|||
* "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
|
||||
* assumed to be correct.<br/>
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -54,7 +54,6 @@ import java.util.StringTokenizer;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class CapitalizationFilterFactory extends TokenFilterFactory {
|
||||
|
@ -67,30 +66,24 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
|
|||
public static final String ONLY_FIRST_WORD = "onlyFirstWord";
|
||||
public static final String FORCE_FIRST_LETTER = "forceFirstLetter";
|
||||
|
||||
//Map<String,String> keep = new HashMap<String, String>(); // not synchronized because it is only initialized once
|
||||
CharArraySet keep;
|
||||
|
||||
Collection<char[]> okPrefix = Collections.emptyList(); // for Example: McK
|
||||
|
||||
int minWordLength = 0; // don't modify capitalization for words shorter then this
|
||||
int maxWordCount = CapitalizationFilter.DEFAULT_MAX_WORD_COUNT;
|
||||
int maxTokenLength = CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH;
|
||||
boolean onlyFirstWord = true;
|
||||
boolean forceFirstLetter = true; // make sure the first letter is capitol even if it is in the keep list
|
||||
final int minWordLength; // don't modify capitalization for words shorter then this
|
||||
final int maxWordCount;
|
||||
final int maxTokenLength;
|
||||
final boolean onlyFirstWord;
|
||||
final boolean forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new CapitalizationFilterFactory */
|
||||
public CapitalizationFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
|
||||
String k = args.get(KEEP);
|
||||
boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
|
||||
String k = args.remove(KEEP);
|
||||
if (k != null) {
|
||||
StringTokenizer st = new StringTokenizer(k);
|
||||
boolean ignoreCase = false;
|
||||
String ignoreStr = args.get(KEEP_IGNORE_CASE);
|
||||
if ("true".equalsIgnoreCase(ignoreStr)) {
|
||||
ignoreCase = true;
|
||||
}
|
||||
keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
|
||||
while (st.hasMoreTokens()) {
|
||||
k = st.nextToken().trim();
|
||||
|
@ -98,7 +91,7 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
|
|||
}
|
||||
}
|
||||
|
||||
k = args.get(OK_PREFIX);
|
||||
k = args.remove(OK_PREFIX);
|
||||
if (k != null) {
|
||||
okPrefix = new ArrayList<char[]>();
|
||||
StringTokenizer st = new StringTokenizer(k);
|
||||
|
@ -107,29 +100,13 @@ public class CapitalizationFilterFactory extends TokenFilterFactory {
|
|||
}
|
||||
}
|
||||
|
||||
k = args.get(MIN_WORD_LENGTH);
|
||||
if (k != null) {
|
||||
minWordLength = Integer.valueOf(k);
|
||||
}
|
||||
|
||||
k = args.get(MAX_WORD_COUNT);
|
||||
if (k != null) {
|
||||
maxWordCount = Integer.valueOf(k);
|
||||
}
|
||||
|
||||
k = args.get(MAX_TOKEN_LENGTH);
|
||||
if (k != null) {
|
||||
maxTokenLength = Integer.valueOf(k);
|
||||
}
|
||||
|
||||
k = args.get(ONLY_FIRST_WORD);
|
||||
if (k != null) {
|
||||
onlyFirstWord = Boolean.valueOf(k);
|
||||
}
|
||||
|
||||
k = args.get(FORCE_FIRST_LETTER);
|
||||
if (k != null) {
|
||||
forceFirstLetter = Boolean.valueOf(k);
|
||||
minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
|
||||
maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
|
||||
maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
|
||||
forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,22 +17,32 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link HyphenatedWordsFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.HyphenatedWordsFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class HyphenatedWordsFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new HyphenatedWordsFilterFactory */
|
||||
public HyphenatedWordsFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HyphenatedWordsFilter create(TokenStream input) {
|
||||
return new HyphenatedWordsFilter(input);
|
||||
|
|
|
@ -17,63 +17,51 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeepWordFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
private final boolean ignoreCase;
|
||||
private final boolean enablePositionIncrements;
|
||||
private final String wordFiles;
|
||||
private CharArraySet words;
|
||||
|
||||
/** Creates a new KeepWordFilterFactory */
|
||||
public KeepWordFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
wordFiles = args.remove("words");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String wordFiles = args.get("words");
|
||||
ignoreCase = getBoolean("ignoreCase", false);
|
||||
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
|
||||
|
||||
if (wordFiles != null) {
|
||||
words = getWordSet(loader, wordFiles, ignoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
private CharArraySet words;
|
||||
private boolean ignoreCase;
|
||||
private boolean enablePositionIncrements;
|
||||
|
||||
/**
|
||||
* Set the keep word list.
|
||||
* NOTE: if ignoreCase==true, the words are expected to be lowercase
|
||||
*/
|
||||
public void setWords(Set<String> words) {
|
||||
this.words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
|
||||
}
|
||||
|
||||
public void setIgnoreCase(boolean ignoreCase) {
|
||||
if (words != null && this.ignoreCase != ignoreCase) {
|
||||
words = new CharArraySet(luceneMatchVersion, words, ignoreCase);
|
||||
}
|
||||
this.ignoreCase = ignoreCase;
|
||||
}
|
||||
|
||||
public boolean isEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
|
@ -26,34 +27,42 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
|
||||
/**
|
||||
* Factory for {@link KeywordMarkerFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class KeywordMarkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String PROTECTED_TOKENS = "protected";
|
||||
public static final String PATTERN = "pattern";
|
||||
private CharArraySet protectedWords;
|
||||
private boolean ignoreCase;
|
||||
private final String wordFiles;
|
||||
private final String stringPattern;
|
||||
private final boolean ignoreCase;
|
||||
private Pattern pattern;
|
||||
private CharArraySet protectedWords;
|
||||
|
||||
/** Creates a new KeywordMarkerFilterFactory */
|
||||
public KeywordMarkerFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
wordFiles = args.remove(PROTECTED_TOKENS);
|
||||
stringPattern = args.remove(PATTERN);
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String wordFiles = args.get(PROTECTED_TOKENS);
|
||||
String stringPattern = args.get(PATTERN);
|
||||
ignoreCase = getBoolean("ignoreCase", false);
|
||||
if (wordFiles != null) {
|
||||
protectedWords = getWordSet(loader, wordFiles, ignoreCase);
|
||||
}
|
||||
if (stringPattern != null) {
|
||||
pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public boolean isIgnoreCase() {
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
|
@ -28,6 +30,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* {@link RemoveDuplicatesTokenFilterFactory} later in the analysis chain.
|
||||
*/
|
||||
public final class KeywordRepeatFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new KeywordRepeatFilterFactory */
|
||||
public KeywordRepeatFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new KeywordRepeatFilter(input);
|
||||
|
|
|
@ -25,32 +25,30 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link LengthFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LengthFilterFactory" min="0" max="1" enablePositionIncrements="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class LengthFilterFactory extends TokenFilterFactory {
|
||||
int min,max;
|
||||
boolean enablePositionIncrements;
|
||||
final int min;
|
||||
final int max;
|
||||
final boolean enablePositionIncrements;
|
||||
public static final String MIN_KEY = "min";
|
||||
public static final String MAX_KEY = "max";
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
String minKey = args.get(MIN_KEY);
|
||||
String maxKey = args.get(MAX_KEY);
|
||||
if (minKey == null || maxKey == null) {
|
||||
throw new IllegalArgumentException("Both " + MIN_KEY + " and " + MAX_KEY + " are mandatory");
|
||||
/** Creates a new LengthFilterFactory */
|
||||
public LengthFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
min = getInt(args, MIN_KEY, 0, false);
|
||||
max = getInt(args, MAX_KEY, 0, false);
|
||||
enablePositionIncrements = getBoolean(args, "enablePositionIncrements", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
min=Integer.parseInt(minKey);
|
||||
max=Integer.parseInt(maxKey);
|
||||
enablePositionIncrements = getBoolean("enablePositionIncrements",false);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,28 +25,32 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link LimitTokenCountFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10" consumeAllTokens="false" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* </fieldType></pre>
|
||||
* <p>
|
||||
* The {@code consumeAllTokens} property is optional and defaults to {@code false}. See {@link LimitTokenCountFilter} for an explanation of its use.
|
||||
* The {@code consumeAllTokens} property is optional and defaults to {@code false}.
|
||||
* See {@link LimitTokenCountFilter} for an explanation of its use.
|
||||
*/
|
||||
public class LimitTokenCountFilterFactory extends TokenFilterFactory {
|
||||
|
||||
public static final String MAX_TOKEN_COUNT_KEY = "maxTokenCount";
|
||||
public static final String CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
|
||||
int maxTokenCount;
|
||||
boolean consumeAllTokens;
|
||||
final int maxTokenCount;
|
||||
final boolean consumeAllTokens;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init( args );
|
||||
maxTokenCount = getInt(MAX_TOKEN_COUNT_KEY);
|
||||
consumeAllTokens = getBoolean(CONSUME_ALL_TOKENS_KEY, false);
|
||||
/** Creates a new LimitTokenCountFilterFactory */
|
||||
public LimitTokenCountFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
maxTokenCount = getInt(args, MAX_TOKEN_COUNT_KEY);
|
||||
consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,13 +23,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link LimitTokenPositionFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_limit_pos" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.LimitTokenPositionFilterFactory" maxTokenPosition="3" consumeAllTokens="false" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* </fieldType></pre>
|
||||
* <p>
|
||||
* The {@code consumeAllTokens} property is optional and defaults to {@code false}.
|
||||
* See {@link LimitTokenPositionFilter} for an explanation of its use.
|
||||
|
@ -38,14 +38,17 @@ public class LimitTokenPositionFilterFactory extends TokenFilterFactory {
|
|||
|
||||
public static final String MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
|
||||
public static final String CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
|
||||
int maxTokenPosition;
|
||||
boolean consumeAllTokens;
|
||||
final int maxTokenPosition;
|
||||
final boolean consumeAllTokens;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
maxTokenPosition = getInt(MAX_TOKEN_POSITION_KEY);
|
||||
consumeAllTokens = getBoolean(CONSUME_ALL_TOKENS_KEY, false);
|
||||
/** Creates a new LimitTokenPositionFilterFactory */
|
||||
public LimitTokenPositionFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
maxTokenPosition = getInt(args, MAX_TOKEN_POSITION_KEY);
|
||||
consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,22 +17,32 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link RemoveDuplicatesTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class RemoveDuplicatesTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new RemoveDuplicatesTokenFilterFactory */
|
||||
public RemoveDuplicatesTokenFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public RemoveDuplicatesTokenFilter create(TokenStream input) {
|
||||
return new RemoveDuplicatesTokenFilter(input);
|
||||
|
|
|
@ -19,35 +19,42 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link StemmerOverrideFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class StemmerOverrideFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private StemmerOverrideMap dictionary;
|
||||
private boolean ignoreCase;
|
||||
private final String dictionaryFiles;
|
||||
private final boolean ignoreCase;
|
||||
|
||||
/** Creates a new StemmerOverrideFilterFactory */
|
||||
public StemmerOverrideFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
dictionaryFiles = args.remove("dictionary");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String dictionaryFiles = args.get("dictionary");
|
||||
ignoreCase = getBoolean("ignoreCase", false);
|
||||
if (dictionaryFiles != null) {
|
||||
assureMatchVersion();
|
||||
List<String> files = splitFileNames(dictionaryFiles);
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link TrimFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory"/>
|
||||
|
@ -37,15 +37,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
*/
|
||||
public class TrimFilterFactory extends TokenFilterFactory {
|
||||
|
||||
protected boolean updateOffsets = false;
|
||||
protected final boolean updateOffsets;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init( args );
|
||||
|
||||
String v = args.get( "updateOffsets" );
|
||||
if (v != null) {
|
||||
updateOffsets = Boolean.valueOf( v );
|
||||
/** Creates a new TrimFilterFactory */
|
||||
public TrimFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
updateOffsets = getBoolean(args, "updateOffsets", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,10 +33,9 @@ import java.io.IOException;
|
|||
|
||||
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
|
||||
|
||||
|
||||
/**
|
||||
* Factory for {@link WordDelimiterFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -46,20 +45,62 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
|
|||
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
|
||||
* types="wdfftypes.txt" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String PROTECTED_TOKENS = "protected";
|
||||
public static final String TYPES = "types";
|
||||
|
||||
private final String wordFiles;
|
||||
private final String types;
|
||||
private final int flags;
|
||||
byte[] typeTable = null;
|
||||
private CharArraySet protectedWords = null;
|
||||
|
||||
/** Creates a new WordDelimiterFilterFactory */
|
||||
public WordDelimiterFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
int flags = 0;
|
||||
if (getInt(args, "generateWordParts", 1) != 0) {
|
||||
flags |= GENERATE_WORD_PARTS;
|
||||
}
|
||||
if (getInt(args, "generateNumberParts", 1) != 0) {
|
||||
flags |= GENERATE_NUMBER_PARTS;
|
||||
}
|
||||
if (getInt(args, "catenateWords", 0) != 0) {
|
||||
flags |= CATENATE_WORDS;
|
||||
}
|
||||
if (getInt(args, "catenateNumbers", 0) != 0) {
|
||||
flags |= CATENATE_NUMBERS;
|
||||
}
|
||||
if (getInt(args, "catenateAll", 0) != 0) {
|
||||
flags |= CATENATE_ALL;
|
||||
}
|
||||
if (getInt(args, "splitOnCaseChange", 1) != 0) {
|
||||
flags |= SPLIT_ON_CASE_CHANGE;
|
||||
}
|
||||
if (getInt(args, "splitOnNumerics", 1) != 0) {
|
||||
flags |= SPLIT_ON_NUMERICS;
|
||||
}
|
||||
if (getInt(args, "preserveOriginal", 0) != 0) {
|
||||
flags |= PRESERVE_ORIGINAL;
|
||||
}
|
||||
if (getInt(args, "stemEnglishPossessive", 1) != 0) {
|
||||
flags |= STEM_ENGLISH_POSSESSIVE;
|
||||
}
|
||||
wordFiles = args.remove(PROTECTED_TOKENS);
|
||||
types = args.remove(TYPES);
|
||||
this.flags = flags;
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String wordFiles = args.get(PROTECTED_TOKENS);
|
||||
if (wordFiles != null) {
|
||||
protectedWords = getWordSet(loader, wordFiles, false);
|
||||
}
|
||||
String types = args.get(TYPES);
|
||||
if (types != null) {
|
||||
List<String> files = splitFileNames( types );
|
||||
List<String> wlist = new ArrayList<String>();
|
||||
|
@ -71,42 +112,6 @@ public class WordDelimiterFilterFactory extends TokenFilterFactory implements Re
|
|||
}
|
||||
}
|
||||
|
||||
private CharArraySet protectedWords = null;
|
||||
private int flags;
|
||||
byte[] typeTable = null;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
if (getInt("generateWordParts", 1) != 0) {
|
||||
flags |= GENERATE_WORD_PARTS;
|
||||
}
|
||||
if (getInt("generateNumberParts", 1) != 0) {
|
||||
flags |= GENERATE_NUMBER_PARTS;
|
||||
}
|
||||
if (getInt("catenateWords", 0) != 0) {
|
||||
flags |= CATENATE_WORDS;
|
||||
}
|
||||
if (getInt("catenateNumbers", 0) != 0) {
|
||||
flags |= CATENATE_NUMBERS;
|
||||
}
|
||||
if (getInt("catenateAll", 0) != 0) {
|
||||
flags |= CATENATE_ALL;
|
||||
}
|
||||
if (getInt("splitOnCaseChange", 1) != 0) {
|
||||
flags |= SPLIT_ON_CASE_CHANGE;
|
||||
}
|
||||
if (getInt("splitOnNumerics", 1) != 0) {
|
||||
flags |= SPLIT_ON_NUMERICS;
|
||||
}
|
||||
if (getInt("preserveOriginal", 0) != 0) {
|
||||
flags |= PRESERVE_ORIGINAL;
|
||||
}
|
||||
if (getInt("stemEnglishPossessive", 1) != 0) {
|
||||
flags |= STEM_ENGLISH_POSSESSIVE;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public WordDelimiterFilter create(TokenStream input) {
|
||||
return new WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
|
||||
|
|
|
@ -24,36 +24,33 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Creates new instances of {@link EdgeNGramTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class EdgeNGramFilterFactory extends TokenFilterFactory {
|
||||
private int maxGramSize = 0;
|
||||
private final int maxGramSize;
|
||||
private final int minGramSize;
|
||||
private final String side;
|
||||
|
||||
private int minGramSize = 0;
|
||||
/** Creates a new EdgeNGramFilterFactory */
|
||||
public EdgeNGramFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
|
||||
private String side;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
String maxArg = args.get("maxGramSize");
|
||||
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg)
|
||||
: EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
|
||||
String minArg = args.get("minGramSize");
|
||||
minGramSize = (minArg != null ? Integer.parseInt(minArg)
|
||||
: EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
|
||||
|
||||
side = args.get("side");
|
||||
if (side == null) {
|
||||
String sideArg = args.remove("side");
|
||||
if (sideArg == null) {
|
||||
side = EdgeNGramTokenFilter.Side.FRONT.getLabel();
|
||||
} else {
|
||||
side = sideArg;
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,33 +25,32 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Creates new instances of {@link EdgeNGramTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class EdgeNGramTokenizerFactory extends TokenizerFactory {
|
||||
private int maxGramSize = 0;
|
||||
|
||||
private int minGramSize = 0;
|
||||
|
||||
private String side;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
String maxArg = args.get("maxGramSize");
|
||||
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg) : EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
|
||||
|
||||
String minArg = args.get("minGramSize");
|
||||
minGramSize = (minArg != null ? Integer.parseInt(minArg) : EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
|
||||
|
||||
side = args.get("side");
|
||||
if (side == null) {
|
||||
side = EdgeNGramTokenizer.Side.FRONT.getLabel();
|
||||
private final int maxGramSize;
|
||||
private final int minGramSize;
|
||||
private final String side;
|
||||
|
||||
/** Creates a new EdgeNGramTokenizerFactory */
|
||||
public EdgeNGramTokenizerFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
|
||||
|
||||
String sideArg = args.remove("side");
|
||||
if (sideArg == null) {
|
||||
side = EdgeNGramTokenFilter.Side.FRONT.getLabel();
|
||||
} else {
|
||||
side = sideArg;
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,31 +24,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link NGramTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class NGramFilterFactory extends TokenFilterFactory {
|
||||
private int maxGramSize = 0;
|
||||
private final int maxGramSize;
|
||||
private final int minGramSize;
|
||||
|
||||
private int minGramSize = 0;
|
||||
/** Creates a new NGramFilterFactory */
|
||||
public NGramFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||
|
||||
/** Initialize the n-gram min and max sizes and the side from which one should start tokenizing. */
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
String maxArg = args.get("maxGramSize");
|
||||
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg)
|
||||
: NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||
|
||||
String minArg = args.get("minGramSize");
|
||||
minGramSize = (minArg != null ? Integer.parseInt(minArg)
|
||||
: NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -26,27 +26,26 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link NGramTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class NGramTokenizerFactory extends TokenizerFactory {
|
||||
private int maxGramSize = 0;
|
||||
private int minGramSize = 0;
|
||||
|
||||
/** Initializes the n-gram min and max sizes and the side from which one should start tokenizing. */
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
String maxArg = args.get("maxGramSize");
|
||||
maxGramSize = (maxArg != null ? Integer.parseInt(maxArg) : NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||
|
||||
String minArg = args.get("minGramSize");
|
||||
minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
private final int maxGramSize;
|
||||
private final int minGramSize;
|
||||
|
||||
/** Creates a new NGramTokenizerFactory */
|
||||
public NGramTokenizerFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
minGramSize = getInt(args, "minGramSize", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
maxGramSize = getInt(args, "maxGramSize", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/** Creates the {@link TokenStream} of n-grams from the given {@link Reader} and {@link AttributeFactory}. */
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.no;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link NorwegianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class NorwegianLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new NorwegianLightStemFilterFactory */
|
||||
public NorwegianLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new NorwegianLightStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.no;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.no.NorwegianMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link NorwegianMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -33,6 +35,15 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class NorwegianMinimalStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new NorwegianMinimalStemFilterFactory */
|
||||
public NorwegianMinimalStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new NorwegianMinimalStemFilter(input);
|
||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
* <code>Books/Fic</code>...
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="descendent_path" class="solr.TextField">
|
||||
* <analyzer type="index">
|
||||
* <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
||||
|
@ -57,7 +57,7 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
* <code>Books/NonFic/Science/Physics/Theory</code> or
|
||||
* <code>Books/NonFic/Law</code>.
|
||||
* </p>
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="descendent_path" class="solr.TextField">
|
||||
* <analyzer type="index">
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
|
@ -69,59 +69,39 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
* </pre>
|
||||
*/
|
||||
public class PathHierarchyTokenizerFactory extends TokenizerFactory {
|
||||
private final char delimiter;
|
||||
private final char replacement;
|
||||
private final boolean reverse;
|
||||
private final int skip;
|
||||
|
||||
private char delimiter;
|
||||
private char replacement;
|
||||
private boolean reverse = false;
|
||||
private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
|
||||
/** Creates a new PathHierarchyTokenizerFactory */
|
||||
public PathHierarchyTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER);
|
||||
replacement = getChar(args, "replace", delimiter);
|
||||
reverse = getBoolean(args, "reverse", false);
|
||||
skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Require a configured pattern
|
||||
*/
|
||||
@Override
|
||||
public void init(Map<String,String> args){
|
||||
super.init( args );
|
||||
|
||||
String v = args.get( "delimiter" );
|
||||
if( v != null ){
|
||||
if( v.length() != 1 ){
|
||||
throw new IllegalArgumentException("delimiter should be a char. \"" + v + "\" is invalid");
|
||||
private char getChar(Map<String,String> args, String name, char defaultValue) {
|
||||
String v = args.remove(name);
|
||||
if (v != null) {
|
||||
if (v.length() != 1) {
|
||||
throw new IllegalArgumentException(name + " should be a char. \"" + v + "\" is invalid");
|
||||
} else {
|
||||
return v.charAt(0);
|
||||
}
|
||||
else{
|
||||
delimiter = v.charAt(0);
|
||||
}
|
||||
}
|
||||
else{
|
||||
delimiter = PathHierarchyTokenizer.DEFAULT_DELIMITER;
|
||||
}
|
||||
|
||||
v = args.get( "replace" );
|
||||
if( v != null ){
|
||||
if( v.length() != 1 ){
|
||||
throw new IllegalArgumentException("replace should be a char. \"" + v + "\" is invalid");
|
||||
}
|
||||
else{
|
||||
replacement = v.charAt(0);
|
||||
}
|
||||
}
|
||||
else{
|
||||
replacement = delimiter;
|
||||
}
|
||||
|
||||
v = args.get( "reverse" );
|
||||
if( v != null ){
|
||||
reverse = "true".equals( v );
|
||||
}
|
||||
|
||||
v = args.get( "skip" );
|
||||
if( v != null ){
|
||||
skip = Integer.parseInt( v );
|
||||
} else {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tokenizer create(AttributeFactory factory, Reader input) {
|
||||
if( reverse ) {
|
||||
if (reverse) {
|
||||
return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
|
||||
}
|
||||
return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link PatternReplaceCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.PatternReplaceCharFilterFactory"
|
||||
|
@ -36,26 +36,29 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
* @since Solr 3.1
|
||||
*/
|
||||
public class PatternReplaceCharFilterFactory extends CharFilterFactory {
|
||||
|
||||
private Pattern p;
|
||||
private String replacement;
|
||||
private final Pattern pattern;
|
||||
private final String replacement;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init( args );
|
||||
p = getPattern("pattern");
|
||||
replacement = args.get( "replacement" );
|
||||
if( replacement == null )
|
||||
/** Creates a new PatternReplaceCharFilterFactory */
|
||||
public PatternReplaceCharFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
pattern = getPattern(args, "pattern");
|
||||
String v = args.remove("replacement");
|
||||
if (v == null) {
|
||||
replacement = "";
|
||||
// TODO: throw exception if you set maxBlockChars or blockDelimiters ?
|
||||
} else {
|
||||
replacement = v;
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharFilter create(Reader input) {
|
||||
return new PatternReplaceCharFilter( p, replacement, input );
|
||||
return new PatternReplaceCharFilter(pattern, replacement, input);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,11 +23,10 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
/**
|
||||
* Factory for {@link PatternReplaceFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
|
@ -39,34 +38,32 @@ import java.util.regex.PatternSyntaxException;
|
|||
* @see PatternReplaceFilter
|
||||
*/
|
||||
public class PatternReplaceFilterFactory extends TokenFilterFactory {
|
||||
Pattern p;
|
||||
String replacement;
|
||||
boolean all = true;
|
||||
final Pattern pattern;
|
||||
final String replacement;
|
||||
final boolean replaceAll;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
p = getPattern("pattern");
|
||||
replacement = args.get("replacement");
|
||||
/** Creates a new PatternReplaceFilterFactory */
|
||||
public PatternReplaceFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
pattern = getPattern(args, "pattern");
|
||||
replacement = args.remove("replacement");
|
||||
|
||||
String r = args.get("replace");
|
||||
if (null != r) {
|
||||
if (r.equals("all")) {
|
||||
all = true;
|
||||
} else {
|
||||
if (r.equals("first")) {
|
||||
all = false;
|
||||
} else {
|
||||
throw new IllegalArgumentException
|
||||
("Configuration Error: 'replace' must be 'first' or 'all' in "
|
||||
+ this.getClass().getName());
|
||||
}
|
||||
}
|
||||
String v = args.remove("replace");
|
||||
if (v == null || v.equals("all")) {
|
||||
replaceAll = true;
|
||||
} else if (v.equals("first")) {
|
||||
replaceAll = false;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Configuration Error: " +
|
||||
"'replace' must be 'first' or 'all' in " + getClass().getName());
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public PatternReplaceFilter create(TokenStream input) {
|
||||
return new PatternReplaceFilter(input, p, replacement, all);
|
||||
return new PatternReplaceFilter(input, pattern, replacement, replaceAll);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.analysis.pattern;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -45,13 +44,13 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
* pattern = \'([^\']+)\'
|
||||
* group = 0
|
||||
* input = aaa 'bbb' 'ccc'
|
||||
*</pre>
|
||||
* </pre>
|
||||
* the output will be two tokens: 'bbb' and 'ccc' (including the ' marks). With the same input
|
||||
* but using group=1, the output would be: bbb and ccc (no ' marks)
|
||||
* </p>
|
||||
* <p>NOTE: This Tokenizer does not output tokens that are of zero length.</p>
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/>
|
||||
|
@ -60,29 +59,27 @@ import org.apache.lucene.util.AttributeSource.AttributeFactory;
|
|||
*
|
||||
* @see PatternTokenizer
|
||||
* @since solr1.2
|
||||
*
|
||||
*/
|
||||
public class PatternTokenizerFactory extends TokenizerFactory
|
||||
{
|
||||
public class PatternTokenizerFactory extends TokenizerFactory {
|
||||
public static final String PATTERN = "pattern";
|
||||
public static final String GROUP = "group";
|
||||
|
||||
protected Pattern pattern;
|
||||
protected int group;
|
||||
protected final Pattern pattern;
|
||||
protected final int group;
|
||||
|
||||
/**
|
||||
* Require a configured pattern
|
||||
*/
|
||||
@Override
|
||||
public void init(Map<String,String> args)
|
||||
{
|
||||
super.init(args);
|
||||
pattern = getPattern( PATTERN );
|
||||
/** Creates a new PatternTokenizerFactory */
|
||||
public PatternTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
pattern = getPattern(args, PATTERN);
|
||||
|
||||
group = -1; // use 'split'
|
||||
String g = args.get( GROUP );
|
||||
if( g != null ) {
|
||||
group = Integer.parseInt( g );
|
||||
String v = args.remove(GROUP);
|
||||
if (v == null) {
|
||||
group = -1; // use 'split'
|
||||
} else {
|
||||
group = Integer.parseInt(v);
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,41 +30,51 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
* Factory for {@link DelimitedPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String ENCODER_ATTR = "encoder";
|
||||
public static final String DELIMITER_ATTR = "delimiter";
|
||||
|
||||
private final String encoderClass;
|
||||
private final char delimiter;
|
||||
|
||||
private PayloadEncoder encoder;
|
||||
private char delimiter = '|';
|
||||
|
||||
/** Creates a new DelimitedPayloadTokenFilterFactory */
|
||||
public DelimitedPayloadTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
encoderClass = args.remove(ENCODER_ATTR);
|
||||
if (encoderClass == null) {
|
||||
throw new IllegalArgumentException("Parameter " + ENCODER_ATTR + " is mandatory");
|
||||
}
|
||||
String delim = args.remove(DELIMITER_ATTR);
|
||||
if (delim == null) {
|
||||
delimiter = '|';
|
||||
} else if (delim.length() == 1) {
|
||||
delimiter = delim.charAt(0);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Delimiter must be one character only");
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DelimitedPayloadTokenFilter create(TokenStream input) {
|
||||
return new DelimitedPayloadTokenFilter(input, delimiter, encoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) {
|
||||
String encoderClass = args.get(ENCODER_ATTR);
|
||||
if (encoderClass == null) {
|
||||
throw new IllegalArgumentException("Parameter " + ENCODER_ATTR + " is mandatory");
|
||||
}
|
||||
if (encoderClass.equals("float")){
|
||||
encoder = new FloatEncoder();
|
||||
} else if (encoderClass.equals("integer")){
|
||||
|
@ -74,14 +84,5 @@ public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory imple
|
|||
} else {
|
||||
encoder = loader.newInstance(encoderClass, PayloadEncoder.class);
|
||||
}
|
||||
|
||||
String delim = args.get(DELIMITER_ATTR);
|
||||
if (delim != null){
|
||||
if (delim.length() == 1) {
|
||||
delimiter = delim.charAt(0);
|
||||
} else{
|
||||
throw new IllegalArgumentException("Delimiter must be one character only");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -24,28 +24,32 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link NumericPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_numpayload" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
|
||||
private float payload;
|
||||
private String typeMatch;
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
String payloadArg = args.get("payload");
|
||||
typeMatch = args.get("typeMatch");
|
||||
private final float payload;
|
||||
private final String typeMatch;
|
||||
|
||||
/** Creates a new NumericPayloadTokenFilterFactory */
|
||||
public NumericPayloadTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
String payloadArg = args.remove("payload");
|
||||
typeMatch = args.remove("typeMatch");
|
||||
if (payloadArg == null || typeMatch == null) {
|
||||
throw new IllegalArgumentException("Both payload and typeMatch are required");
|
||||
}
|
||||
payload = Float.parseFloat(payloadArg);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericPayloadTokenFilter create(TokenStream input) {
|
||||
return new NumericPayloadTokenFilter(input,payload,typeMatch);
|
||||
|
|
|
@ -17,22 +17,32 @@ package org.apache.lucene.analysis.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link TokenOffsetPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TokenOffsetPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class TokenOffsetPayloadTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new TokenOffsetPayloadTokenFilterFactory */
|
||||
public TokenOffsetPayloadTokenFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenOffsetPayloadTokenFilter create(TokenStream input) {
|
||||
return new TokenOffsetPayloadTokenFilter(input);
|
||||
|
|
|
@ -17,22 +17,32 @@ package org.apache.lucene.analysis.payloads;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link TypeAsPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TypeAsPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class TypeAsPayloadTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new TypeAsPayloadTokenFilterFactory */
|
||||
public TypeAsPayloadTokenFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TypeAsPayloadTokenFilter create(TokenStream input) {
|
||||
return new TypeAsPayloadTokenFilter(input);
|
||||
|
|
|
@ -27,7 +27,7 @@ import java.util.Map;
|
|||
* Factory for {@link PositionFilter}.
|
||||
* Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its
|
||||
* original positionIncrement value. The default positionIncrement value is zero.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -35,17 +35,19 @@ import java.util.Map;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
* @see org.apache.lucene.analysis.position.PositionFilter
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class PositionFilterFactory extends TokenFilterFactory {
|
||||
private int positionIncrement;
|
||||
private final int positionIncrement;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
positionIncrement = getInt("positionIncrement", 0);
|
||||
/** Creates a new PositionFilterFactory */
|
||||
public PositionFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
positionIncrement = getInt(args, "positionIncrement", 0);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link PortugueseLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PortugueseLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PortugueseLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new PortugueseLightStemFilterFactory */
|
||||
public PortugueseLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new PortugueseLightStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link PortugueseMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PortugueseMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PortugueseMinimalStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new PortugueseMinimalStemFilterFactory */
|
||||
public PortugueseMinimalStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new PortugueseMinimalStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.pt;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link PortugueseStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PortugueseStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class PortugueseStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new PortugueseStemFilterFactory */
|
||||
public PortugueseStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new PortugueseStemFilter(input);
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.reverse;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link ReverseStringFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -31,13 +33,21 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class ReverseStringFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new ReverseStringFilterFactory */
|
||||
public ReverseStringFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReverseStringFilter create(TokenStream in) {
|
||||
assureMatchVersion();
|
||||
return new ReverseStringFilter(luceneMatchVersion,in);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.ru;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link RussianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.RussianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class RussianLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new RussianLightStemFilterFactory */
|
||||
public RussianLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new RussianLightStemFilter(input);
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link ShingleFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -33,25 +33,24 @@ import java.util.Map;
|
|||
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ShingleFilterFactory extends TokenFilterFactory {
|
||||
private int minShingleSize;
|
||||
private int maxShingleSize;
|
||||
private boolean outputUnigrams;
|
||||
private boolean outputUnigramsIfNoShingles;
|
||||
private String tokenSeparator;
|
||||
private final int minShingleSize;
|
||||
private final int maxShingleSize;
|
||||
private final boolean outputUnigrams;
|
||||
private final boolean outputUnigramsIfNoShingles;
|
||||
private final String tokenSeparator;
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
maxShingleSize = getInt("maxShingleSize",
|
||||
/** Creates a new ShingleFilterFactory */
|
||||
public ShingleFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
maxShingleSize = getInt(args, "maxShingleSize",
|
||||
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
|
||||
if (maxShingleSize < 2) {
|
||||
throw new IllegalArgumentException("Invalid maxShingleSize (" + maxShingleSize
|
||||
+ ") - must be at least 2");
|
||||
}
|
||||
minShingleSize = getInt("minShingleSize",
|
||||
minShingleSize = getInt(args, "minShingleSize",
|
||||
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
|
||||
if (minShingleSize < 2) {
|
||||
throw new IllegalArgumentException("Invalid minShingleSize (" + minShingleSize
|
||||
|
@ -62,12 +61,16 @@ public class ShingleFilterFactory extends TokenFilterFactory {
|
|||
+ ") - must be no greater than maxShingleSize ("
|
||||
+ maxShingleSize + ")");
|
||||
}
|
||||
outputUnigrams = getBoolean("outputUnigrams", true);
|
||||
outputUnigramsIfNoShingles = getBoolean("outputUnigramsIfNoShingles", false);
|
||||
outputUnigrams = getBoolean(args, "outputUnigrams", true);
|
||||
outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
|
||||
tokenSeparator = args.containsKey("tokenSeparator")
|
||||
? args.get("tokenSeparator")
|
||||
? args.remove("tokenSeparator")
|
||||
: ShingleFilter.TOKEN_SEPARATOR;
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ShingleFilter create(TokenStream input) {
|
||||
ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
* Factory for {@link SnowballFilter}, with configurable language
|
||||
* <p>
|
||||
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -39,26 +39,35 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
* <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String PROTECTED_TOKENS = "protected";
|
||||
|
||||
private String language = "English";
|
||||
private final String language;
|
||||
private final String wordFiles;
|
||||
private Class<? extends SnowballProgram> stemClass;
|
||||
private CharArraySet protectedWords = null;
|
||||
|
||||
/** Creates a new SnowballPorterFilterFactory */
|
||||
public SnowballPorterFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
String cfgLanguage = args.remove("language");
|
||||
if (cfgLanguage == null) {
|
||||
language = "English";
|
||||
} else {
|
||||
language = cfgLanguage;
|
||||
}
|
||||
wordFiles = args.remove(PROTECTED_TOKENS);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String cfgLanguage = args.get("language");
|
||||
if (cfgLanguage != null)
|
||||
language = cfgLanguage;
|
||||
|
||||
String className = "org.tartarus.snowball.ext." + language + "Stemmer";
|
||||
stemClass = loader.newInstance(className, SnowballProgram.class).getClass();
|
||||
|
||||
String wordFiles = args.get(PROTECTED_TOKENS);
|
||||
if (wordFiles != null) {
|
||||
protectedWords = getWordSet(loader, wordFiles, false);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.standard;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
@ -24,17 +26,24 @@ import org.apache.lucene.analysis.standard.ClassicFilter;
|
|||
|
||||
/**
|
||||
* Factory for {@link ClassicFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.ClassicTokenizerFactory"/>
|
||||
* <filter class="solr.ClassicFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class ClassicFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new ClassicFilterFactory */
|
||||
public ClassicFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenFilter create(TokenStream input) {
|
||||
return new ClassicFilter(input);
|
||||
|
|
|
@ -25,26 +25,25 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link ClassicTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
public class ClassicTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLength;
|
||||
|
||||
private int maxTokenLength;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new ClassicTokenizerFactory */
|
||||
public ClassicTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
maxTokenLength = getInt("maxTokenLength",
|
||||
maxTokenLength = getInt(args, "maxTokenLength",
|
||||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,20 +25,23 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link StandardFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.StandardFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class StandardFilterFactory extends TokenFilterFactory {
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
|
||||
/** Creates a new StandardFilterFactory */
|
||||
public StandardFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,24 +25,24 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link StandardTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
|
||||
public class StandardTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLength;
|
||||
|
||||
private int maxTokenLength;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new StandardTokenizerFactory */
|
||||
public StandardTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,26 +25,25 @@ import java.util.Map;
|
|||
|
||||
/**
|
||||
* Factory for {@link UAX29URLEmailTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLength;
|
||||
|
||||
private int maxTokenLength;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
/** Creates a new UAX29URLEmailTokenizerFactory */
|
||||
public UAX29URLEmailTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
maxTokenLength = getInt("maxTokenLength",
|
||||
maxTokenLength = getInt(args, "maxTokenLength",
|
||||
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,13 +17,15 @@ package org.apache.lucene.analysis.sv;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link SwedishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -31,9 +33,17 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.SwedishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class SwedishLightStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new SwedishLightStemFilterFactory */
|
||||
public SwedishLightStemFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new SwedishLightStemFilter(input);
|
||||
|
|
|
@ -25,7 +25,9 @@ import java.nio.charset.Charset;
|
|||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.text.ParseException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -41,7 +43,7 @@ import org.apache.lucene.util.Version;
|
|||
|
||||
/**
|
||||
* Factory for {@link SynonymFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
|
@ -52,8 +54,32 @@ import org.apache.lucene.util.Version;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private final boolean ignoreCase;
|
||||
private final String tokenizerFactory;
|
||||
private final String synonyms;
|
||||
private final String format;
|
||||
private final boolean expand;
|
||||
|
||||
private SynonymMap map;
|
||||
private boolean ignoreCase;
|
||||
|
||||
public SynonymFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
tokenizerFactory = args.remove("tokenizerFactory");
|
||||
if (tokenizerFactory != null) {
|
||||
assureMatchVersion();
|
||||
}
|
||||
synonyms = args.remove("synonyms");
|
||||
if (synonyms == null) {
|
||||
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
|
||||
}
|
||||
format = args.remove("format");
|
||||
expand = getBoolean(args, "expand", true);
|
||||
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
|
@ -64,12 +90,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
final boolean ignoreCase = getBoolean("ignoreCase", false);
|
||||
this.ignoreCase = ignoreCase;
|
||||
|
||||
String tf = args.get("tokenizerFactory");
|
||||
|
||||
final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
|
||||
final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
|
||||
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
|
@ -80,7 +101,6 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
}
|
||||
};
|
||||
|
||||
String format = args.get("format");
|
||||
try {
|
||||
if (format == null || format.equals("solr")) {
|
||||
// TODO: expose dedup as a parameter?
|
||||
|
@ -99,12 +119,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
/**
|
||||
* Load synonyms from the solr format, "format=solr".
|
||||
*/
|
||||
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
|
||||
final boolean expand = getBoolean("expand", true);
|
||||
String synonyms = args.get("synonyms");
|
||||
if (synonyms == null)
|
||||
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
|
||||
|
||||
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
|
||||
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
|
@ -128,11 +143,6 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
* Load synonyms from the wordnet format, "format=wordnet".
|
||||
*/
|
||||
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
|
||||
final boolean expand = getBoolean("expand", true);
|
||||
String synonyms = args.get("synonyms");
|
||||
if (synonyms == null)
|
||||
throw new IllegalArgumentException("Missing required argument 'synonyms'.");
|
||||
|
||||
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
|
@ -154,12 +164,17 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
|
||||
// (there are no tests for this functionality)
|
||||
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
|
||||
TokenizerFactory tokFactory = loader.newInstance(cname, TokenizerFactory.class);
|
||||
tokFactory.setLuceneMatchVersion(luceneMatchVersion);
|
||||
tokFactory.init(args);
|
||||
if (tokFactory instanceof ResourceLoaderAware) {
|
||||
((ResourceLoaderAware) tokFactory).inform(loader);
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("luceneMatchVersion", getLuceneMatchVersion().toString());
|
||||
Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class);
|
||||
try {
|
||||
TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(args);
|
||||
if (tokFactory instanceof ResourceLoaderAware) {
|
||||
((ResourceLoaderAware) tokFactory).inform(loader);
|
||||
}
|
||||
return tokFactory;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return tokFactory;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.th;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.th.ThaiWordFilter;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -24,19 +26,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link ThaiWordFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.ThaiWordFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ThaiWordFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Creates a new ThaiWordFilterFactory */
|
||||
public ThaiWordFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
assureMatchVersion();
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ThaiWordFilter create(TokenStream input) {
|
||||
assureMatchVersion();
|
||||
return new ThaiWordFilter(luceneMatchVersion, input);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.tr;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
|
@ -25,16 +27,24 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link TurkishLowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.TurkishLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class TurkishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
/** Creates a new TurkishLowerCaseFilterFactory */
|
||||
public TurkishLowerCaseFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new TurkishLowerCaseFilter(input);
|
||||
|
|
|
@ -41,37 +41,29 @@ import java.util.regex.PatternSyntaxException;
|
|||
* <p>
|
||||
* The typical lifecycle for a factory consumer is:
|
||||
* <ol>
|
||||
* <li>Create factory via its a no-arg constructor
|
||||
* <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
|
||||
* <li>Calls {@link #init(Map)} passing arguments as key-value mappings.
|
||||
* <li>Create factory via its constructor (or via XXXFactory.forName)
|
||||
* <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
|
||||
* <li>Consumer calls create() to obtain instances.
|
||||
* </ol>
|
||||
*/
|
||||
public abstract class AbstractAnalysisFactory {
|
||||
|
||||
/** The original args, before init() processes them */
|
||||
private Map<String,String> originalArgs;
|
||||
|
||||
/** The init args */
|
||||
protected Map<String,String> args;
|
||||
/** The original args, before any processing */
|
||||
private final Map<String,String> originalArgs;
|
||||
|
||||
/** the luceneVersion arg */
|
||||
protected Version luceneMatchVersion = null;
|
||||
protected final Version luceneMatchVersion;
|
||||
|
||||
/**
|
||||
* Initialize this factory via a set of key-value pairs.
|
||||
*/
|
||||
public void init(Map<String,String> args) {
|
||||
originalArgs = Collections.unmodifiableMap(args);
|
||||
this.args = new HashMap<String,String>(args);
|
||||
}
|
||||
|
||||
public Map<String,String> getArgs() {
|
||||
return args;
|
||||
protected AbstractAnalysisFactory(Map<String,String> args) {
|
||||
originalArgs = Collections.unmodifiableMap(new HashMap<String,String>(args));
|
||||
String version = args.remove("luceneMatchVersion");
|
||||
luceneMatchVersion = version == null ? null : Version.parseLeniently(version);
|
||||
}
|
||||
|
||||
public Map<String,String> getOriginalArgs() {
|
||||
public final Map<String,String> getOriginalArgs() {
|
||||
return originalArgs;
|
||||
}
|
||||
|
||||
|
@ -85,24 +77,20 @@ public abstract class AbstractAnalysisFactory {
|
|||
}
|
||||
}
|
||||
|
||||
public void setLuceneMatchVersion(Version luceneMatchVersion) {
|
||||
this.luceneMatchVersion = luceneMatchVersion;
|
||||
}
|
||||
|
||||
public Version getLuceneMatchVersion() {
|
||||
public final Version getLuceneMatchVersion() {
|
||||
return this.luceneMatchVersion;
|
||||
}
|
||||
|
||||
protected int getInt(String name) {
|
||||
return getInt(name, -1, false);
|
||||
protected final int getInt(Map<String,String> args, String name) {
|
||||
return getInt(args, name, -1, false);
|
||||
}
|
||||
|
||||
protected int getInt(String name, int defaultVal) {
|
||||
return getInt(name, defaultVal, true);
|
||||
protected final int getInt(Map<String,String> args, String name, int defaultVal) {
|
||||
return getInt(args, name, defaultVal, true);
|
||||
}
|
||||
|
||||
protected int getInt(String name, int defaultVal, boolean useDefault) {
|
||||
String s = args.get(name);
|
||||
protected final int getInt(Map<String,String> args, String name, int defaultVal, boolean useDefault) {
|
||||
String s = args.remove(name);
|
||||
if (s == null) {
|
||||
if (useDefault) {
|
||||
return defaultVal;
|
||||
|
@ -112,12 +100,12 @@ public abstract class AbstractAnalysisFactory {
|
|||
return Integer.parseInt(s);
|
||||
}
|
||||
|
||||
protected boolean getBoolean(String name, boolean defaultVal) {
|
||||
return getBoolean(name, defaultVal, true);
|
||||
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal) {
|
||||
return getBoolean(args, name, defaultVal, true);
|
||||
}
|
||||
|
||||
protected boolean getBoolean(String name, boolean defaultVal, boolean useDefault) {
|
||||
String s = args.get(name);
|
||||
protected final boolean getBoolean(Map<String,String> args, String name, boolean defaultVal, boolean useDefault) {
|
||||
String s = args.remove(name);
|
||||
if (s==null) {
|
||||
if (useDefault) return defaultVal;
|
||||
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
|
||||
|
@ -128,13 +116,13 @@ public abstract class AbstractAnalysisFactory {
|
|||
/**
|
||||
* Compiles a pattern for the value of the specified argument key <code>name</code>
|
||||
*/
|
||||
protected Pattern getPattern(String name) {
|
||||
protected final Pattern getPattern(Map<String,String> args, String name) {
|
||||
try {
|
||||
String pat = args.get(name);
|
||||
String pat = args.remove(name);
|
||||
if (null == pat) {
|
||||
throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
|
||||
}
|
||||
return Pattern.compile(args.get(name));
|
||||
return Pattern.compile(pat);
|
||||
} catch (PatternSyntaxException e) {
|
||||
throw new IllegalArgumentException
|
||||
("Configuration Error: '" + name + "' can not be parsed in " +
|
||||
|
@ -146,7 +134,7 @@ public abstract class AbstractAnalysisFactory {
|
|||
* Returns as {@link CharArraySet} from wordFiles, which
|
||||
* can be a comma-separated list of filenames
|
||||
*/
|
||||
protected CharArraySet getWordSet(ResourceLoader loader,
|
||||
protected final CharArraySet getWordSet(ResourceLoader loader,
|
||||
String wordFiles, boolean ignoreCase) throws IOException {
|
||||
assureMatchVersion();
|
||||
List<String> files = splitFileNames(wordFiles);
|
||||
|
@ -168,13 +156,13 @@ public abstract class AbstractAnalysisFactory {
|
|||
/**
|
||||
* Returns the resource's lines (with content treated as UTF-8)
|
||||
*/
|
||||
protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
|
||||
protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
|
||||
return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
|
||||
}
|
||||
|
||||
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
|
||||
* except the input is in snowball format. */
|
||||
protected CharArraySet getSnowballWordSet(ResourceLoader loader,
|
||||
protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
|
||||
String wordFiles, boolean ignoreCase) throws IOException {
|
||||
assureMatchVersion();
|
||||
List<String> files = splitFileNames(wordFiles);
|
||||
|
@ -209,7 +197,7 @@ public abstract class AbstractAnalysisFactory {
|
|||
* @param fileNames the string containing file names
|
||||
* @return a list of file names with the escaping backslashed removed
|
||||
*/
|
||||
protected List<String> splitFileNames(String fileNames) {
|
||||
protected final List<String> splitFileNames(String fileNames) {
|
||||
if (fileNames == null)
|
||||
return Collections.<String>emptyList();
|
||||
|
||||
|
|
|
@ -104,10 +104,10 @@ final class AnalysisSPILoader<S extends AbstractAnalysisFactory> {
|
|||
this.services = Collections.unmodifiableMap(services);
|
||||
}
|
||||
|
||||
public S newInstance(String name) {
|
||||
public S newInstance(String name, Map<String,String> args) {
|
||||
final Class<? extends S> service = lookupClass(name);
|
||||
try {
|
||||
return service.newInstance();
|
||||
return service.getConstructor(Map.class).newInstance(args);
|
||||
} catch (Exception e) {
|
||||
throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name '"+name+"' cannot be instantiated. " +
|
||||
"This is likely due to a misconfiguration of the java class '" + service.getName() + "': ", e);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
|
|||
*/
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
|
@ -32,8 +33,8 @@ public abstract class CharFilterFactory extends AbstractAnalysisFactory {
|
|||
new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
|
||||
|
||||
/** looks up a charfilter by name from context classpath */
|
||||
public static CharFilterFactory forName(String name) {
|
||||
return loader.newInstance(name);
|
||||
public static CharFilterFactory forName(String name, Map<String,String> args) {
|
||||
return loader.newInstance(name, args);
|
||||
}
|
||||
|
||||
/** looks up a charfilter class by name from context classpath */
|
||||
|
@ -61,6 +62,13 @@ public abstract class CharFilterFactory extends AbstractAnalysisFactory {
|
|||
loader.reload(classloader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize this factory via a set of key-value pairs.
|
||||
*/
|
||||
protected CharFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
}
|
||||
|
||||
/** Wraps the given Reader with a CharFilter. */
|
||||
public abstract Reader create(Reader input);
|
||||
}
|
||||
|
|
|
@ -67,14 +67,23 @@ public final class ClasspathResourceLoader implements ResourceLoader {
|
|||
throw new IOException("Resource not found: " + resource);
|
||||
return stream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
|
||||
try {
|
||||
return Class.forName(cname, true, loader).asSubclass(expectedType);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Cannot load class: " + cname, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T newInstance(String cname, Class<T> expectedType) {
|
||||
Class<? extends T> clazz = findClass(cname, expectedType);
|
||||
try {
|
||||
final Class<? extends T> clazz = Class.forName(cname, true, loader).asSubclass(expectedType);
|
||||
return clazz.newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Cannot instantiate class: " + cname, e);
|
||||
throw new RuntimeException("Cannot create instance: " + cname, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,12 +18,14 @@ package org.apache.lucene.analysis.util;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
|
||||
/**
|
||||
* Factory for {@link ElisionFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
|
@ -32,22 +34,28 @@ import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
|||
* articles="stopwordarticles.txt" ignoreCase="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
*/
|
||||
public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware, MultiTermAwareComponent {
|
||||
|
||||
private final String articlesFile;
|
||||
private final boolean ignoreCase;
|
||||
private CharArraySet articles;
|
||||
|
||||
/** Creates a new ElisionFilterFactory */
|
||||
public ElisionFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
articlesFile = args.remove("articles");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
String articlesFile = args.get("articles");
|
||||
boolean ignoreCase = getBoolean("ignoreCase", false);
|
||||
|
||||
if (articlesFile != null) {
|
||||
articles = getWordSet(loader, articlesFile, ignoreCase);
|
||||
}
|
||||
if (articles == null) {
|
||||
if (articlesFile == null) {
|
||||
articles = FrenchAnalyzer.DEFAULT_ARTICLES;
|
||||
} else {
|
||||
articles = getWordSet(loader, articlesFile, ignoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -91,4 +91,9 @@ public final class FilesystemResourceLoader implements ResourceLoader {
|
|||
public <T> T newInstance(String cname, Class<T> expectedType) {
|
||||
return delegate.newInstance(cname, expectedType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
|
||||
return delegate.findClass(cname, expectedType);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,8 +30,14 @@ public interface ResourceLoader {
|
|||
*/
|
||||
public InputStream openResource(String resource) throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a class of the name and expected type
|
||||
* Finds class of the name and expected type
|
||||
*/
|
||||
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType);
|
||||
|
||||
/**
|
||||
* Creates an instance of the name and expected type
|
||||
*/
|
||||
// TODO: fix exception handling
|
||||
public <T> T newInstance(String cname, Class<T> expectedType);
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -32,8 +33,8 @@ public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
|
|||
new String[] { "TokenFilterFactory", "FilterFactory" });
|
||||
|
||||
/** looks up a tokenfilter by name from context classpath */
|
||||
public static TokenFilterFactory forName(String name) {
|
||||
return loader.newInstance(name);
|
||||
public static TokenFilterFactory forName(String name, Map<String,String> args) {
|
||||
return loader.newInstance(name, args);
|
||||
}
|
||||
|
||||
/** looks up a tokenfilter class by name from context classpath */
|
||||
|
@ -60,6 +61,13 @@ public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
|
|||
public static void reloadTokenFilters(ClassLoader classloader) {
|
||||
loader.reload(classloader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize this factory via a set of key-value pairs.
|
||||
*/
|
||||
protected TokenFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
}
|
||||
|
||||
/** Transform the specified input TokenStream */
|
||||
public abstract TokenStream create(TokenStream input);
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue