mirror of https://github.com/apache/lucene.git
LUCENE-7318: graduate StandardAnalyzer and make it the default for IndexWriterConfig
This commit is contained in:
parent
a893c64595
commit
87016b5f0c
|
@ -47,6 +47,10 @@ Improvements
|
|||
write-once architecture, possibly catching externally caused
|
||||
issues sooner (Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-7318: StandardAnalyzer has been moved from the analysis
|
||||
module into core and is now the default analyzer in
|
||||
IndexWriterConfig (Robert Muir, Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-7330: Speed up conjunction queries. (Adrien Grand)
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
|
||||
<property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
|
||||
|
||||
<target name="jflex" depends="-install-jflex,clean-jflex,-jflex-StandardAnalyzer,-jflex-UAX29URLEmailTokenizer,
|
||||
<target name="jflex" depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,
|
||||
-jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/>
|
||||
|
||||
<target name="-jflex-HTMLStripCharFilter"
|
||||
|
@ -62,45 +62,14 @@
|
|||
<run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
|
||||
</target>
|
||||
|
||||
<target name="-jflex-StandardAnalyzer" depends="init,-install-jflex">
|
||||
<run-jflex-and-disable-buffer-expansion
|
||||
dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
|
||||
<run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
|
||||
</target>
|
||||
|
||||
<target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex">
|
||||
<run-jflex-and-disable-buffer-expansion
|
||||
dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
|
||||
</target>
|
||||
|
||||
<macrodef name="run-jflex">
|
||||
<attribute name="dir"/>
|
||||
<attribute name="name"/>
|
||||
<sequential>
|
||||
<jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<macrodef name="run-jflex-and-disable-buffer-expansion">
|
||||
<attribute name="dir"/>
|
||||
<attribute name="name"/>
|
||||
<sequential>
|
||||
<jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
|
||||
<!-- LUCENE-5897: Disallow scanner buffer expansion -->
|
||||
<replaceregexp file="@{dir}/@{name}.java"
|
||||
match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
|
||||
replace="" flags="s" />
|
||||
<replaceregexp file="@{dir}/@{name}.java"
|
||||
match="private static final int ZZ_BUFFERSIZE ="
|
||||
replace="private int ZZ_BUFFERSIZE ="/>
|
||||
<replaceregexp file="@{dir}/@{name}.java"
|
||||
match="int requested = zzBuffer.length - zzEndRead;"
|
||||
replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
|
||||
<replaceregexp file="@{dir}/@{name}.java"
|
||||
match="(zzFinalHighSurrogate = 1;)(\r?\n)"
|
||||
replace="\1\2 if (totalRead == 1) { return true; }\2"/>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
<target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex">
|
||||
<run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
|
||||
</target>
|
||||
|
||||
<target name="clean-jflex">
|
||||
<delete>
|
||||
|
|
|
@ -21,15 +21,15 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Arabic.
|
||||
|
|
|
@ -21,15 +21,15 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Bulgarian.
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.CatalanStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,8 +24,8 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArrayMap;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.OpenStringBuilder;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,13 +20,13 @@ package org.apache.lucene.analysis.cjk;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,14 +18,14 @@ package org.apache.lucene.analysis.commongrams;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/*
|
||||
* TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.commongrams;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
|
@ -82,4 +83,4 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -17,15 +17,15 @@
|
|||
package org.apache.lucene.analysis.compound;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Base class for decomposition token filters.
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.compound;
|
|||
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
/**
|
||||
* A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
|
||||
|
|
|
@ -17,15 +17,15 @@
|
|||
package org.apache.lucene.analysis.compound;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Factory for {@link DictionaryCompoundWordTokenFilter}.
|
||||
* <pre class="prettyprint">
|
||||
|
|
|
@ -16,15 +16,14 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.compound;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
|
||||
*
|
||||
|
@ -82,7 +81,7 @@ public class HyphenationCompoundWordTokenFilter extends
|
|||
/**
|
||||
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
|
||||
* <p>
|
||||
* Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean)
|
||||
* Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.CharArraySet, int, int, int, boolean)
|
||||
* HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
|
||||
* null, minWordSize, minSubwordSize, maxSubwordSize }
|
||||
*/
|
||||
|
|
|
@ -17,19 +17,18 @@
|
|||
package org.apache.lucene.analysis.compound;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;
|
|||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
|
||||
/** An {@link Analyzer} that filters {@link LetterTokenizer}
|
||||
* with {@link LowerCaseFilter}
|
||||
|
|
|
@ -20,13 +20,14 @@ package org.apache.lucene.analysis.core;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
|
||||
/**
|
||||
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
|
||||
|
@ -35,19 +36,7 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
|
|||
|
||||
/** An unmodifiable set containing some common English words that are not usually useful
|
||||
for searching.*/
|
||||
public static final CharArraySet ENGLISH_STOP_WORDS_SET;
|
||||
|
||||
static {
|
||||
final List<String> stopWords = Arrays.asList(
|
||||
"a", "an", "and", "are", "as", "at", "be", "but", "by",
|
||||
"for", "if", "in", "into", "is", "it",
|
||||
"no", "not", "of", "on", "or", "such",
|
||||
"that", "the", "their", "then", "there", "these",
|
||||
"they", "this", "to", "was", "will", "with"
|
||||
);
|
||||
final CharArraySet stopSet = new CharArraySet(stopWords, false);
|
||||
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
|
||||
}
|
||||
public static final CharArraySet ENGLISH_STOP_WORDS_SET = StandardAnalyzer.ENGLISH_STOP_WORDS_SET;
|
||||
|
||||
/** Builds an analyzer which removes words in
|
||||
* {@link #ENGLISH_STOP_WORDS_SET}.
|
||||
|
|
|
@ -17,15 +17,16 @@
|
|||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.WordlistLoader; // jdocs
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader; // jdocs
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Factory for {@link StopFilter}.
|
||||
|
|
|
@ -19,9 +19,9 @@ package org.apache.lucene.analysis.core;
|
|||
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||
|
||||
/**
|
||||
* Removes tokens whose types appear in a set of blocked types from a token stream.
|
||||
|
|
|
@ -19,10 +19,11 @@ package org.apache.lucene.analysis.core;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.CharacterUtils;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||
|
||||
/**
|
||||
* Normalizes token text to UPPER CASE.
|
||||
|
|
|
@ -17,22 +17,22 @@
|
|||
package org.apache.lucene.analysis.cz;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Czech language.
|
||||
* <p>
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.tartarus.snowball.ext.DanishStemmer;
|
||||
|
||||
|
|
|
@ -23,18 +23,18 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,14 +20,14 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for the Greek language.
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.el;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
|
|
@ -20,16 +20,16 @@ package org.apache.lucene.analysis.en;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for English.
|
||||
|
|
|
@ -55,7 +55,7 @@ the original shown below)
|
|||
*/
|
||||
package org.apache.lucene.analysis.en;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||
import org.apache.lucene.analysis.CharArrayMap;
|
||||
import org.apache.lucene.analysis.util.OpenStringBuilder;
|
||||
/**
|
||||
* <p>Title: Kstemmer</p>
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,16 +21,16 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.BasqueStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,15 +21,15 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Persian.
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.tartarus.snowball.ext.FinnishStemmer;
|
||||
|
||||
|
|
|
@ -17,27 +17,27 @@
|
|||
package org.apache.lucene.analysis.fr;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for French language.
|
||||
* <p>
|
||||
|
|
|
@ -22,16 +22,16 @@ import java.io.Reader;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.IrishStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,16 +22,16 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,16 +20,16 @@ package org.apache.lucene.analysis.hi;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
/**
|
||||
* Analyzer for Hindi.
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.tartarus.snowball.ext.HungarianStemmer;
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
|
|
@ -21,16 +21,16 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.ArmenianStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,15 +20,15 @@ package org.apache.lucene.analysis.id;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* Analyzer for Indonesian (Bahasa)
|
||||
|
|
|
@ -23,18 +23,18 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ElisionFilter;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,16 +21,16 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.LithuanianStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,16 +22,16 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,10 +20,10 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* A filter to apply normal capitalization rules to Tokens. It will make the first letter
|
||||
|
|
|
@ -17,16 +17,16 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link CapitalizationFilter}.
|
||||
* <p>
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,9 +21,9 @@ import java.text.DateFormat;
|
|||
import java.text.ParseException;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||
|
||||
/** Filters all tokens that cannot be parsed to a date, using the provided {@link DateFormat}. */
|
||||
public class DateRecognizerFilter extends FilteringTokenFilter {
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
@ -27,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
||||
/**
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* A TokenFilter that only keeps tokens with text contained in the
|
||||
|
|
|
@ -17,15 +17,15 @@
|
|||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeepWordFilter}.
|
||||
* <pre class="prettyprint">
|
||||
|
|
|
@ -21,8 +21,8 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
|
|
|
@ -20,7 +20,7 @@ import org.apache.lucene.analysis.TokenFilter;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
/**
|
||||
* Marks terms as keywords via the {@link KeywordAttribute}. Each token
|
||||
|
|
|
@ -16,22 +16,22 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Splits words into subwords and performs optional transformations on subword
|
||||
* groups. Words are split into subwords with the following rules:
|
||||
|
|
|
@ -16,13 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -30,7 +24,13 @@ import java.util.SortedMap;
|
|||
import java.util.TreeMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
||||
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||
import org.apache.lucene.analysis.CharacterUtils;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,27 +17,27 @@
|
|||
package org.apache.lucene.analysis.nl;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
|
||||
import org.apache.lucene.analysis.CharArrayMap;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Dutch language.
|
||||
* <p>
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.tartarus.snowball.ext.NorwegianStemmer;
|
||||
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -30,7 +30,7 @@ import java.util.Map;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
import static org.apache.lucene.analysis.util.StemmerUtil.*;
|
||||
|
||||
|
|
|
@ -21,8 +21,8 @@ import java.util.*;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.AnalyzerWrapper;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
|
|
@ -21,16 +21,16 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.RomanianStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
|
|
@ -19,11 +19,11 @@ package org.apache.lucene.analysis.snowball;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
|
||||
import org.tartarus.snowball.SnowballProgram;
|
||||
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
package org.apache.lucene.analysis.snowball;
|
||||
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
|
|
@ -20,13 +20,13 @@ package org.apache.lucene.analysis.standard;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
|
||||
/**
|
||||
* Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
|
||||
|
|
|
@ -20,18 +20,18 @@ package org.apache.lucene.analysis.standard;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
|
||||
* with {@link org.apache.lucene.analysis.standard.StandardFilter},
|
||||
* {@link org.apache.lucene.analysis.core.LowerCaseFilter} and
|
||||
* {@link org.apache.lucene.analysis.core.StopFilter}, using a list of
|
||||
* {@link org.apache.lucene.analysis.LowerCaseFilter} and
|
||||
* {@link org.apache.lucene.analysis.StopFilter}, using a list of
|
||||
* English stop words.
|
||||
*/
|
||||
public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
|
||||
|
@ -59,7 +59,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
|
|||
}
|
||||
|
||||
/** Builds an analyzer with the stop words from the given reader.
|
||||
* @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader)
|
||||
* @see org.apache.lucene.analysis.WordlistLoader#getWordSet(java.io.Reader)
|
||||
* @param stopwords Reader to read stop words from */
|
||||
public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException {
|
||||
this(loadStopwordSet(stopwords));
|
||||
|
|
|
@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Fast, general-purpose grammar-based tokenizers.
|
||||
* <p>The <code>org.apache.lucene.analysis.standard</code> package contains three
|
||||
* fast grammar-based tokenizers constructed with JFlex:</p>
|
||||
* <ul>
|
||||
* <li>{@link org.apache.lucene.analysis.standard.StandardTokenizer}:
|
||||
* as of Lucene 3.1, implements the Word Break rules from the Unicode Text
|
||||
* Segmentation algorithm, as specified in
|
||||
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
|
||||
* Unlike <code>UAX29URLEmailTokenizer</code>, URLs and email addresses are
|
||||
* <b>not</b> tokenized as single tokens, but are instead split up into
|
||||
* tokens according to the UAX#29 word break rules.
|
||||
* <br>
|
||||
* {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer} includes
|
||||
* {@link org.apache.lucene.analysis.standard.StandardTokenizer StandardTokenizer},
|
||||
* {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
|
||||
* {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
|
||||
* and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
|
||||
* When the <code>Version</code> specified in the constructor is lower than
|
||||
* 3.1, the {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}
|
||||
* implementation is invoked.</li>
|
||||
* <li>{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}:
|
||||
* this class was formerly (prior to Lucene 3.1) named
|
||||
* <code>StandardTokenizer</code>. (Its tokenization rules are not
|
||||
* based on the Unicode Text Segmentation algorithm.)
|
||||
* {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
|
||||
* {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
|
||||
* {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
|
||||
* {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
|
||||
* and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
|
||||
* </li>
|
||||
* <li>{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
|
||||
* implements the Word Break rules from the Unicode Text Segmentation
|
||||
* algorithm, as specified in
|
||||
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
|
||||
* URLs and email addresses are also tokenized according to the relevant RFCs.
|
||||
* <br>
|
||||
* {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
|
||||
* {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
|
||||
* {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
|
||||
* {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
|
||||
* and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
|
||||
* </li>
|
||||
* </ul>
|
||||
*/
|
||||
package org.apache.lucene.analysis.standard;
|
|
@ -0,0 +1,50 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!-- not a package-info.java, because we already defined this package in spatial/ -->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head>
|
||||
<body>
|
||||
Fast, general-purpose grammar-based tokenizers.
|
||||
<ul>
|
||||
<li>{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}:
|
||||
this class was formerly (prior to Lucene 3.1) named
|
||||
<code>StandardTokenizer</code>. (Its tokenization rules are not
|
||||
based on the Unicode Text Segmentation algorithm.)
|
||||
{@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
|
||||
{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
|
||||
{@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
|
||||
{@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
|
||||
and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
|
||||
</li>
|
||||
<li>{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
|
||||
implements the Word Break rules from the Unicode Text Segmentation
|
||||
algorithm, as specified in
|
||||
<a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>, except
|
||||
URLs and email addresses are also tokenized according to the relevant RFCs.
|
||||
<br>
|
||||
{@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
|
||||
{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
|
||||
{@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
|
||||
{@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
|
||||
and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
|
||||
</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.util.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.tartarus.snowball.ext.SwedishStemmer;
|
||||
|
||||
|
|
|
@ -30,9 +30,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
|
|
|
@ -20,13 +20,13 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
|
||||
/**
|
||||
* {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
|
||||
|
|
|
@ -21,15 +21,15 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
|
||||
import org.tartarus.snowball.ext.TurkishStemmer;
|
||||
|
||||
/**
|
||||
|
|
|
@ -37,7 +37,9 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
|
|
@ -22,16 +22,16 @@ import java.util.Objects;
|
|||
import java.util.function.IntPredicate;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
|
||||
import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
|
||||
import org.apache.lucene.analysis.CharacterUtils;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.LetterTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||
import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
|
||||
|
||||
/**
|
||||
* An abstract base class for simple, character-oriented tokenizers.
|
||||
|
@ -285,4 +285,4 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
finalOffset = 0;
|
||||
ioBuffer.reset(); // make sure to reset the IO buffer!!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,10 +19,10 @@ package org.apache.lucene.analysis.util;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
* </li>
|
||||
* <li>
|
||||
* Effective Locale-specific normalization (case differences, diacritics, etc.).
|
||||
* ({@link org.apache.lucene.analysis.core.LowerCaseFilter} and
|
||||
* ({@link org.apache.lucene.analysis.LowerCaseFilter} and
|
||||
* {@link org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter} provide these services
|
||||
* in a generic way that doesn't take into account locale-specific needs.)
|
||||
* </li>
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ar;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Arabic Analyzer
|
||||
|
|
|
@ -21,11 +21,11 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Arabic Normalization Filter
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Bulgarian analyzer
|
||||
|
|
|
@ -22,11 +22,11 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Bulgarian Stemmer
|
||||
|
|
|
@ -22,11 +22,11 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Brazilian Stem Filter, which only modifies the term text.
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
|
|
|
@ -22,17 +22,17 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.StopFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Most tests adopted from TestCJKTokenizer
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Sorani analyzer
|
||||
|
|
|
@ -20,9 +20,9 @@ import java.io.StringReader;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Tests CommonGrams(Query)Filter
|
||||
|
|
|
@ -17,18 +17,18 @@
|
|||
package org.apache.lucene.analysis.commongrams;
|
||||
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.TestStopFilter;
|
||||
import org.apache.lucene.analysis.core.TestStopFilterFactory;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* Tests pretty much copied from StopFilterFactoryTest We use the test files
|
||||
* used by the StopFilterFactoryTest TODO: consider creating separate test files
|
||||
|
@ -37,7 +37,7 @@ import java.io.StringReader;
|
|||
public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||
|
||||
public void testInform() throws Exception {
|
||||
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
|
||||
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
|
||||
assertTrue("loader is null and it shouldn't be", loader != null);
|
||||
CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
|
||||
"words", "stop-1.txt",
|
||||
|
|
|
@ -16,12 +16,11 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.commongrams;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.TestStopFilter;
|
||||
import org.apache.lucene.analysis.core.TestStopFilterFactory;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -34,7 +33,7 @@ import org.apache.lucene.util.Version;
|
|||
public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||
|
||||
public void testInform() throws Exception {
|
||||
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
|
||||
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
|
||||
assertTrue("loader is null and it shouldn't be", loader != null);
|
||||
CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
|
||||
"words", "stop-1.txt",
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Arrays;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -32,7 +33,6 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter;
|
|||
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.HashSet;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.MockCharFilter;
|
||||
import org.apache.lucene.analysis.MockTokenFilter;
|
||||
|
@ -39,7 +40,6 @@ import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
|||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
|
||||
|
|
|
@ -50,6 +50,8 @@ import java.util.regex.Pattern;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
import org.apache.lucene.analysis.CharArrayMap;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.CrankyTokenFilter;
|
||||
import org.apache.lucene.analysis.MockGraphTokenFilter;
|
||||
|
@ -73,8 +75,8 @@ import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
|
|||
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
|
||||
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
||||
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
|
||||
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
|
||||
|
@ -83,8 +85,6 @@ import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
|||
import org.apache.lucene.analysis.snowball.TestSnowball;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
|
|
@ -17,16 +17,16 @@
|
|||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
public class TestStopAnalyzer extends BaseTokenStreamTestCase {
|
||||
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
|
||||
|
|
|
@ -19,9 +19,9 @@ package org.apache.lucene.analysis.cz;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the CzechAnalyzer
|
||||
|
|
|
@ -22,11 +22,11 @@ import java.io.StringReader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
/**
|
||||
* Test the Czech Stemmer.
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
|
||||
/** This test fails with NPE when the
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue