mirror of https://github.com/apache/lucene.git
LUCENE-5211: Better javadocs and error checking of 'format' option in StopFilterFactory, as well as comments in all snowball formatted files about specifying format option
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1524809 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c7188a03a3
commit
499bd3688a
|
@ -78,6 +78,12 @@ API Changes:
|
||||||
with IndexSearcher when an ExecutorService is specified.
|
with IndexSearcher when an ExecutorService is specified.
|
||||||
(Ryan Ernst, Mike McCandless, Robert Muir)
|
(Ryan Ernst, Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
|
Documentation
|
||||||
|
|
||||||
|
* LUCENE-5211: Better javadocs and error checking of 'format' option in
|
||||||
|
StopFilterFactory, as well as comments in all snowball formatted files
|
||||||
|
about specifying format option. (hossman)
|
||||||
|
|
||||||
Changes in backwards compatibility policy
|
Changes in backwards compatibility policy
|
||||||
|
|
||||||
* LUCENE-5204: Directory doesn't have default implementations for
|
* LUCENE-5204: Directory doesn't have default implementations for
|
||||||
|
|
|
@ -22,22 +22,57 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
|
import org.apache.lucene.analysis.util.WordlistLoader; // jdocs
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link StopFilter}.
|
* Factory for {@link StopFilter}.
|
||||||
|
*
|
||||||
* <pre class="prettyprint">
|
* <pre class="prettyprint">
|
||||||
* <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
* <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||||
* <analyzer>
|
* <analyzer>
|
||||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
* <filter class="solr.StopFilterFactory" ignoreCase="true"
|
* <filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||||
* words="stopwords.txt"
|
* words="stopwords.txt" format="wordset" />
|
||||||
* </analyzer>
|
* </analyzer>
|
||||||
* </fieldType></pre>
|
* </fieldType></pre>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* All attributes are optional:
|
||||||
|
* </p>
|
||||||
|
* <ul>
|
||||||
|
* <li><code>ignoreCase</code> defaults to <code>false</code></li>
|
||||||
|
* <li><code>words</code> should be the name of a stopwords file to parse, if not
|
||||||
|
* specified the factory will use {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}
|
||||||
|
* </li>
|
||||||
|
* <li><code>format</code> defines how the <code>words</code> file will be parsed,
|
||||||
|
* and defaults to <code>wordset</code>. If <code>words</code> is not specified,
|
||||||
|
* then <code>format</code> must not be specified.
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* The valid values for the <code>format</code> option are:
|
||||||
|
* </p>
|
||||||
|
* <ul>
|
||||||
|
* <li><code>wordset</code> - This is the default format, which supports one word per
|
||||||
|
* line (including any intra-word whitespace) and allows whole line comments
|
||||||
|
* beginning with the "#" character. Blank lines are ignored. See
|
||||||
|
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
|
||||||
|
* </li>
|
||||||
|
* <li><code>snowball</code> - This format allows for multiple words specified on each
|
||||||
|
* line, and trailing comments may be specified using the vertical line ("|").
|
||||||
|
* Blank lines are ignored. See
|
||||||
|
* {@link WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet}
|
||||||
|
* for details.
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||||
|
public static final String FORMAT_WORDSET = "wordset";
|
||||||
|
public static final String FORMAT_SNOWBALL = "snowball";
|
||||||
|
|
||||||
private CharArraySet stopWords;
|
private CharArraySet stopWords;
|
||||||
private final String stopWordFiles;
|
private final String stopWordFiles;
|
||||||
private final String format;
|
private final String format;
|
||||||
|
@ -48,7 +83,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
|
||||||
super(args);
|
super(args);
|
||||||
assureMatchVersion();
|
assureMatchVersion();
|
||||||
stopWordFiles = get(args, "words");
|
stopWordFiles = get(args, "words");
|
||||||
format = get(args, "format");
|
format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
|
||||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||||
if (!args.isEmpty()) {
|
if (!args.isEmpty()) {
|
||||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||||
|
@ -58,12 +93,17 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
|
||||||
@Override
|
@Override
|
||||||
public void inform(ResourceLoader loader) throws IOException {
|
public void inform(ResourceLoader loader) throws IOException {
|
||||||
if (stopWordFiles != null) {
|
if (stopWordFiles != null) {
|
||||||
if ("snowball".equalsIgnoreCase(format)) {
|
if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
|
||||||
|
stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
|
||||||
|
} else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
|
||||||
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
|
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
|
||||||
} else {
|
} else {
|
||||||
stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
|
throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (null != format) {
|
||||||
|
throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
|
||||||
|
}
|
||||||
stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
|
stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| An English stop word list. Comments begin with vertical bar. Each stop
|
| An English stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| forms of BE
|
| forms of BE
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A French stop word list. Comments begin with vertical bar. Each stop
|
| A French stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A German stop word list. Comments begin with vertical bar. Each stop
|
| A German stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| Hungarian stop word list
|
| Hungarian stop word list
|
||||||
| prepared by Anna Tordai
|
| prepared by Anna Tordai
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| a russian stop word list. comments begin with vertical bar. each stop
|
| a russian stop word list. comments begin with vertical bar. each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -57,6 +57,11 @@ public class TestStopFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||||
assertTrue(words.contains("her"));
|
assertTrue(words.contains("her"));
|
||||||
assertTrue(words.contains("hers"));
|
assertTrue(words.contains("hers"));
|
||||||
assertTrue(words.contains("herself"));
|
assertTrue(words.contains("herself"));
|
||||||
|
|
||||||
|
// defaults
|
||||||
|
factory = (StopFilterFactory) tokenFilterFactory("Stop");
|
||||||
|
assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
|
||||||
|
assertEquals(false, factory.isIgnoreCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Test that bogus arguments result in exception */
|
/** Test that bogus arguments result in exception */
|
||||||
|
@ -68,4 +73,30 @@ public class TestStopFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Test that bogus 'format' values result in exception */
|
||||||
|
public void testBogusFormats() throws Exception {
|
||||||
|
try {
|
||||||
|
tokenFilterFactory("Stop",
|
||||||
|
"words", "stop-snowball.txt",
|
||||||
|
"format", "bogus");
|
||||||
|
fail();
|
||||||
|
} catch (IllegalArgumentException expected) {
|
||||||
|
String msg = expected.getMessage();
|
||||||
|
assertTrue(msg, msg.contains("Unknown"));
|
||||||
|
assertTrue(msg, msg.contains("format"));
|
||||||
|
assertTrue(msg, msg.contains("bogus"));
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
tokenFilterFactory("Stop",
|
||||||
|
// implicit default words file
|
||||||
|
"format", "bogus");
|
||||||
|
fail();
|
||||||
|
} catch (IllegalArgumentException expected) {
|
||||||
|
String msg = expected.getMessage();
|
||||||
|
assertTrue(msg, msg.contains("can not be specified"));
|
||||||
|
assertTrue(msg, msg.contains("format"));
|
||||||
|
assertTrue(msg, msg.contains("bogus"));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A German stop word list. Comments begin with vertical bar. Each stop
|
| A German stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| forms of BE
|
| forms of BE
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A French stop word list. Comments begin with vertical bar. Each stop
|
| A French stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| Hungarian stop word list
|
| Hungarian stop word list
|
||||||
| prepared by Anna Tordai
|
| prepared by Anna Tordai
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| a russian stop word list. comments begin with vertical bar. each stop
|
| a russian stop word list. comments begin with vertical bar. each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A German stop word list. Comments begin with vertical bar. Each stop
|
| A German stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| forms of BE
|
| forms of BE
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A French stop word list. Comments begin with vertical bar. Each stop
|
| A French stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| Hungarian stop word list
|
| Hungarian stop word list
|
||||||
| prepared by Anna Tordai
|
| prepared by Anna Tordai
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| a russian stop word list. comments begin with vertical bar. each stop
|
| a russian stop word list. comments begin with vertical bar. each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
| - Encoding was converted to UTF-8.
|
| - Encoding was converted to UTF-8.
|
||||||
| - This notice was added.
|
| - This notice was added.
|
||||||
|
|
|
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||||
|
|
||||||
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
||||||
| word is at the start of a line.
|
| word is at the start of a line.
|
||||||
|
|
Loading…
Reference in New Issue