LUCENE-5211: Better javadocs and error checking of 'format' option in StopFilterFactory, as well as comments in all snowball formatted files about specifying format option

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1524809 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2013-09-19 19:06:37 +00:00
parent c7188a03a3
commit 499bd3688a
40 changed files with 155 additions and 4 deletions

View File

@ -78,6 +78,12 @@ API Changes:
with IndexSearcher when an ExecutorService is specified. with IndexSearcher when an ExecutorService is specified.
(Ryan Ernst, Mike McCandless, Robert Muir) (Ryan Ernst, Mike McCandless, Robert Muir)
Documentation
* LUCENE-5211: Better javadocs and error checking of 'format' option in
StopFilterFactory, as well as comments in all snowball formatted files
about specifying format option. (hossman)
Changes in backwards compatibility policy Changes in backwards compatibility policy
* LUCENE-5204: Directory doesn't have default implementations for * LUCENE-5204: Directory doesn't have default implementations for

View File

@ -22,22 +22,57 @@ import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.WordlistLoader; // jdocs
import java.util.Map; import java.util.Map;
import java.io.IOException; import java.io.IOException;
/** /**
* Factory for {@link StopFilter}. * Factory for {@link StopFilter}.
*
* <pre class="prettyprint"> * <pre class="prettyprint">
* &lt;fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"&gt; * &lt;fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"&gt;
* &lt;analyzer&gt; * &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt; * &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.StopFilterFactory" ignoreCase="true" * &lt;filter class="solr.StopFilterFactory" ignoreCase="true"
* words="stopwords.txt" * words="stopwords.txt" format="wordset" /&gt;
* &lt;/analyzer&gt; * &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre> * &lt;/fieldType&gt;</pre>
*
* <p>
* All attributes are optional:
* </p>
* <ul>
* <li><code>ignoreCase</code> defaults to <code>false</code></li>
* <li><code>words</code> should be the name of a stopwords file to parse, if not
* specified the factory will use {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}
* </li>
* <li><code>format</code> defines how the <code>words</code> file will be parsed,
* and defaults to <code>wordset</code>. If <code>words</code> is not specified,
* then <code>format</code> must not be specified.
* </li>
* </ul>
* <p>
* The valid values for the <code>format</code> option are:
* </p>
* <ul>
* <li><code>wordset</code> - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
* beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
* </li>
* <li><code>snowball</code> - This format allows for multiple words specified on each
* line, and trailing comments may be specified using the vertical line ("&#124;").
* Blank lines are ignored. See
* {@link WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet}
* for details.
* </li>
* </ul>
*/ */
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String FORMAT_WORDSET = "wordset";
public static final String FORMAT_SNOWBALL = "snowball";
private CharArraySet stopWords; private CharArraySet stopWords;
private final String stopWordFiles; private final String stopWordFiles;
private final String format; private final String format;
@ -48,7 +83,7 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
super(args); super(args);
assureMatchVersion(); assureMatchVersion();
stopWordFiles = get(args, "words"); stopWordFiles = get(args, "words");
format = get(args, "format"); format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
ignoreCase = getBoolean(args, "ignoreCase", false); ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) { if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args); throw new IllegalArgumentException("Unknown parameters: " + args);
@ -58,12 +93,17 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
@Override @Override
public void inform(ResourceLoader loader) throws IOException { public void inform(ResourceLoader loader) throws IOException {
if (stopWordFiles != null) { if (stopWordFiles != null) {
if ("snowball".equalsIgnoreCase(format)) { if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
} else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase); stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
} else { } else {
stopWords = getWordSet(loader, stopWordFiles, ignoreCase); throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
} }
} else { } else {
if (null != format) {
throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
}
stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
} }
} }

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Danish stop word list. Comments begin with vertical bar. Each stop | A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Dutch stop word list. Comments begin with vertical bar. Each stop | A Dutch stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An English stop word list. Comments begin with vertical bar. Each stop | An English stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE | forms of BE

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop | A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A German stop word list. Comments begin with vertical bar. Each stop | A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| Hungarian stop word list | Hungarian stop word list
| prepared by Anna Tordai | prepared by Anna Tordai

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An Italian stop word list. Comments begin with vertical bar. Each stop | An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Norwegian stop word list. Comments begin with vertical bar. Each stop | A Norwegian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Portuguese stop word list. Comments begin with vertical bar. Each stop | A Portuguese stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| a russian stop word list. comments begin with vertical bar. each stop | a russian stop word list. comments begin with vertical bar. each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Spanish stop word list. Comments begin with vertical bar. Each stop | A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Swedish stop word list. Comments begin with vertical bar. Each stop | A Swedish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -57,6 +57,11 @@ public class TestStopFilterFactory extends BaseTokenStreamFactoryTestCase {
assertTrue(words.contains("her")); assertTrue(words.contains("her"));
assertTrue(words.contains("hers")); assertTrue(words.contains("hers"));
assertTrue(words.contains("herself")); assertTrue(words.contains("herself"));
// defaults
factory = (StopFilterFactory) tokenFilterFactory("Stop");
assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
assertEquals(false, factory.isIgnoreCase());
} }
/** Test that bogus arguments result in exception */ /** Test that bogus arguments result in exception */
@ -68,4 +73,30 @@ public class TestStopFilterFactory extends BaseTokenStreamFactoryTestCase {
assertTrue(expected.getMessage().contains("Unknown parameters")); assertTrue(expected.getMessage().contains("Unknown parameters"));
} }
} }
/** Test that a bogus 'format' value, or a 'format' given without a 'words' file, results in an exception */
public void testBogusFormats() throws Exception {
try {
tokenFilterFactory("Stop",
"words", "stop-snowball.txt",
"format", "bogus");
fail();
} catch (IllegalArgumentException expected) {
String msg = expected.getMessage();
assertTrue(msg, msg.contains("Unknown"));
assertTrue(msg, msg.contains("format"));
assertTrue(msg, msg.contains("bogus"));
}
try {
tokenFilterFactory("Stop",
// implicit default words file
"format", "bogus");
fail();
} catch (IllegalArgumentException expected) {
String msg = expected.getMessage();
assertTrue(msg, msg.contains("can not be specified"));
assertTrue(msg, msg.contains("format"));
assertTrue(msg, msg.contains("bogus"));
}
}
} }

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Danish stop word list. Comments begin with vertical bar. Each stop | A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A German stop word list. Comments begin with vertical bar. Each stop | A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Spanish stop word list. Comments begin with vertical bar. Each stop | A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE | forms of BE

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop | A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| Hungarian stop word list | Hungarian stop word list
| prepared by Anna Tordai | prepared by Anna Tordai

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An Italian stop word list. Comments begin with vertical bar. Each stop | An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Dutch stop word list. Comments begin with vertical bar. Each stop | A Dutch stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Norwegian stop word list. Comments begin with vertical bar. Each stop | A Norwegian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Portuguese stop word list. Comments begin with vertical bar. Each stop | A Portuguese stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| a russian stop word list. comments begin with vertical bar. each stop | a russian stop word list. comments begin with vertical bar. each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Swedish stop word list. Comments begin with vertical bar. Each stop | A Swedish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Danish stop word list. Comments begin with vertical bar. Each stop | A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A German stop word list. Comments begin with vertical bar. Each stop | A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Spanish stop word list. Comments begin with vertical bar. Each stop | A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE | forms of BE

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop | A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| Hungarian stop word list | Hungarian stop word list
| prepared by Anna Tordai | prepared by Anna Tordai

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An Italian stop word list. Comments begin with vertical bar. Each stop | An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Dutch stop word list. Comments begin with vertical bar. Each stop | A Dutch stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Norwegian stop word list. Comments begin with vertical bar. Each stop | A Norwegian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Portuguese stop word list. Comments begin with vertical bar. Each stop | A Portuguese stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| a russian stop word list. comments begin with vertical bar. each stop | a russian stop word list. comments begin with vertical bar. each stop
| word is at the start of a line. | word is at the start of a line.

View File

@ -4,6 +4,8 @@
| Also see http://www.opensource.org/licenses/bsd-license.html | Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8. | - Encoding was converted to UTF-8.
| - This notice was added. | - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Swedish stop word list. Comments begin with vertical bar. Each stop | A Swedish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line. | word is at the start of a line.