mirror of https://github.com/apache/lucene.git
LUCENE-10008: Respect ignoreCase flag in CommonGramsFilterFactory and factor out a common abstract base class, AbstractWordsFileFilterFactory (#188)
This commit is contained in:
parent
624560a3d7
commit
cb4c8ae07f
|
@ -16,15 +16,12 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.commongrams;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.en.AbstractWordsFileFilterFactory;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Constructs a {@link CommonGramsFilter}.
|
||||
|
@ -40,26 +37,14 @@ import org.apache.lucene.util.ResourceLoaderAware;
|
|||
* @since 3.1
|
||||
* @lucene.spi {@value #NAME}
|
||||
*/
|
||||
public class CommonGramsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public class CommonGramsFilterFactory extends AbstractWordsFileFilterFactory {
|
||||
|
||||
/** SPI name */
|
||||
public static final String NAME = "commonGrams";
|
||||
|
||||
// TODO: shared base class for Stop/Keep/CommonGrams?
|
||||
private CharArraySet commonWords;
|
||||
private final String commonWordFiles;
|
||||
private final String format;
|
||||
private final boolean ignoreCase;
|
||||
|
||||
/** Creates a new CommonGramsFilterFactory */
|
||||
public CommonGramsFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
commonWordFiles = get(args, "words");
|
||||
format = get(args, "format");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/** Default ctor for compatibility with SPI */
|
||||
|
@ -67,30 +52,18 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
|
|||
throw defaultCtorException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
if (commonWordFiles != null) {
|
||||
if ("snowball".equalsIgnoreCase(format)) {
|
||||
commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
|
||||
} else {
|
||||
commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
|
||||
}
|
||||
} else {
|
||||
commonWords = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isIgnoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
|
||||
public CharArraySet getCommonWords() {
|
||||
return commonWords;
|
||||
return getWords();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CharArraySet createDefaultWords() {
|
||||
return new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, isIgnoreCase());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenFilter create(TokenStream input) {
|
||||
CommonGramsFilter commonGrams = new CommonGramsFilter(input, commonWords);
|
||||
CommonGramsFilter commonGrams = new CommonGramsFilter(input, getWords());
|
||||
return commonGrams;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,15 +16,12 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.en.AbstractWordsFileFilterFactory;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Factory for {@link StopFilter}.
|
||||
|
@ -65,28 +62,14 @@ import org.apache.lucene.util.ResourceLoaderAware;
|
|||
* @since 3.1
|
||||
* @lucene.spi {@value #NAME}
|
||||
*/
|
||||
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public class StopFilterFactory extends AbstractWordsFileFilterFactory {
|
||||
|
||||
/** SPI name */
|
||||
public static final String NAME = "stop";
|
||||
|
||||
public static final String FORMAT_WORDSET = "wordset";
|
||||
public static final String FORMAT_SNOWBALL = "snowball";
|
||||
|
||||
private CharArraySet stopWords;
|
||||
private final String stopWordFiles;
|
||||
private final String format;
|
||||
private final boolean ignoreCase;
|
||||
|
||||
/** Creates a new StopFilterFactory */
|
||||
public StopFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
stopWordFiles = get(args, "words");
|
||||
format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/** Default ctor for compatibility with SPI */
|
||||
|
@ -94,37 +77,18 @@ public class StopFilterFactory extends TokenFilterFactory implements ResourceLoa
|
|||
throw defaultCtorException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
if (stopWordFiles != null) {
|
||||
if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
|
||||
stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
|
||||
} else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
|
||||
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"Unknown 'format' specified for 'words' file: " + format);
|
||||
}
|
||||
} else {
|
||||
if (null != format) {
|
||||
throw new IllegalArgumentException(
|
||||
"'format' can not be specified w/o an explicit 'words' file: " + format);
|
||||
}
|
||||
stopWords = new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isIgnoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
|
||||
public CharArraySet getStopWords() {
|
||||
return stopWords;
|
||||
return getWords();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CharArraySet createDefaultWords() {
|
||||
return new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, isIgnoreCase());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
StopFilter stopFilter = new StopFilter(input, stopWords);
|
||||
StopFilter stopFilter = new StopFilter(input, getWords());
|
||||
return stopFilter;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.en;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Abstract parent class for analysis factories that accept a stopwords file as input.
|
||||
*
|
||||
* <p>Concrete implementations can leverage the following input attributes. All attributes are
|
||||
* optional:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>ignoreCase</code> defaults to <code>false</code>
|
||||
* <li><code>words</code> should be the name of a stopwords file to parse, if not specified the
|
||||
* factory will use the value provided by {@link #createDefaultWords()} implementation in
|
||||
* concrete subclass.
|
||||
* <li><code>format</code> defines how the <code>words</code> file will be parsed, and defaults to
|
||||
* <code>wordset</code>. If <code>words</code> is not specified, then <code>format</code> must
|
||||
* not be specified.
|
||||
* </ul>
|
||||
*
|
||||
* <p>The valid values for the <code>format</code> option are:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>wordset</code> - This is the default format, which supports one word per line
|
||||
* (including any intra-word whitespace) and allows whole line comments beginning with the "#"
|
||||
* character. Blank lines are ignored. See {@link WordlistLoader#getLines
|
||||
* WordlistLoader.getLines} for details.
|
||||
* <li><code>snowball</code> - This format allows for multiple words specified on each line, and
|
||||
* trailing comments may be specified using the vertical line ("|"). Blank lines are
|
||||
* ignored. See {@link WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet}
|
||||
* for details.
|
||||
* </ul>
|
||||
*/
|
||||
public abstract class AbstractWordsFileFilterFactory extends TokenFilterFactory
|
||||
implements ResourceLoaderAware {
|
||||
|
||||
/** Value of the {@code format} argument that makes {@link #inform} load files via getWordSet. */
public static final String FORMAT_WORDSET = "wordset";
|
||||
/**
 * Value of the {@code format} argument that makes {@link #inform} load files via
 * getSnowballWordSet.
 */
public static final String FORMAT_SNOWBALL = "snowball";
|
||||
|
||||
// Assigned only by inform(); stays null until inform() has run.
private CharArraySet words;
|
||||
// Raw value of the "words" argument; may be null (both the constructor and inform() test for it).
private final String wordFiles;
|
||||
// Value of the "format" argument; the fallback is FORMAT_WORDSET when "words" is given, else null.
private final String format;
|
||||
// Value of the "ignoreCase" argument, false when not supplied.
private final boolean ignoreCase;
|
||||
|
||||
/**
 * Default ctor for compatibility with SPI. It never yields an instance: it unconditionally throws
 * the exception built by {@code defaultCtorException()}.
 */
|
||||
protected AbstractWordsFileFilterFactory() {
|
||||
throw defaultCtorException();
|
||||
}
|
||||
|
||||
/**
 * Initialize this factory via a set of key-value pairs.
 *
 * <p>Reads the optional {@code words}, {@code format} and {@code ignoreCase} entries. {@code
 * format} falls back to {@link #FORMAT_WORDSET} only when {@code words} is present, otherwise to
 * {@code null}; {@code ignoreCase} falls back to {@code false}. The combination of {@code format}
 * and {@code words} is not validated here but later, in {@link #inform}.
 *
 * @param args factory arguments; handed to the superclass constructor before being read here
 * @throws IllegalArgumentException if {@code args} is not empty once the entries above have been
 *     read (presumably the get/getBoolean helpers remove the keys they read; confirm against the
 *     superclass)
 */
|
||||
public AbstractWordsFileFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
wordFiles = get(args, "words");
|
||||
format = get(args, "format", (null == wordFiles ? null : FORMAT_WORDSET));
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Initialize the word set, either from the configured {@code words} files or, when none were
 * configured, from {@link #createDefaultWords()}.
 *
 * <p>The {@code format} value is matched case-insensitively against {@link #FORMAT_WORDSET} and
 * {@link #FORMAT_SNOWBALL}.
 *
 * @param loader passed through to getWordSet / getSnowballWordSet together with the configured
 *     file names and the ignoreCase flag
 * @throws IOException declared by this method; presumably raised by the word-file loading helpers
 * @throws IllegalArgumentException if {@code words} is set and {@code format} matches neither
 *     known format, or if {@code format} is set without {@code words}
 */
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
if (wordFiles != null) {
|
||||
if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
|
||||
words = getWordSet(loader, wordFiles, ignoreCase);
|
||||
} else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
|
||||
words = getSnowballWordSet(loader, wordFiles, ignoreCase);
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"Unknown 'format' specified for 'words' file: " + format);
|
||||
}
|
||||
} else {
|
||||
if (null != format) {
|
||||
throw new IllegalArgumentException(
|
||||
"'format' can not be specified w/o an explicit 'words' file: " + format);
|
||||
}
|
||||
words = createDefaultWords();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Default word set implementation. Called by {@link #inform} only when no {@code words} file was
 * configured; the returned value is stored as-is, so returning {@code null} leaves {@link
 * #getWords()} returning {@code null}.
 */
|
||||
protected abstract CharArraySet createDefaultWords();
|
||||
|
||||
/** Returns the word set assigned by {@link #inform}; {@code null} if it has not run yet. */
public CharArraySet getWords() {
|
||||
return words;
|
||||
}
|
||||
|
||||
/** Returns the raw {@code words} argument given to the constructor; may be {@code null}. */
public String getWordFiles() {
|
||||
return wordFiles;
|
||||
}
|
||||
|
||||
/** Returns the {@code format} value resolved by the constructor; may be {@code null}. */
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
/** Returns the {@code ignoreCase} flag read by the constructor ({@code false} if not given). */
public boolean isIgnoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
}
|
|
@ -16,13 +16,10 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.en.AbstractWordsFileFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link KeepWordFilter}.
|
||||
|
@ -38,23 +35,14 @@ import org.apache.lucene.util.ResourceLoaderAware;
|
|||
* @since 3.1
|
||||
* @lucene.spi {@value #NAME}
|
||||
*/
|
||||
public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public class KeepWordFilterFactory extends AbstractWordsFileFilterFactory {
|
||||
|
||||
/** SPI name */
|
||||
public static final String NAME = "keepWord";
|
||||
|
||||
private final boolean ignoreCase;
|
||||
private final String wordFiles;
|
||||
private CharArraySet words;
|
||||
|
||||
/** Creates a new KeepWordFilterFactory */
|
||||
public KeepWordFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
wordFiles = get(args, "words");
|
||||
ignoreCase = getBoolean(args, "ignoreCase", false);
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
/** Default ctor for compatibility with SPI */
|
||||
|
@ -63,27 +51,17 @@ public class KeepWordFilterFactory extends TokenFilterFactory implements Resourc
|
|||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
if (wordFiles != null) {
|
||||
words = getWordSet(loader, wordFiles, ignoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isIgnoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
|
||||
public CharArraySet getWords() {
|
||||
return words;
|
||||
protected CharArraySet createDefaultWords() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
// if the set is null, it means it was empty
|
||||
if (words == null) {
|
||||
if (getWords() == null) {
|
||||
return input;
|
||||
} else {
|
||||
final TokenStream filter = new KeepWordFilter(input, words);
|
||||
final TokenStream filter = new KeepWordFilter(input, getWords());
|
||||
return filter;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,25 +22,25 @@ import org.apache.lucene.analysis.CharArraySet;
|
|||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.TestStopFilterFactory;
|
||||
import org.apache.lucene.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Tests pretty much copied from TestStopFilterFactory. We use the test files used by
|
||||
* TestStopFilterFactory. TODO: consider creating separate test files so this won't break if stop
|
||||
* filter test files change.
|
||||
*/
|
||||
public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||
|
||||
public void testInform() throws Exception {
|
||||
ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
|
||||
ResourceLoader loader = new ClasspathResourceLoader(getClass());
|
||||
assertTrue("loader is null and it shouldn't be", loader != null);
|
||||
CommonGramsFilterFactory factory =
|
||||
(CommonGramsFilterFactory)
|
||||
tokenFilterFactory(
|
||||
"CommonGrams", Version.LATEST, loader, "words", "stop-1.txt", "ignoreCase", "true");
|
||||
"CommonGrams",
|
||||
Version.LATEST,
|
||||
loader,
|
||||
"words",
|
||||
"common-1.txt",
|
||||
"ignoreCase",
|
||||
"true");
|
||||
CharArraySet words = factory.getCommonWords();
|
||||
assertTrue("words is null and it shouldn't be", words != null);
|
||||
assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
|
||||
|
@ -53,7 +53,7 @@ public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase
|
|||
Version.LATEST,
|
||||
loader,
|
||||
"words",
|
||||
"stop-1.txt, stop-2.txt",
|
||||
"common-1.txt, common-2.txt",
|
||||
"ignoreCase",
|
||||
"true");
|
||||
words = factory.getCommonWords();
|
||||
|
@ -68,7 +68,7 @@ public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase
|
|||
Version.LATEST,
|
||||
loader,
|
||||
"words",
|
||||
"stop-snowball.txt",
|
||||
"common-snowball.txt",
|
||||
"format",
|
||||
"snowball",
|
||||
"ignoreCase",
|
||||
|
@ -98,6 +98,25 @@ public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase
|
|||
stream, new String[] {"testing", "testing_the", "the", "the_factory", "factory"});
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that ignoreCase flag is honored when no words are provided and default stopwords are used.
|
||||
*/
|
||||
public void testIgnoreCase() throws Exception {
|
||||
ResourceLoader loader = new ClasspathResourceLoader(getClass());
|
||||
CommonGramsFilterFactory factory =
|
||||
(CommonGramsFilterFactory)
|
||||
tokenFilterFactory("CommonGrams", Version.LATEST, loader, "ignoreCase", "true");
|
||||
CharArraySet words = factory.getCommonWords();
|
||||
assertTrue("words is null and it shouldn't be", words != null);
|
||||
assertTrue(words.contains("the"));
|
||||
assertTrue(words.contains("The"));
|
||||
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
tokenizer.setReader(new StringReader("testing The factory"));
|
||||
TokenStream stream = factory.create(tokenizer);
|
||||
assertTokenStreamContents(
|
||||
stream, new String[] {"testing", "testing_The", "The", "The_factory", "factory"});
|
||||
}
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
IllegalArgumentException expected =
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
foo
|
||||
bar
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
junk
|
||||
more
|
|
@ -0,0 +1,10 @@
|
|||
| This is a file in snowball format, empty lines are ignored, '|' is a comment
|
||||
| Additionally, multiple words can be on the same line, allowing stopwords to be
|
||||
| arranged in tables (useful in some languages where they might inflect)
|
||||
|
||||
| fictitious table below
|
||||
|
||||
|third person singular
|
||||
|Subject Object Possessive Reflexive
|
||||
he him his himself| masculine
|
||||
she her hers herself| feminine
|
|
@ -39,6 +39,32 @@ public class TestKeepFilterFactory extends BaseTokenStreamFactoryTestCase {
|
|||
words = factory.getWords();
|
||||
assertTrue("words is null and it shouldn't be", words != null);
|
||||
assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
|
||||
|
||||
factory =
|
||||
(KeepWordFilterFactory)
|
||||
tokenFilterFactory(
|
||||
"KeepWord",
|
||||
"words",
|
||||
"keep-snowball.txt",
|
||||
"format",
|
||||
"snowball",
|
||||
"ignoreCase",
|
||||
"true");
|
||||
words = factory.getWords();
|
||||
assertEquals(8, words.size());
|
||||
assertTrue(words.contains("he"));
|
||||
assertTrue(words.contains("him"));
|
||||
assertTrue(words.contains("his"));
|
||||
assertTrue(words.contains("himself"));
|
||||
assertTrue(words.contains("she"));
|
||||
assertTrue(words.contains("her"));
|
||||
assertTrue(words.contains("hers"));
|
||||
assertTrue(words.contains("herself"));
|
||||
|
||||
// defaults
|
||||
factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord");
|
||||
assertTrue(factory.getWords() == null);
|
||||
assertEquals(false, factory.isIgnoreCase());
|
||||
}
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
| This is a file in snowball format, empty lines are ignored, '|' is a comment
|
||||
| Additionally, multiple words can be on the same line, allowing stopwords to be
|
||||
| arranged in tables (useful in some languages where they might inflect)
|
||||
|
||||
| fictitious table below
|
||||
|
||||
|third person singular
|
||||
|Subject Object Possessive Reflexive
|
||||
he him his himself| masculine
|
||||
she her hers herself| feminine
|
Loading…
Reference in New Issue