mirror of https://github.com/apache/lucene.git
LUCENE-9800: Hunspell: put a time limit on suggestion calculation (#2414)
* LUCENE-9800: Hunspell: put a time limit on suggestion calculation * fix review remarks
This commit is contained in:
parent
bfce5f36da
commit
42da2b45e6
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import static org.apache.lucene.analysis.hunspell.Dictionary.FLAG_UNSET;
|
||||
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.*;
|
||||
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_BEGIN;
|
||||
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_END;
|
||||
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_MIDDLE;
|
||||
|
@ -24,11 +25,13 @@ import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_RULE_END;
|
|||
import static org.apache.lucene.analysis.hunspell.WordContext.SIMPLE_WORD;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
@ -49,20 +52,25 @@ import org.apache.lucene.util.IntsRef;
|
|||
* shared by multiple spell-checkers in different threads).
|
||||
*/
|
||||
public class Hunspell {
|
||||
static final long SUGGEST_TIME_LIMIT = 250;
|
||||
|
||||
final Dictionary dictionary;
|
||||
final Stemmer stemmer;
|
||||
private final TimeoutPolicy policy;
|
||||
final Runnable checkCanceled;
|
||||
|
||||
public Hunspell(Dictionary dictionary) {
|
||||
this(dictionary, () -> {});
|
||||
this(dictionary, RETURN_PARTIAL_RESULT, () -> {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @param policy a strategy determining what to do when API calls take too much time
|
||||
* @param checkCanceled an object that's periodically called, allowing to interrupt spell-checking
|
||||
* or suggestion generation by throwing an exception
|
||||
*/
|
||||
public Hunspell(Dictionary dictionary, Runnable checkCanceled) {
|
||||
public Hunspell(Dictionary dictionary, TimeoutPolicy policy, Runnable checkCanceled) {
|
||||
this.dictionary = dictionary;
|
||||
this.policy = policy;
|
||||
this.checkCanceled = checkCanceled;
|
||||
stemmer = new Stemmer(dictionary);
|
||||
}
|
||||
|
@ -504,7 +512,16 @@ public class Hunspell {
|
|||
&& spell(word.substring(breakPos + breakStr.length()));
|
||||
}
|
||||
|
||||
public List<String> suggest(String word) {
|
||||
/**
|
||||
* @return suggestions for the given misspelled word
|
||||
* @throws SuggestionTimeoutException if the computation takes too long and {@link
|
||||
* TimeoutPolicy#THROW_EXCEPTION} was specified in the constructor
|
||||
*/
|
||||
public List<String> suggest(String word) throws SuggestionTimeoutException {
|
||||
return suggest(word, SUGGEST_TIME_LIMIT);
|
||||
}
|
||||
|
||||
List<String> suggest(String word, long timeLimitMs) throws SuggestionTimeoutException {
|
||||
checkCanceled.run();
|
||||
if (word.length() >= 100) return Collections.emptyList();
|
||||
|
||||
|
@ -520,16 +537,35 @@ public class Hunspell {
|
|||
}
|
||||
}
|
||||
|
||||
LinkedHashSet<String> suggestions = new LinkedHashSet<>();
|
||||
Runnable checkCanceled =
|
||||
policy == NO_TIMEOUT
|
||||
? this.checkCanceled
|
||||
: checkTimeLimit(word, wordCase, suggestions, timeLimitMs);
|
||||
try {
|
||||
doSuggest(word, wordCase, suggestions, checkCanceled);
|
||||
} catch (SuggestionTimeoutException e) {
|
||||
if (policy == RETURN_PARTIAL_RESULT) {
|
||||
return postprocess(word, wordCase, suggestions);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
|
||||
return postprocess(word, wordCase, suggestions);
|
||||
}
|
||||
|
||||
private void doSuggest(
|
||||
String word, WordCase wordCase, LinkedHashSet<String> suggestions, Runnable checkCanceled) {
|
||||
Hunspell suggestionSpeller =
|
||||
new Hunspell(dictionary, checkCanceled) {
|
||||
new Hunspell(dictionary, policy, checkCanceled) {
|
||||
@Override
|
||||
boolean acceptsStem(int formID) {
|
||||
return !dictionary.hasFlag(formID, dictionary.noSuggest)
|
||||
&& !dictionary.hasFlag(formID, dictionary.subStandard);
|
||||
}
|
||||
};
|
||||
ModifyingSuggester modifier = new ModifyingSuggester(suggestionSpeller);
|
||||
Set<String> suggestions = modifier.suggest(word, wordCase);
|
||||
ModifyingSuggester modifier = new ModifyingSuggester(suggestionSpeller, suggestions);
|
||||
modifier.suggest(word, wordCase);
|
||||
|
||||
if (!modifier.hasGoodSuggestions && dictionary.maxNGramSuggestions > 0) {
|
||||
suggestions.addAll(
|
||||
|
@ -540,7 +576,35 @@ public class Hunspell {
|
|||
if (word.contains("-") && suggestions.stream().noneMatch(s -> s.contains("-"))) {
|
||||
suggestions.addAll(modifyChunksBetweenDashes(word));
|
||||
}
|
||||
}
|
||||
|
||||
private Runnable checkTimeLimit(
|
||||
String word, WordCase wordCase, Set<String> suggestions, long timeLimitMs) {
|
||||
return new Runnable() {
|
||||
final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeLimitMs);
|
||||
int invocationCounter = 100;
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
checkCanceled.run();
|
||||
if (--invocationCounter <= 0) {
|
||||
if (System.nanoTime() - deadline > 0) {
|
||||
stop();
|
||||
}
|
||||
invocationCounter = 100;
|
||||
}
|
||||
}
|
||||
|
||||
private void stop() {
|
||||
List<String> partialResult =
|
||||
policy == RETURN_PARTIAL_RESULT ? null : postprocess(word, wordCase, suggestions);
|
||||
String message = "Time limit of " + timeLimitMs + "ms exceeded for " + word;
|
||||
throw new SuggestionTimeoutException(message, partialResult);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private List<String> postprocess(String word, WordCase wordCase, Collection<String> suggestions) {
|
||||
Set<String> result = new LinkedHashSet<>();
|
||||
for (String candidate : suggestions) {
|
||||
result.add(adjustSuggestionCase(candidate, wordCase, word));
|
||||
|
|
|
@ -26,17 +26,18 @@ import java.util.stream.Collectors;
|
|||
/** A class that modifies the given misspelled word in various ways to get correct suggestions */
|
||||
class ModifyingSuggester {
|
||||
private static final int MAX_CHAR_DISTANCE = 4;
|
||||
private final LinkedHashSet<String> result = new LinkedHashSet<>();
|
||||
private final LinkedHashSet<String> result;
|
||||
private final char[] tryChars;
|
||||
private final Hunspell speller;
|
||||
boolean hasGoodSuggestions;
|
||||
|
||||
ModifyingSuggester(Hunspell speller) {
|
||||
ModifyingSuggester(Hunspell speller, LinkedHashSet<String> result) {
|
||||
this.speller = speller;
|
||||
tryChars = speller.dictionary.tryChars.toCharArray();
|
||||
this.result = result;
|
||||
}
|
||||
|
||||
LinkedHashSet<String> suggest(String word, WordCase wordCase) {
|
||||
void suggest(String word, WordCase wordCase) {
|
||||
String low = wordCase != WordCase.LOWER ? speller.dictionary.toLowerCase(word) : word;
|
||||
if (wordCase == WordCase.UPPER || wordCase == WordCase.MIXED) {
|
||||
trySuggestion(low);
|
||||
|
@ -68,12 +69,11 @@ class ModifyingSuggester {
|
|||
tryVariationsOf(speller.dictionary.toTitleCase(low));
|
||||
}
|
||||
|
||||
return result.stream()
|
||||
.map(s -> capitalizeAfterSpace(low, s))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
List<String> adjusted =
|
||||
result.stream().map(s -> capitalizeAfterSpace(low, s)).collect(Collectors.toList());
|
||||
result.clear();
|
||||
result.addAll(adjusted);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// aNew -> "a New" (instead of "a new")
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* An exception thrown when {@link Hunspell#suggest} call takes too long, if {@link
|
||||
* TimeoutPolicy#THROW_EXCEPTION} is used.
|
||||
*/
|
||||
public class SuggestionTimeoutException extends RuntimeException {
|
||||
private final List<String> partialResult;
|
||||
|
||||
SuggestionTimeoutException(String message, List<String> partialResult) {
|
||||
super(message);
|
||||
this.partialResult = partialResult == null ? null : Collections.unmodifiableList(partialResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return partial result calculated by {@link Hunspell#suggest} before the time limit was
|
||||
* exceeded
|
||||
*/
|
||||
public List<String> getPartialResult() {
|
||||
return partialResult;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
/** A strategy determining what to do when Hunspell API calls take too much time */
|
||||
public enum TimeoutPolicy {
|
||||
/** Let the computation complete even if it takes ages */
|
||||
NO_TIMEOUT,
|
||||
|
||||
/** Just stop the calculation and return whatever has been computed so far */
|
||||
RETURN_PARTIAL_RESULT,
|
||||
|
||||
/**
|
||||
* Throw an exception (e.g. {@link SuggestionTimeoutException}) to make it more clear to the
|
||||
* caller that the timeout happened and the returned results might not be reliable or
|
||||
* reproducible.
|
||||
*/
|
||||
THROW_EXCEPTION
|
||||
}
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import static org.apache.lucene.analysis.hunspell.StemmerTestBase.loadDictionary;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.concurrent.CancellationException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class HunspellTest extends LuceneTestCase {
|
||||
public void testCheckCanceled() throws Exception {
|
||||
AtomicBoolean canceled = new AtomicBoolean();
|
||||
Runnable checkCanceled =
|
||||
() -> {
|
||||
if (canceled.get()) {
|
||||
throw new CancellationException();
|
||||
}
|
||||
};
|
||||
Hunspell hunspell =
|
||||
new Hunspell(loadDictionary(false, "simple.aff", "simple.dic"), checkCanceled);
|
||||
|
||||
assertTrue(hunspell.spell("apache"));
|
||||
assertEquals(Collections.singletonList("apach"), hunspell.suggest("apac"));
|
||||
|
||||
canceled.set(true);
|
||||
assertThrows(CancellationException.class, () -> hunspell.spell("apache"));
|
||||
assertThrows(CancellationException.class, () -> hunspell.suggest("apac"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import static org.apache.lucene.analysis.hunspell.StemmerTestBase.loadDictionary;
|
||||
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.NO_TIMEOUT;
|
||||
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.RETURN_PARTIAL_RESULT;
|
||||
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.THROW_EXCEPTION;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.util.Collections;
|
||||
import java.util.concurrent.CancellationException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestHunspell extends LuceneTestCase {
|
||||
public void testCheckCanceled() throws Exception {
|
||||
AtomicBoolean canceled = new AtomicBoolean();
|
||||
Runnable checkCanceled =
|
||||
() -> {
|
||||
if (canceled.get()) {
|
||||
throw new CancellationException();
|
||||
}
|
||||
};
|
||||
Dictionary dictionary = loadDictionary(false, "simple.aff", "simple.dic");
|
||||
Hunspell hunspell = new Hunspell(dictionary, NO_TIMEOUT, checkCanceled);
|
||||
|
||||
assertTrue(hunspell.spell("apache"));
|
||||
assertEquals(Collections.singletonList("apach"), hunspell.suggest("apac"));
|
||||
|
||||
canceled.set(true);
|
||||
assertThrows(CancellationException.class, () -> hunspell.spell("apache"));
|
||||
assertThrows(CancellationException.class, () -> hunspell.suggest("apac"));
|
||||
}
|
||||
|
||||
public void testSuggestionTimeLimit() throws IOException, ParseException {
|
||||
int timeLimitMs = 10;
|
||||
|
||||
Dictionary dictionary = loadDictionary(false, "timelimit.aff", "simple.dic");
|
||||
String word = "qazwsxedcrfvtgbyhnujm";
|
||||
|
||||
Hunspell incomplete = new Hunspell(dictionary, RETURN_PARTIAL_RESULT, () -> {});
|
||||
assertFalse(incomplete.spell(word));
|
||||
assertEquals(Collections.emptyList(), incomplete.suggest(word, timeLimitMs));
|
||||
|
||||
Hunspell throwing = new Hunspell(dictionary, THROW_EXCEPTION, () -> {});
|
||||
assertFalse(throwing.spell(word));
|
||||
|
||||
SuggestionTimeoutException exception =
|
||||
assertThrows(SuggestionTimeoutException.class, () -> throwing.suggest(word, timeLimitMs));
|
||||
assertEquals(Collections.emptyList(), exception.getPartialResult());
|
||||
|
||||
Hunspell noLimit = new Hunspell(dictionary, NO_TIMEOUT, () -> {});
|
||||
assertEquals(Collections.emptyList(), noLimit.suggest(word.substring(0, 5), timeLimitMs));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStemmingApi() throws Exception {
|
||||
Dictionary dictionary = loadDictionary(false, "simple.aff", "simple.dic");
|
||||
Hunspell hunspell = new Hunspell(dictionary, TimeoutPolicy.NO_TIMEOUT, () -> {});
|
||||
assertEquals(Collections.singletonList("apach"), hunspell.getRoots("apache"));
|
||||
assertEquals(Collections.singletonList("foo"), hunspell.getRoots("foo"));
|
||||
}
|
||||
}
|
|
@ -28,6 +28,7 @@ import java.nio.file.Paths;
|
|||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -81,7 +82,7 @@ public class TestPerformance extends LuceneTestCase {
|
|||
|
||||
@Test
|
||||
public void fr_suggest() throws Exception {
|
||||
checkSuggestionPerformance("fr", 10);
|
||||
checkSuggestionPerformance("fr", 100);
|
||||
}
|
||||
|
||||
private Dictionary loadDictionary(String code) throws IOException, ParseException {
|
||||
|
@ -97,7 +98,7 @@ public class TestPerformance extends LuceneTestCase {
|
|||
List<String> words = loadWords(code, wordCount, dictionary);
|
||||
|
||||
Stemmer stemmer = new Stemmer(dictionary);
|
||||
Hunspell speller = new Hunspell(dictionary);
|
||||
Hunspell speller = new Hunspell(dictionary, TimeoutPolicy.NO_TIMEOUT, () -> {});
|
||||
measure(
|
||||
"Stemming " + code,
|
||||
blackHole -> {
|
||||
|
@ -117,10 +118,10 @@ public class TestPerformance extends LuceneTestCase {
|
|||
|
||||
private void checkSuggestionPerformance(String code, int wordCount) throws Exception {
|
||||
Dictionary dictionary = loadDictionary(code);
|
||||
Hunspell speller = new Hunspell(dictionary);
|
||||
Hunspell speller = new Hunspell(dictionary, TimeoutPolicy.THROW_EXCEPTION, () -> {});
|
||||
List<String> words =
|
||||
loadWords(code, wordCount, dictionary).stream()
|
||||
.filter(w -> !speller.spell(w))
|
||||
.filter(w -> hasQuickSuggestions(speller, w))
|
||||
.collect(Collectors.toList());
|
||||
System.out.println("Checking " + words.size() + " misspelled words");
|
||||
|
||||
|
@ -134,6 +135,25 @@ public class TestPerformance extends LuceneTestCase {
|
|||
System.out.println();
|
||||
}
|
||||
|
||||
private boolean hasQuickSuggestions(Hunspell speller, String word) {
|
||||
if (speller.spell(word)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
long start = System.nanoTime();
|
||||
try {
|
||||
speller.suggest(word);
|
||||
} catch (SuggestionTimeoutException e) {
|
||||
System.out.println("Timeout happened for " + word + ", skipping");
|
||||
return false;
|
||||
}
|
||||
long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
|
||||
if (elapsed > Hunspell.SUGGEST_TIME_LIMIT * 4 / 5) {
|
||||
System.out.println(elapsed + "ms for " + word + ", too close to time limit, skipping");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private Path findAffFile(String code) throws IOException {
|
||||
return TestAllDictionaries.findAllAffixFiles()
|
||||
.filter(
|
||||
|
|
|
@ -25,8 +25,9 @@ import java.util.List;
|
|||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestSpellChecking extends StemmerTestBase {
|
||||
public class TestSpellChecking extends LuceneTestCase {
|
||||
|
||||
public void testBase() throws Exception {
|
||||
doTest("base");
|
||||
|
@ -233,7 +234,7 @@ public class TestSpellChecking extends StemmerTestBase {
|
|||
try {
|
||||
Dictionary dictionary =
|
||||
new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream);
|
||||
speller = new Hunspell(dictionary);
|
||||
speller = new Hunspell(dictionary, TimeoutPolicy.NO_TIMEOUT, () -> {});
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(affixStream);
|
||||
IOUtils.closeWhileHandlingException(dictStream);
|
||||
|
|
|
@ -16,11 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.util.Collections;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestStemmer extends StemmerTestBase {
|
||||
|
||||
|
@ -62,13 +58,6 @@ public class TestStemmer extends StemmerTestBase {
|
|||
assertStemsTo("solr", "olr");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHunspellStemmingApi() throws IOException, ParseException {
|
||||
Hunspell hunspell = new Hunspell(loadDictionary(false, "simple.aff", "simple.dic"));
|
||||
assertEquals(Collections.singletonList("apach"), hunspell.getRoots("apache"));
|
||||
assertEquals(Collections.singletonList("foo"), hunspell.getRoots("foo"));
|
||||
}
|
||||
|
||||
// some bogus stuff that should not stem (empty lists)!
|
||||
public void testBogusStems() {
|
||||
assertStemsTo("abs");
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
# A very expensive MAP: each character from each group has to be replaced with every other character from the same group,
|
||||
# so for long words the enumeration is very long
|
||||
|
||||
MAP 3
|
||||
MAP qwertyuiop
|
||||
MAP asdfghjkl
|
||||
MAP zxcvbnm
|
Loading…
Reference in New Issue