mirror of https://github.com/apache/lucene.git
LUCENE-9779: Hunspell: add an API to interrupt long computations (#2378)
This commit is contained in:
parent
cfd0ccefe1
commit
2d53c6073b
|
@ -91,6 +91,7 @@ class GeneratingSuggester {
|
||||||
IntsRefFSTEnum<IntsRef> fstEnum = new IntsRefFSTEnum<>(fst);
|
IntsRefFSTEnum<IntsRef> fstEnum = new IntsRefFSTEnum<>(fst);
|
||||||
IntsRefFSTEnum.InputOutput<IntsRef> mapping;
|
IntsRefFSTEnum.InputOutput<IntsRef> mapping;
|
||||||
while ((mapping = fstEnum.next()) != null) {
|
while ((mapping = fstEnum.next()) != null) {
|
||||||
|
speller.checkCanceled.run();
|
||||||
keyValueConsumer.accept(mapping.input, mapping.output);
|
keyValueConsumer.accept(mapping.input, mapping.output);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
|
@ -51,14 +51,25 @@ import org.apache.lucene.util.IntsRef;
|
||||||
public class Hunspell {
|
public class Hunspell {
|
||||||
final Dictionary dictionary;
|
final Dictionary dictionary;
|
||||||
final Stemmer stemmer;
|
final Stemmer stemmer;
|
||||||
|
final Runnable checkCanceled;
|
||||||
|
|
||||||
public Hunspell(Dictionary dictionary) {
|
public Hunspell(Dictionary dictionary) {
|
||||||
|
this(dictionary, () -> {});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param checkCanceled an object that's periodically called, allowing callers to interrupt spell-checking
|
||||||
|
* or suggestion generation by throwing an exception
|
||||||
|
*/
|
||||||
|
public Hunspell(Dictionary dictionary, Runnable checkCanceled) {
|
||||||
this.dictionary = dictionary;
|
this.dictionary = dictionary;
|
||||||
|
this.checkCanceled = checkCanceled;
|
||||||
stemmer = new Stemmer(dictionary);
|
stemmer = new Stemmer(dictionary);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return whether the given word's spelling is considered correct according to Hunspell rules */
|
/** @return whether the given word's spelling is considered correct according to Hunspell rules */
|
||||||
public boolean spell(String word) {
|
public boolean spell(String word) {
|
||||||
|
checkCanceled.run();
|
||||||
if (word.isEmpty()) return true;
|
if (word.isEmpty()) return true;
|
||||||
|
|
||||||
if (dictionary.needsInputCleaning) {
|
if (dictionary.needsInputCleaning) {
|
||||||
|
@ -148,6 +159,7 @@ public class Hunspell {
|
||||||
|
|
||||||
private Root<CharsRef> findStem(
|
private Root<CharsRef> findStem(
|
||||||
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
|
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
|
||||||
|
checkCanceled.run();
|
||||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
Root<CharsRef>[] result = new Root[1];
|
Root<CharsRef>[] result = new Root[1];
|
||||||
stemmer.doStem(
|
stemmer.doStem(
|
||||||
|
@ -356,6 +368,7 @@ public class Hunspell {
|
||||||
|
|
||||||
int limit = length - dictionary.compoundMin + 1;
|
int limit = length - dictionary.compoundMin + 1;
|
||||||
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
|
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
|
||||||
|
checkCanceled.run();
|
||||||
IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos);
|
IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos);
|
||||||
if (forms != null) {
|
if (forms != null) {
|
||||||
words.add(forms);
|
words.add(forms);
|
||||||
|
@ -463,6 +476,7 @@ public class Hunspell {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> suggest(String word) {
|
public List<String> suggest(String word) {
|
||||||
|
checkCanceled.run();
|
||||||
if (word.length() >= 100) return Collections.emptyList();
|
if (word.length() >= 100) return Collections.emptyList();
|
||||||
|
|
||||||
if (dictionary.needsInputCleaning) {
|
if (dictionary.needsInputCleaning) {
|
||||||
|
@ -478,7 +492,7 @@ public class Hunspell {
|
||||||
}
|
}
|
||||||
|
|
||||||
Hunspell suggestionSpeller =
|
Hunspell suggestionSpeller =
|
||||||
new Hunspell(dictionary) {
|
new Hunspell(dictionary, checkCanceled) {
|
||||||
@Override
|
@Override
|
||||||
boolean acceptsStem(int formID) {
|
boolean acceptsStem(int formID) {
|
||||||
return !dictionary.hasFlag(formID, dictionary.noSuggest)
|
return !dictionary.hasFlag(formID, dictionary.noSuggest)
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
|
import static org.apache.lucene.analysis.hunspell.StemmerTestBase.loadDictionary;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.concurrent.CancellationException;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class HunspellTest extends LuceneTestCase {
|
||||||
|
public void testCheckCanceled() throws Exception {
|
||||||
|
AtomicBoolean canceled = new AtomicBoolean();
|
||||||
|
Runnable checkCanceled =
|
||||||
|
() -> {
|
||||||
|
if (canceled.get()) {
|
||||||
|
throw new CancellationException();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Hunspell hunspell =
|
||||||
|
new Hunspell(loadDictionary(false, "simple.aff", "simple.dic"), checkCanceled);
|
||||||
|
|
||||||
|
assertTrue(hunspell.spell("apache"));
|
||||||
|
assertEquals(Collections.singletonList("apach"), hunspell.suggest("apac"));
|
||||||
|
|
||||||
|
canceled.set(true);
|
||||||
|
assertThrows(CancellationException.class, () -> hunspell.spell("apache"));
|
||||||
|
assertThrows(CancellationException.class, () -> hunspell.suggest("apac"));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue