LUCENE-9779: Hunspell: add an API to interrupt long computations (#2378)

This commit is contained in:
Peter Gromov 2021-02-17 09:09:44 +01:00 committed by GitHub
parent cfd0ccefe1
commit 2d53c6073b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 1 deletions

View File

@ -91,6 +91,7 @@ class GeneratingSuggester {
IntsRefFSTEnum<IntsRef> fstEnum = new IntsRefFSTEnum<>(fst); IntsRefFSTEnum<IntsRef> fstEnum = new IntsRefFSTEnum<>(fst);
IntsRefFSTEnum.InputOutput<IntsRef> mapping; IntsRefFSTEnum.InputOutput<IntsRef> mapping;
while ((mapping = fstEnum.next()) != null) { while ((mapping = fstEnum.next()) != null) {
speller.checkCanceled.run();
keyValueConsumer.accept(mapping.input, mapping.output); keyValueConsumer.accept(mapping.input, mapping.output);
} }
} catch (IOException e) { } catch (IOException e) {

View File

@ -51,14 +51,25 @@ import org.apache.lucene.util.IntsRef;
public class Hunspell { public class Hunspell {
final Dictionary dictionary; final Dictionary dictionary;
final Stemmer stemmer; final Stemmer stemmer;
final Runnable checkCanceled;
public Hunspell(Dictionary dictionary) { public Hunspell(Dictionary dictionary) {
this(dictionary, () -> {});
}
/**
* @param checkCanceled a callback that is invoked periodically, allowing callers to interrupt
* spell-checking or suggestion generation by throwing an exception from it
*/
public Hunspell(Dictionary dictionary, Runnable checkCanceled) {
this.dictionary = dictionary; this.dictionary = dictionary;
this.checkCanceled = checkCanceled;
stemmer = new Stemmer(dictionary); stemmer = new Stemmer(dictionary);
} }
/** @return whether the given word's spelling is considered correct according to Hunspell rules */ /** @return whether the given word's spelling is considered correct according to Hunspell rules */
public boolean spell(String word) { public boolean spell(String word) {
checkCanceled.run();
if (word.isEmpty()) return true; if (word.isEmpty()) return true;
if (dictionary.needsInputCleaning) { if (dictionary.needsInputCleaning) {
@ -148,6 +159,7 @@ public class Hunspell {
private Root<CharsRef> findStem( private Root<CharsRef> findStem(
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) { char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
checkCanceled.run();
@SuppressWarnings({"rawtypes", "unchecked"}) @SuppressWarnings({"rawtypes", "unchecked"})
Root<CharsRef>[] result = new Root[1]; Root<CharsRef>[] result = new Root[1];
stemmer.doStem( stemmer.doStem(
@ -356,6 +368,7 @@ public class Hunspell {
int limit = length - dictionary.compoundMin + 1; int limit = length - dictionary.compoundMin + 1;
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) { for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
checkCanceled.run();
IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos); IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos);
if (forms != null) { if (forms != null) {
words.add(forms); words.add(forms);
@ -463,6 +476,7 @@ public class Hunspell {
} }
public List<String> suggest(String word) { public List<String> suggest(String word) {
checkCanceled.run();
if (word.length() >= 100) return Collections.emptyList(); if (word.length() >= 100) return Collections.emptyList();
if (dictionary.needsInputCleaning) { if (dictionary.needsInputCleaning) {
@ -478,7 +492,7 @@ public class Hunspell {
} }
Hunspell suggestionSpeller = Hunspell suggestionSpeller =
new Hunspell(dictionary) { new Hunspell(dictionary, checkCanceled) {
@Override @Override
boolean acceptsStem(int formID) { boolean acceptsStem(int formID) {
return !dictionary.hasFlag(formID, dictionary.noSuggest) return !dictionary.hasFlag(formID, dictionary.noSuggest)

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.hunspell;
import static org.apache.lucene.analysis.hunspell.StemmerTestBase.loadDictionary;
import java.util.Collections;
import java.util.concurrent.CancellationException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.util.LuceneTestCase;
/** Exercises the cancellation callback accepted by the {@link Hunspell} constructor. */
public class HunspellTest extends LuceneTestCase {

  /**
   * Checks that the callback handed to {@link Hunspell} is consulted by both {@code spell} and
   * {@code suggest}. While the flag is unset both calls complete normally; once it is set, the
   * {@link CancellationException} thrown by the callback propagates out of both calls.
   */
  public void testCheckCanceled() throws Exception {
    // Flag flipped by the test to simulate an external cancellation request.
    AtomicBoolean cancelRequested = new AtomicBoolean();

    Hunspell speller =
        new Hunspell(
            loadDictionary(false, "simple.aff", "simple.dic"),
            () -> {
              if (!cancelRequested.get()) {
                return;
              }
              throw new CancellationException();
            });

    // Not canceled yet: both operations return their regular results.
    assertTrue(speller.spell("apache"));
    assertEquals(Collections.singletonList("apach"), speller.suggest("apac"));

    cancelRequested.set(true);

    // Canceled: the exception raised by the callback must surface to the caller.
    assertThrows(CancellationException.class, () -> speller.spell("apache"));
    assertThrows(CancellationException.class, () -> speller.suggest("apac"));
  }
}