From 2d53c6073bf12aa848678982aa94f2896d2a1d60 Mon Sep 17 00:00:00 2001 From: Peter Gromov Date: Wed, 17 Feb 2021 09:09:44 +0100 Subject: [PATCH] LUCENE-9779: Hunspell: add an API to interrupt long computations (#2378) --- .../hunspell/GeneratingSuggester.java | 1 + .../lucene/analysis/hunspell/Hunspell.java | 16 ++++++- .../analysis/hunspell/HunspellTest.java | 45 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellTest.java diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java index 9d9c582b984..59ae43a2f6e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java @@ -91,6 +91,7 @@ class GeneratingSuggester { IntsRefFSTEnum fstEnum = new IntsRefFSTEnum<>(fst); IntsRefFSTEnum.InputOutput mapping; while ((mapping = fstEnum.next()) != null) { + speller.checkCanceled.run(); keyValueConsumer.accept(mapping.input, mapping.output); } } catch (IOException e) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java index 81254d8d8cb..e85494b57d9 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java @@ -51,14 +51,25 @@ import org.apache.lucene.util.IntsRef; public class Hunspell { final Dictionary dictionary; final Stemmer stemmer; + final Runnable checkCanceled; public Hunspell(Dictionary dictionary) { + this(dictionary, () -> {}); + } + + /** + * @param checkCanceled an object that's periodically called, allowing to interrupt spell-checking + * or suggestion generation by throwing an exception + */ + public Hunspell(Dictionary dictionary, Runnable checkCanceled) { this.dictionary = dictionary; + this.checkCanceled = checkCanceled; stemmer = new Stemmer(dictionary); } /** @return whether the given word's spelling is considered correct according to Hunspell rules */ public boolean spell(String word) { + checkCanceled.run(); if (word.isEmpty()) return true; if (dictionary.needsInputCleaning) { @@ -148,6 +159,7 @@ public class Hunspell { private Root findStem( char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) { + checkCanceled.run(); @SuppressWarnings({"rawtypes", "unchecked"}) Root[] result = new Root[1]; stemmer.doStem( @@ -356,6 +368,7 @@ public class Hunspell { int limit = length - dictionary.compoundMin + 1; for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) { + checkCanceled.run(); IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos); if (forms != null) { words.add(forms); @@ -463,6 +476,7 @@ public class Hunspell { } public List suggest(String word) { + checkCanceled.run(); if (word.length() >= 100) return Collections.emptyList(); if (dictionary.needsInputCleaning) { @@ -478,7 +492,7 @@ public class Hunspell { } Hunspell suggestionSpeller = - new Hunspell(dictionary) { + new Hunspell(dictionary, checkCanceled) { @Override boolean acceptsStem(int formID) { return !dictionary.hasFlag(formID, dictionary.noSuggest) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellTest.java new file mode 100644 index 00000000000..893439ea3bb --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.hunspell; + +import static org.apache.lucene.analysis.hunspell.StemmerTestBase.loadDictionary; + +import java.util.Collections; +import java.util.concurrent.CancellationException; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.lucene.util.LuceneTestCase; + +public class HunspellTest extends LuceneTestCase { + public void testCheckCanceled() throws Exception { + AtomicBoolean canceled = new AtomicBoolean(); + Runnable checkCanceled = + () -> { + if (canceled.get()) { + throw new CancellationException(); + } + }; + Hunspell hunspell = + new Hunspell(loadDictionary(false, "simple.aff", "simple.dic"), checkCanceled); + + assertTrue(hunspell.spell("apache")); + assertEquals(Collections.singletonList("apach"), hunspell.suggest("apac")); + + canceled.set(true); + assertThrows(CancellationException.class, () -> hunspell.spell("apache")); + assertThrows(CancellationException.class, () -> hunspell.suggest("apac")); + } +}