mirror of https://github.com/apache/lucene.git
LUCENE-10451 Hunspell: don't perform potentially expensive spellchecking after timeout (#721)
Move all expensive operations closer to suggestion creation, and encapsulate case and output conversion in a new Suggestion class.
This commit is contained in:
parent
b6c1024f55
commit
92a20c24e9
|
@ -52,7 +52,7 @@ class GeneratingSuggester {
|
||||||
this.speller = speller;
|
this.speller = speller;
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> suggest(String word, WordCase originalCase, Set<String> prevSuggestions) {
|
List<String> suggest(String word, WordCase originalCase, Set<Suggestion> prevSuggestions) {
|
||||||
List<Weighted<Root<String>>> roots = findSimilarDictionaryEntries(word, originalCase);
|
List<Weighted<Root<String>>> roots = findSimilarDictionaryEntries(word, originalCase);
|
||||||
List<Weighted<String>> expanded = expandRoots(word, roots);
|
List<Weighted<String>> expanded = expandRoots(word, roots);
|
||||||
TreeSet<Weighted<String>> bySimilarity = rankBySimilarity(word, expanded);
|
TreeSet<Weighted<String>> bySimilarity = rankBySimilarity(word, expanded);
|
||||||
|
@ -331,7 +331,7 @@ class GeneratingSuggester {
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> getMostRelevantSuggestions(
|
private List<String> getMostRelevantSuggestions(
|
||||||
TreeSet<Weighted<String>> bySimilarity, Set<String> prevSuggestions) {
|
TreeSet<Weighted<String>> bySimilarity, Set<Suggestion> prevSuggestions) {
|
||||||
List<String> result = new ArrayList<>();
|
List<String> result = new ArrayList<>();
|
||||||
boolean hasExcellent = false;
|
boolean hasExcellent = false;
|
||||||
for (Weighted<String> weighted : bySimilarity) {
|
for (Weighted<String> weighted : bySimilarity) {
|
||||||
|
@ -347,7 +347,7 @@ class GeneratingSuggester {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prevSuggestions.stream().noneMatch(weighted.word::contains)
|
if (prevSuggestions.stream().noneMatch(s -> weighted.word.contains(s.raw))
|
||||||
&& result.stream().noneMatch(weighted.word::contains)
|
&& result.stream().noneMatch(weighted.word::contains)
|
||||||
&& speller.checkWord(weighted.word)) {
|
&& speller.checkWord(weighted.word)) {
|
||||||
result.add(weighted.word);
|
result.add(weighted.word);
|
||||||
|
|
|
@ -17,7 +17,8 @@
|
||||||
package org.apache.lucene.analysis.hunspell;
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
import static org.apache.lucene.analysis.hunspell.Dictionary.FLAG_UNSET;
|
import static org.apache.lucene.analysis.hunspell.Dictionary.FLAG_UNSET;
|
||||||
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.*;
|
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.NO_TIMEOUT;
|
||||||
|
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.RETURN_PARTIAL_RESULT;
|
||||||
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_BEGIN;
|
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_BEGIN;
|
||||||
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_END;
|
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_END;
|
||||||
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_MIDDLE;
|
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_MIDDLE;
|
||||||
|
@ -25,11 +26,11 @@ import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_RULE_END;
|
||||||
import static org.apache.lucene.analysis.hunspell.WordContext.SIMPLE_WORD;
|
import static org.apache.lucene.analysis.hunspell.WordContext.SIMPLE_WORD;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -543,25 +544,25 @@ public class Hunspell {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LinkedHashSet<String> suggestions = new LinkedHashSet<>();
|
LinkedHashSet<Suggestion> suggestions = new LinkedHashSet<>();
|
||||||
Runnable checkCanceled =
|
Runnable checkCanceled =
|
||||||
policy == NO_TIMEOUT
|
policy == NO_TIMEOUT ? this.checkCanceled : checkTimeLimit(word, suggestions, timeLimitMs);
|
||||||
? this.checkCanceled
|
|
||||||
: checkTimeLimit(word, wordCase, suggestions, timeLimitMs);
|
|
||||||
try {
|
try {
|
||||||
doSuggest(word, wordCase, suggestions, checkCanceled);
|
doSuggest(word, wordCase, suggestions, checkCanceled);
|
||||||
} catch (SuggestionTimeoutException e) {
|
} catch (SuggestionTimeoutException e) {
|
||||||
if (policy == RETURN_PARTIAL_RESULT) {
|
if (policy != RETURN_PARTIAL_RESULT) {
|
||||||
return postprocess(word, wordCase, suggestions);
|
throw e;
|
||||||
}
|
}
|
||||||
throw e;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return postprocess(word, wordCase, suggestions);
|
return postprocess(suggestions);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doSuggest(
|
private void doSuggest(
|
||||||
String word, WordCase wordCase, LinkedHashSet<String> suggestions, Runnable checkCanceled) {
|
String word,
|
||||||
|
WordCase wordCase,
|
||||||
|
LinkedHashSet<Suggestion> suggestions,
|
||||||
|
Runnable checkCanceled) {
|
||||||
Hunspell suggestionSpeller =
|
Hunspell suggestionSpeller =
|
||||||
new Hunspell(dictionary, policy, checkCanceled) {
|
new Hunspell(dictionary, policy, checkCanceled) {
|
||||||
@Override
|
@Override
|
||||||
|
@ -570,22 +571,26 @@ public class Hunspell {
|
||||||
&& !dictionary.hasFlag(formID, dictionary.subStandard);
|
&& !dictionary.hasFlag(formID, dictionary.subStandard);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
ModifyingSuggester modifier = new ModifyingSuggester(suggestionSpeller, suggestions);
|
boolean hasGoodSuggestions =
|
||||||
boolean hasGoodSuggestions = modifier.suggest(word, wordCase);
|
new ModifyingSuggester(suggestionSpeller, suggestions, word, wordCase).suggest();
|
||||||
|
|
||||||
if (!hasGoodSuggestions && dictionary.maxNGramSuggestions > 0) {
|
if (!hasGoodSuggestions && dictionary.maxNGramSuggestions > 0) {
|
||||||
suggestions.addAll(
|
List<String> generated =
|
||||||
new GeneratingSuggester(suggestionSpeller)
|
new GeneratingSuggester(suggestionSpeller)
|
||||||
.suggest(dictionary.toLowerCase(word), wordCase, suggestions));
|
.suggest(dictionary.toLowerCase(word), wordCase, suggestions);
|
||||||
|
for (String raw : generated) {
|
||||||
|
suggestions.add(new Suggestion(raw, word, wordCase, suggestionSpeller));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (word.contains("-") && suggestions.stream().noneMatch(s -> s.contains("-"))) {
|
if (word.contains("-") && suggestions.stream().noneMatch(s -> s.raw.contains("-"))) {
|
||||||
suggestions.addAll(modifyChunksBetweenDashes(word));
|
for (String raw : modifyChunksBetweenDashes(word)) {
|
||||||
|
suggestions.add(new Suggestion(raw, word, wordCase, suggestionSpeller));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Runnable checkTimeLimit(
|
private Runnable checkTimeLimit(String word, Set<Suggestion> suggestions, long timeLimitMs) {
|
||||||
String word, WordCase wordCase, Set<String> suggestions, long timeLimitMs) {
|
|
||||||
return new Runnable() {
|
return new Runnable() {
|
||||||
final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeLimitMs);
|
final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeLimitMs);
|
||||||
int invocationCounter = 100;
|
int invocationCounter = 100;
|
||||||
|
@ -603,38 +608,15 @@ public class Hunspell {
|
||||||
|
|
||||||
private void stop() {
|
private void stop() {
|
||||||
List<String> partialResult =
|
List<String> partialResult =
|
||||||
policy == RETURN_PARTIAL_RESULT ? null : postprocess(word, wordCase, suggestions);
|
policy == RETURN_PARTIAL_RESULT ? null : postprocess(suggestions);
|
||||||
String message = "Time limit of " + timeLimitMs + "ms exceeded for " + word;
|
String message = "Time limit of " + timeLimitMs + "ms exceeded for " + word;
|
||||||
throw new SuggestionTimeoutException(message, partialResult);
|
throw new SuggestionTimeoutException(message, partialResult);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> postprocess(String word, WordCase wordCase, Collection<String> suggestions) {
|
private List<String> postprocess(Collection<Suggestion> suggestions) {
|
||||||
Set<String> result = new LinkedHashSet<>();
|
return suggestions.stream().flatMap(s -> Arrays.stream(s.result)).distinct().toList();
|
||||||
for (String candidate : suggestions) {
|
|
||||||
result.add(adjustSuggestionCase(candidate, wordCase, word));
|
|
||||||
if (wordCase == WordCase.UPPER && dictionary.checkSharpS && candidate.contains("ß")) {
|
|
||||||
result.add(candidate);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result.stream().map(this::cleanOutput).collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
private String adjustSuggestionCase(String candidate, WordCase originalCase, String original) {
|
|
||||||
if (originalCase == WordCase.UPPER) {
|
|
||||||
String upper = candidate.toUpperCase(Locale.ROOT);
|
|
||||||
if (upper.contains(" ") || spell(upper)) {
|
|
||||||
return upper;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (Character.isUpperCase(original.charAt(0))) {
|
|
||||||
String title = Character.toUpperCase(candidate.charAt(0)) + candidate.substring(1);
|
|
||||||
if (title.contains(" ") || spell(title)) {
|
|
||||||
return title;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return candidate;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> modifyChunksBetweenDashes(String word) {
|
private List<String> modifyChunksBetweenDashes(String word) {
|
||||||
|
@ -662,12 +644,4 @@ public class Hunspell {
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String cleanOutput(String s) {
|
|
||||||
if (dictionary.oconv == null) return s;
|
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder(s);
|
|
||||||
dictionary.oconv.applyMappings(sb);
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,42 +25,50 @@ import java.util.Locale;
|
||||||
/** A class that modifies the given misspelled word in various ways to get correct suggestions */
|
/** A class that modifies the given misspelled word in various ways to get correct suggestions */
|
||||||
class ModifyingSuggester {
|
class ModifyingSuggester {
|
||||||
private static final int MAX_CHAR_DISTANCE = 4;
|
private static final int MAX_CHAR_DISTANCE = 4;
|
||||||
private final LinkedHashSet<String> result;
|
private final LinkedHashSet<Suggestion> result;
|
||||||
|
private final String misspelled;
|
||||||
|
private final WordCase wordCase;
|
||||||
private final char[] tryChars;
|
private final char[] tryChars;
|
||||||
private final Hunspell speller;
|
private final Hunspell speller;
|
||||||
|
|
||||||
ModifyingSuggester(Hunspell speller, LinkedHashSet<String> result) {
|
ModifyingSuggester(
|
||||||
|
Hunspell speller, LinkedHashSet<Suggestion> result, String misspelled, WordCase wordCase) {
|
||||||
this.speller = speller;
|
this.speller = speller;
|
||||||
tryChars = speller.dictionary.tryChars.toCharArray();
|
tryChars = speller.dictionary.tryChars.toCharArray();
|
||||||
this.result = result;
|
this.result = result;
|
||||||
|
this.misspelled = misspelled;
|
||||||
|
this.wordCase = wordCase;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return whether any of the added suggestions are considered "good" */
|
/** @return whether any of the added suggestions are considered "good" */
|
||||||
boolean suggest(String word, WordCase wordCase) {
|
boolean suggest() {
|
||||||
String low = wordCase != WordCase.LOWER ? speller.dictionary.toLowerCase(word) : word;
|
String low =
|
||||||
|
wordCase != WordCase.LOWER ? speller.dictionary.toLowerCase(misspelled) : misspelled;
|
||||||
if (wordCase == WordCase.UPPER || wordCase == WordCase.MIXED) {
|
if (wordCase == WordCase.UPPER || wordCase == WordCase.MIXED) {
|
||||||
trySuggestion(low);
|
trySuggestion(low);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean hasGoodSuggestions = tryVariationsOf(word);
|
boolean hasGoodSuggestions = tryVariationsOf(misspelled);
|
||||||
|
|
||||||
if (wordCase == WordCase.TITLE) {
|
if (wordCase == WordCase.TITLE) {
|
||||||
hasGoodSuggestions |= tryVariationsOf(low);
|
hasGoodSuggestions |= tryVariationsOf(low);
|
||||||
} else if (wordCase == WordCase.UPPER) {
|
} else if (wordCase == WordCase.UPPER) {
|
||||||
hasGoodSuggestions |= tryVariationsOf(low);
|
hasGoodSuggestions |= tryVariationsOf(low);
|
||||||
hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(word));
|
hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(misspelled));
|
||||||
} else if (wordCase == WordCase.MIXED) {
|
} else if (wordCase == WordCase.MIXED) {
|
||||||
int dot = word.indexOf('.');
|
int dot = misspelled.indexOf('.');
|
||||||
if (dot > 0
|
if (dot > 0 && dot < misspelled.length() - 1) {
|
||||||
&& dot < word.length() - 1
|
String afterDot = misspelled.substring(dot + 1);
|
||||||
&& WordCase.caseOf(word.substring(dot + 1)) == WordCase.TITLE) {
|
if (WordCase.caseOf(afterDot) == WordCase.TITLE) {
|
||||||
result.add(word.substring(0, dot + 1) + " " + word.substring(dot + 1));
|
result.add(createSuggestion(misspelled.substring(0, dot + 1) + " " + afterDot));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean capitalized = Character.isUpperCase(word.charAt(0));
|
char first = misspelled.charAt(0);
|
||||||
|
boolean capitalized = Character.isUpperCase(first);
|
||||||
if (capitalized) {
|
if (capitalized) {
|
||||||
hasGoodSuggestions |=
|
hasGoodSuggestions |=
|
||||||
tryVariationsOf(speller.dictionary.caseFold(word.charAt(0)) + word.substring(1));
|
tryVariationsOf(speller.dictionary.caseFold(first) + misspelled.substring(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
hasGoodSuggestions |= tryVariationsOf(low);
|
hasGoodSuggestions |= tryVariationsOf(low);
|
||||||
|
@ -69,29 +77,38 @@ class ModifyingSuggester {
|
||||||
hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(low));
|
hasGoodSuggestions |= tryVariationsOf(speller.dictionary.toTitleCase(low));
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> adjusted = new ArrayList<>();
|
List<Suggestion> reordered = new ArrayList<>();
|
||||||
for (String candidate : result) {
|
for (Suggestion candidate : result) {
|
||||||
String s = capitalizeAfterSpace(word, candidate);
|
Suggestion changed = capitalizeAfterSpace(candidate.raw);
|
||||||
adjusted.add(s.equals(candidate) ? adjusted.size() : 0, s);
|
if (changed == null) {
|
||||||
|
reordered.add(candidate);
|
||||||
|
} else {
|
||||||
|
reordered.add(0, changed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result.clear();
|
result.clear();
|
||||||
result.addAll(adjusted);
|
result.addAll(reordered);
|
||||||
}
|
}
|
||||||
return hasGoodSuggestions;
|
return hasGoodSuggestions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Suggestion createSuggestion(String candidate) {
|
||||||
|
return new Suggestion(candidate, misspelled, wordCase, speller);
|
||||||
|
}
|
||||||
|
|
||||||
// aNew -> "a New" (instead of "a new")
|
// aNew -> "a New" (instead of "a new")
|
||||||
private String capitalizeAfterSpace(String misspelled, String candidate) {
|
private Suggestion capitalizeAfterSpace(String candidate) {
|
||||||
int space = candidate.indexOf(' ');
|
int space = candidate.indexOf(' ');
|
||||||
int tail = candidate.length() - space - 1;
|
int tail = candidate.length() - space - 1;
|
||||||
if (space > 0
|
if (space > 0
|
||||||
&& !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
|
&& !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
|
||||||
return candidate.substring(0, space + 1)
|
return createSuggestion(
|
||||||
+ Character.toUpperCase(candidate.charAt(space + 1))
|
candidate.substring(0, space + 1)
|
||||||
+ candidate.substring(space + 2);
|
+ Character.toUpperCase(candidate.charAt(space + 1))
|
||||||
|
+ candidate.substring(space + 2));
|
||||||
}
|
}
|
||||||
return candidate;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean tryVariationsOf(String word) {
|
private boolean tryVariationsOf(String word) {
|
||||||
|
@ -111,9 +128,9 @@ class ModifyingSuggester {
|
||||||
tryReplacingChar(word);
|
tryReplacingChar(word);
|
||||||
tryTwoDuplicateChars(word);
|
tryTwoDuplicateChars(word);
|
||||||
|
|
||||||
List<String> goodSplit = checkDictionaryForSplitSuggestions(word);
|
List<Suggestion> goodSplit = checkDictionaryForSplitSuggestions(word);
|
||||||
if (!goodSplit.isEmpty()) {
|
if (!goodSplit.isEmpty()) {
|
||||||
List<String> copy = new ArrayList<>(result);
|
List<Suggestion> copy = new ArrayList<>(result);
|
||||||
result.clear();
|
result.clear();
|
||||||
result.addAll(goodSplit);
|
result.addAll(goodSplit);
|
||||||
if (hasGoodSuggestions) {
|
if (hasGoodSuggestions) {
|
||||||
|
@ -139,7 +156,7 @@ class ModifyingSuggester {
|
||||||
|
|
||||||
if (candidate.contains(" ")
|
if (candidate.contains(" ")
|
||||||
&& Arrays.stream(candidate.split(" ")).allMatch(this::checkSimpleWord)) {
|
&& Arrays.stream(candidate.split(" ")).allMatch(this::checkSimpleWord)) {
|
||||||
result.add(candidate);
|
result.add(createSuggestion(candidate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -294,19 +311,19 @@ class ModifyingSuggester {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> checkDictionaryForSplitSuggestions(String word) {
|
private List<Suggestion> checkDictionaryForSplitSuggestions(String word) {
|
||||||
List<String> result = new ArrayList<>();
|
List<Suggestion> result = new ArrayList<>();
|
||||||
for (int i = 1; i < word.length() - 1; i++) {
|
for (int i = 1; i < word.length() - 1; i++) {
|
||||||
String w1 = word.substring(0, i);
|
String w1 = word.substring(0, i);
|
||||||
String w2 = word.substring(i);
|
String w2 = word.substring(i);
|
||||||
String spaced = w1 + " " + w2;
|
String spaced = w1 + " " + w2;
|
||||||
if (speller.checkWord(spaced)) {
|
if (speller.checkWord(spaced)) {
|
||||||
result.add(spaced);
|
result.add(createSuggestion(spaced));
|
||||||
}
|
}
|
||||||
if (shouldSplitByDash()) {
|
if (shouldSplitByDash()) {
|
||||||
String dashed = w1 + "-" + w2;
|
String dashed = w1 + "-" + w2;
|
||||||
if (speller.checkWord(dashed)) {
|
if (speller.checkWord(dashed)) {
|
||||||
result.add(dashed);
|
result.add(createSuggestion(dashed));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -318,9 +335,9 @@ class ModifyingSuggester {
|
||||||
String w1 = word.substring(0, i);
|
String w1 = word.substring(0, i);
|
||||||
String w2 = word.substring(i);
|
String w2 = word.substring(i);
|
||||||
if (checkSimpleWord(w1) && checkSimpleWord(w2)) {
|
if (checkSimpleWord(w1) && checkSimpleWord(w2)) {
|
||||||
result.add(w1 + " " + w2);
|
result.add(createSuggestion(w1 + " " + w2));
|
||||||
if (w1.length() > 1 && w2.length() > 1 && shouldSplitByDash()) {
|
if (w1.length() > 1 && w2.length() > 1 && shouldSplitByDash()) {
|
||||||
result.add(w1 + "-" + w2);
|
result.add(createSuggestion(w1 + "-" + w2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -331,6 +348,6 @@ class ModifyingSuggester {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean trySuggestion(String candidate) {
|
private boolean trySuggestion(String candidate) {
|
||||||
return speller.checkWord(candidate) && result.add(candidate);
|
return speller.checkWord(candidate) && result.add(createSuggestion(candidate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
class Suggestion {
|
||||||
|
final String raw;
|
||||||
|
final String[] result;
|
||||||
|
|
||||||
|
Suggestion(String raw, String misspelled, WordCase originalCase, Hunspell speller) {
|
||||||
|
this.raw = raw;
|
||||||
|
|
||||||
|
List<String> result = new ArrayList<>();
|
||||||
|
String adjusted = adjustSuggestionCase(raw, misspelled, originalCase);
|
||||||
|
result.add(
|
||||||
|
cleanOutput(speller, adjusted.contains(" ") || speller.spell(adjusted) ? adjusted : raw));
|
||||||
|
if (originalCase == WordCase.UPPER && speller.dictionary.checkSharpS && raw.contains("ß")) {
|
||||||
|
result.add(cleanOutput(speller, raw));
|
||||||
|
}
|
||||||
|
this.result = result.toArray(new String[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String adjustSuggestionCase(String candidate, String misspelled, WordCase originalCase) {
|
||||||
|
if (originalCase == WordCase.UPPER) {
|
||||||
|
return candidate.toUpperCase(Locale.ROOT);
|
||||||
|
}
|
||||||
|
if (Character.isUpperCase(misspelled.charAt(0))) {
|
||||||
|
return Character.toUpperCase(candidate.charAt(0)) + candidate.substring(1);
|
||||||
|
}
|
||||||
|
return candidate;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String cleanOutput(Hunspell speller, String s) {
|
||||||
|
if (speller.dictionary.oconv == null) return s;
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder(s);
|
||||||
|
speller.dictionary.oconv.applyMappings(sb);
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof Suggestion)) return false;
|
||||||
|
Suggestion that = (Suggestion) o;
|
||||||
|
return raw.equals(that.raw) && Arrays.equals(result, that.result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 31 * Objects.hash(raw) + Arrays.hashCode(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return raw;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue