From 381a5cacb001453235e008976f6dc312fb149d37 Mon Sep 17 00:00:00 2001
From: Peter Gromov
Date: Tue, 23 Feb 2021 13:00:02 +0100
Subject: [PATCH] LUCENE-9805: Hunspell: fix space + mixed case heuristics on suggestions (#2420)

---
 .../apache/lucene/analysis/hunspell/ModifyingSuggester.java | 6 +++---
 .../src/test/org/apache/lucene/analysis/hunspell/sug.dic | 6 +++++-
 .../src/test/org/apache/lucene/analysis/hunspell/sug.sug | 1 +
 .../src/test/org/apache/lucene/analysis/hunspell/sug.wrong | 3 ++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
index 86e34c739e4..286d1ee69ee 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ModifyingSuggester.java
@@ -70,18 +70,18 @@ class ModifyingSuggester {
       }

       List<String> adjusted =
-          result.stream().map(s -> capitalizeAfterSpace(low, s)).collect(Collectors.toList());
+          result.stream().map(s -> capitalizeAfterSpace(word, s)).collect(Collectors.toList());
       result.clear();
       result.addAll(adjusted);
     }
   }

   // aNew -> "a New" (instead of "a new")
-  private String capitalizeAfterSpace(String lowMisspelled, String candidate) {
+  private String capitalizeAfterSpace(String misspelled, String candidate) {
     int space = candidate.indexOf(' ');
     int tail = candidate.length() - space - 1;
     if (space > 0
-        && lowMisspelled.regionMatches(lowMisspelled.length() - tail, candidate, space + 1, tail)) {
+        && !misspelled.regionMatches(misspelled.length() - tail, candidate, space + 1, tail)) {
       return candidate.substring(0, space + 1)
           + Character.toUpperCase(candidate.charAt(space + 1))
           + candidate.substring(space + 2);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
index 6725561e81f..6ea06f1f10f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.dic
@@ -15,4 +15,8 @@ spite
 inspire
 Saiph
 sahib
-ship
\ No newline at end of file
+ship
+ESP
+esp
+s
+S
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
index 4595756cc87..e7a52c43c02 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.sug
@@ -13,3 +13,4 @@ McDonald
 permanent
 in, in a
 Saiph, Ship, Sahib
+ESP, ESP s, Esp, Esp s
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
index d8875d8a571..356a08d8810 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/sug.wrong
@@ -14,4 +14,5 @@ permqnent
 permanent-vacation
 permqnent-vacation
 ina
-Sahip
\ No newline at end of file
+Sahip
+ESPs
\ No newline at end of file