From 71705c900b6002ef36d1448f64a78526c158bf70 Mon Sep 17 00:00:00 2001
From: Peter Gromov <peter@jetbrains.com>
Date: Fri, 29 Jan 2021 08:36:37 +0100
Subject: [PATCH] LUCENE-9703: Hunspell: prohibit FORBIDDENWORD words and their
 case variations (#2254)

---
 .../org/apache/lucene/analysis/hunspell/SpellChecker.java | 8 +++++---
 .../java/org/apache/lucene/analysis/hunspell/Stemmer.java | 5 +++++
 .../src/test/org/apache/lucene/analysis/hunspell/IJ.wrong | 1 +
 .../apache/lucene/analysis/hunspell/SpellCheckerTest.java | 5 +++++
 .../org/apache/lucene/analysis/hunspell/TestDutchIJ.java  | 1 +
 5 files changed, 17 insertions(+), 3 deletions(-)
 create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/IJ.wrong
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java
index e32a805be14..8570e3d89fc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java
@@ -57,6 +57,10 @@ public class SpellChecker {
     }
 
     char[] wordChars = word.toCharArray();
+    if (dictionary.isForbiddenWord(wordChars, wordChars.length, scratch)) {
+      return false;
+    }
+
     if (checkWord(wordChars, wordChars.length, false)) {
       return true;
     }
@@ -66,9 +70,7 @@ public class SpellChecker {
       return true;
     }
 
-    if (dictionary.breaks.isNotEmpty()
-        && !hasTooManyBreakOccurrences(word)
-        && !dictionary.isForbiddenWord(wordChars, word.length(), scratch)) {
+    if (dictionary.breaks.isNotEmpty() && !hasTooManyBreakOccurrences(word)) {
       return tryBreaks(word);
     }
 
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 572473c2ab7..d88ee408f35 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.hunspell;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.ArrayUtil;
@@ -93,6 +94,10 @@ final class Stemmer {
       word = scratchBuffer;
     }
 
+    if (dictionary.isForbiddenWord(word, length, scratch)) {
+      return Collections.emptyList();
+    }
+
     WordCase wordCase = caseOf(word, length);
     List<CharsRef> list = doStem(word, 0, length, false, WordContext.SIMPLE_WORD);
     if (wordCase == WordCase.UPPER) {
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/IJ.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/IJ.wrong
new file mode 100644
index 00000000000..54bbb475a07
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/IJ.wrong
@@ -0,0 +1 @@
+Ijs
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
index dbfbbec08c8..0baf32f2901 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java
@@ -41,6 +41,11 @@ public class SpellCheckerTest extends StemmerTestBase {
     doTest("allcaps");
   }
 
+  @Test
+  public void IJ() throws Exception {
+    doTest("IJ"); // presumably runs the IJ.* fixtures, incl. the new IJ.wrong (verify)
+  }
+
   @Test
   public void i53643_numbersWithSeparators() throws Exception {
     doTest("i53643");
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDutchIJ.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDutchIJ.java
index dc4b897dae3..58477d84f12 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDutchIJ.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDutchIJ.java
@@ -27,5 +27,6 @@ public class TestDutchIJ extends StemmerTestBase {
   public void testStemming() {
     assertStemsTo("ijs", "ijs");
     assertStemsTo("IJs", "ijs");
+    assertStemsTo("Ijs"); // no stems expected; presumably a forbidden form (verify)
   }
 }