diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java index b1c4b3d30b4..5b4c504c818 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/CheckCompoundPattern.java @@ -17,7 +17,6 @@ package org.apache.lucene.analysis.hunspell; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.IntsRef; class CheckCompoundPattern { private final String endChars; @@ -51,10 +50,9 @@ class CheckCompoundPattern { return endChars + " " + beginChars + (replacement == null ? "" : " -> " + replacement); } - boolean prohibitsCompounding( - CharsRef word, int breakPos, CharsRef stemBefore, CharsRef stemAfter) { + boolean prohibitsCompounding(CharsRef word, int breakPos, Root rootBefore, Root rootAfter) { if (isNonAffixedPattern(endChars)) { - if (!charsMatch(word, breakPos - stemBefore.length, stemBefore)) { + if (!charsMatch(word, breakPos - rootBefore.word.length(), rootBefore.word)) { return false; } } else if (!charsMatch(word, breakPos - endChars.length(), endChars)) { @@ -62,18 +60,18 @@ class CheckCompoundPattern { } if (isNonAffixedPattern(beginChars)) { - if (!charsMatch(word, breakPos, stemAfter)) { + if (!charsMatch(word, breakPos, rootAfter.word)) { return false; } } else if (!charsMatch(word, breakPos, beginChars)) { return false; } - if (endFlags.length > 0 && !stemHasFlags(stemBefore, endFlags)) { + if (endFlags.length > 0 && !hasAllFlags(rootBefore, endFlags)) { return false; } //noinspection RedundantIfStatement - if (beginFlags.length > 0 && !stemHasFlags(stemAfter, beginFlags)) { + if (beginFlags.length > 0 && !hasAllFlags(rootAfter, beginFlags)) { return false; } @@ -84,14 +82,9 @@ class CheckCompoundPattern { return pattern.length() == 1 && pattern.charAt(0) == '0'; } - private boolean stemHasFlags(CharsRef stem, char[] flags) { - IntsRef forms = dictionary.lookupWord(stem.chars, stem.offset, stem.length); - return forms != null && hasAllFlags(flags, forms); - } - - private boolean hasAllFlags(char[] flags, IntsRef forms) { + private boolean hasAllFlags(Root root, char[] flags) { for (char flag : flags) { - if (!dictionary.hasFlag(forms, flag)) { + if (!dictionary.hasFlag(root.entryId, flag)) { return false; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java index 99884218d25..e98200a5287 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java @@ -226,13 +226,13 @@ public class Hunspell { int breakPos = prev.length; int remainingLength = word.length - breakPos; int breakOffset = word.offset + breakPos; - Root tailStem = + Root lastRoot = findStem(word.chars, breakOffset, remainingLength, originalCase, COMPOUND_END); - if (tailStem != null - && !dictionary.hasFlag(tailStem.entryId, dictionary.forbiddenword) - && !(dictionary.checkCompoundDup && equalsIgnoreCase(prev.stem, tailStem.word)) - && !hasForceUCaseProblem(word.chars, breakOffset, remainingLength, originalCase) - && prev.mayCompound(tailStem, remainingLength, originalCase)) { + if (lastRoot != null + && !dictionary.hasFlag(lastRoot.entryId, dictionary.forbiddenword) + && !(dictionary.checkCompoundDup && prev.root.equals(lastRoot)) + && !hasForceUCaseProblem(lastRoot, originalCase) + && prev.mayCompound(lastRoot, remainingLength, originalCase)) { return true; } @@ -240,17 +240,9 @@ public class Hunspell { return checkCompounds(tail, originalCase, prev); } - private boolean hasForceUCaseProblem( - char[] chars, int offset, int length, WordCase originalCase) { - if (dictionary.forceUCase == FLAG_UNSET) return false; + private boolean hasForceUCaseProblem(Root root, WordCase originalCase) { if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false; - - IntsRef forms = dictionary.lookupWord(chars, offset, length); - return forms != null && dictionary.hasFlag(forms, dictionary.forceUCase); - } - - private boolean equalsIgnoreCase(CharSequence cr1, CharSequence cr2) { - return cr1.toString().equalsIgnoreCase(cr2.toString()); + return dictionary.hasFlag(root.entryId, dictionary.forceUCase); } /** @@ -274,19 +266,20 @@ public class Hunspell { private class CompoundPart { final CompoundPart prev; final int index, length; - final CharsRef tail, stem; + final CharsRef tail; + final Root root; final CheckCompoundPattern enablingPattern; CompoundPart( CompoundPart prev, CharsRef tail, int length, - Root stem, + Root root, CheckCompoundPattern enabler) { this.prev = prev; this.tail = tail; this.length = length; - this.stem = stem.word; + this.root = root; index = prev == null ? 1 : prev.index + 1; enablingPattern = enabler; } @@ -296,12 +289,12 @@ public class Hunspell { return (prev == null ? "" : prev + "+") + tail.subSequence(0, length); } - boolean mayCompound(Root nextStem, int nextPartLength, WordCase originalCase) { + boolean mayCompound(Root nextRoot, int nextPartLength, WordCase originalCase) { boolean patternsOk = enablingPattern != null - ? enablingPattern.prohibitsCompounding(tail, length, stem, nextStem.word) + ? enablingPattern.prohibitsCompounding(tail, length, root, nextRoot) : dictionary.checkCompoundPatterns.stream() - .noneMatch(p -> p.prohibitsCompounding(tail, length, stem, nextStem.word)); + .noneMatch(p -> p.prohibitsCompounding(tail, length, root, nextRoot)); if (!patternsOk) { return false; }