Split the hunspell "second stage" affix flag set into separate prefix and suffix sets.

Dictionary: the single secondStageAffixFlags array becomes secondStagePrefixFlags and
secondStageSuffixFlags, filled from the flag sets handed to parseAffix for PFX and SFX lines
respectively; isSecondStageAffix is replaced by isSecondStagePrefix / isSecondStageSuffix.
Stemmer: each former isSecondStageAffix call now asks the prefix- or suffix-specific question.
TestPerformance: the second argument of checkAnalysisPerformance("fr", ...) goes from 80_000
to 100_000.

NOTE(review): this patch was reconstructed from a copy whose line breaks and generic type
arguments had been lost; confirm the restored generics and indentation against the target tree.

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index 115af24614e..0547c3e4ebc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -149,7 +149,7 @@ public class Dictionary {
    * All flags used in affix continuation classes. If an outer affix's flag isn't here, there's no
    * need to do 2-level affix stripping with it.
    */
-  private char[] secondStageAffixFlags;
+  private char[] secondStagePrefixFlags, secondStageSuffixFlags;
 
   char circumfix;
   char keepcase, forceUCase;
@@ -333,7 +333,8 @@ public class Dictionary {
       throws IOException, ParseException {
     TreeMap<String, List<Integer>> prefixes = new TreeMap<>();
     TreeMap<String, List<Integer>> suffixes = new TreeMap<>();
-    Set<Character> stage2Flags = new HashSet<>();
+    Set<Character> prefixContFlags = new HashSet<>();
+    Set<Character> suffixContFlags = new HashSet<>();
     Map<String, Integer> seenPatterns = new HashMap<>();
 
     // zero condition -> 0 ord
@@ -361,9 +362,9 @@ public class Dictionary {
       } else if ("AM".equals(firstWord)) {
         parseMorphAlias(line);
       } else if ("PFX".equals(firstWord)) {
-        parseAffix(prefixes, stage2Flags, line, reader, false, seenPatterns, seenStrips, flags);
+        parseAffix(prefixes, prefixContFlags, line, reader, false, seenPatterns, seenStrips, flags);
       } else if ("SFX".equals(firstWord)) {
-        parseAffix(suffixes, stage2Flags, line, reader, true, seenPatterns, seenStrips, flags);
+        parseAffix(suffixes, suffixContFlags, line, reader, true, seenPatterns, seenStrips, flags);
       } else if (line.equals("COMPLEXPREFIXES")) {
         complexPrefixes = true;
         // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
@@ -478,7 +479,8 @@ public class Dictionary {
 
     this.prefixes = affixFST(prefixes);
     this.suffixes = affixFST(suffixes);
-    secondStageAffixFlags = toSortedCharArray(stage2Flags);
+    secondStagePrefixFlags = toSortedCharArray(prefixContFlags);
+    secondStageSuffixFlags = toSortedCharArray(suffixContFlags);
 
     int totalChars = 0;
     for (String strip : seenStrips.keySet()) {
@@ -1624,8 +1626,12 @@ public class Dictionary {
     return chars;
   }
 
-  boolean isSecondStageAffix(char flag) {
-    return Arrays.binarySearch(secondStageAffixFlags, flag) >= 0;
+  boolean isSecondStagePrefix(char flag) {
+    return Arrays.binarySearch(secondStagePrefixFlags, flag) >= 0;
+  }
+
+  boolean isSecondStageSuffix(char flag) {
+    return Arrays.binarySearch(secondStageSuffixFlags, flag) >= 0;
   }
 
   /** folds single character (according to LANG if present) */
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 66fac294c0c..453211bc213 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -635,11 +635,11 @@ final class Stemmer {
       if (recursionDepth == 0) {
         if (prefix) {
           prefixId = affix;
-          doPrefix = dictionary.complexPrefixes && dictionary.isSecondStageAffix(flag);
+          doPrefix = dictionary.complexPrefixes && dictionary.isSecondStagePrefix(flag);
           // we took away the first prefix.
           // COMPLEXPREFIXES = true: combine with a second prefix and another suffix
           // COMPLEXPREFIXES = false: combine with a suffix
-        } else if (!dictionary.complexPrefixes && dictionary.isSecondStageAffix(flag)) {
+        } else if (!dictionary.complexPrefixes && dictionary.isSecondStageSuffix(flag)) {
           doPrefix = false;
           // we took away a suffix.
           // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
@@ -652,7 +652,7 @@ final class Stemmer {
         if (prefix && dictionary.complexPrefixes) {
           prefixId = affix;
           // we took away the second prefix: go look for another suffix
-        } else if (prefix || dictionary.complexPrefixes || !dictionary.isSecondStageAffix(flag)) {
+        } else if (prefix || dictionary.complexPrefixes || !dictionary.isSecondStageSuffix(flag)) {
           return true;
         }
         // we took away a prefix, then a suffix: go look for another suffix
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java
index 174ec444e9d..2dfa2a8cdbc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestPerformance.java
@@ -77,7 +77,7 @@ public class TestPerformance extends LuceneTestCase {
 
   @Test
   public void fr() throws Exception {
-    checkAnalysisPerformance("fr", 80_000);
+    checkAnalysisPerformance("fr", 100_000);
   }
 
   @Test