hunspell (minor): reduce allocations when processing compound rules (#12316)

2023-05-19 21:36:05 +02:00 · 2023-05-19 21:36:05 +02:00 · a454388b80
parent 84e2e3afc3
commit a454388b80
2 changed files with 20 additions and 6 deletions
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -155,7 +155,7 @@ public class Dictionary {
  boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
  boolean checkCompoundTriple, simplifiedTriple;
  int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
-  List<CompoundRule> compoundRules; // nullable
+  CompoundRule[] compoundRules; // nullable
  List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();

  // ignored characters (dictionary, affix, inputs)
@@ -601,11 +601,11 @@ public class Dictionary {
    return parts;
  }

-  private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
+  private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
      throws IOException, ParseException {
-    List<CompoundRule> compoundRules = new ArrayList<>();
+    CompoundRule[] compoundRules = new CompoundRule[num]; // one slot per rule line read below
    for (int i = 0; i < num; i++) {
-      compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this));
+      compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
    }
    return compoundRules;
  }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
@@ -450,7 +450,7 @@ public class Hunspell {
      if (forms != null) {
        words.add(forms);

-        if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
+        if (mayHaveCompoundRule(words)) {
          if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
            return true;
          }
@@ -467,6 +467,15 @@ public class Hunspell {
    return false;
  }

+  /** Returns true as soon as one compound rule reports that it may match {@code words}. */
+  private boolean mayHaveCompoundRule(List<IntsRef> words) {
+    boolean candidateFound = false;
+    for (int i = 0; !candidateFound && i < dictionary.compoundRules.length; i++) {
+      candidateFound = dictionary.compoundRules[i].mayMatch(words);
+    }
+    return candidateFound;
+  }
+
  private boolean checkLastCompoundPart(
      char[] wordChars, int start, int length, List<IntsRef> words) {
    IntsRef ref = new IntsRef(new int[1], 0, 1);
@@ -475,7 +484,12 @@ public class Hunspell {
    Stemmer.RootProcessor stopOnMatching =
        (stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
          ref.ints[0] = formID;
-          return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words));
+          for (CompoundRule r : dictionary.compoundRules) {
+            if (r.fullyMatches(words)) {
+              return false;
+            }
+          }
+          return true;
        };
    boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
    words.remove(words.size() - 1);