hunspell (minor): reduce allocations when processing compound rules (#12316)

This commit is contained in:
Peter Gromov 2023-05-19 21:36:05 +02:00 committed by GitHub
parent 84e2e3afc3
commit a454388b80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 6 deletions

View File

@ -155,7 +155,7 @@ public class Dictionary {
boolean checkCompoundCase, checkCompoundDup, checkCompoundRep; boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
boolean checkCompoundTriple, simplifiedTriple; boolean checkCompoundTriple, simplifiedTriple;
int compoundMin = 3, compoundMax = Integer.MAX_VALUE; int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
List<CompoundRule> compoundRules; // nullable CompoundRule[] compoundRules; // nullable
List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>(); List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();
// ignored characters (dictionary, affix, inputs) // ignored characters (dictionary, affix, inputs)
@ -601,11 +601,11 @@ public class Dictionary {
return parts; return parts;
} }
private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num) private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
throws IOException, ParseException { throws IOException, ParseException {
List<CompoundRule> compoundRules = new ArrayList<>(); CompoundRule[] compoundRules = new CompoundRule[num];
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this)); compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
} }
return compoundRules; return compoundRules;
} }

View File

@ -450,7 +450,7 @@ public class Hunspell {
if (forms != null) { if (forms != null) {
words.add(forms); words.add(forms);
if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) { if (mayHaveCompoundRule(words)) {
if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) { if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
return true; return true;
} }
@ -467,6 +467,15 @@ public class Hunspell {
return false; return false;
} }
/**
 * Tells whether at least one of the dictionary's compound rules reports that it may match the
 * given list of word forms.
 *
 * <p>This is a plain array scan that stops at the first hit. Note that {@code
 * dictionary.compoundRules} is dereferenced here without a null check.
 *
 * @param words the list of forms accumulated by the caller
 * @return {@code true} as soon as some rule's {@code mayMatch(words)} holds, {@code false} when
 *     no rule does
 */
private boolean mayHaveCompoundRule(List<IntsRef> words) {
  CompoundRule[] rules = dictionary.compoundRules;
  for (int i = 0; i < rules.length; i++) {
    if (rules[i].mayMatch(words)) {
      return true;
    }
  }
  return false;
}
private boolean checkLastCompoundPart( private boolean checkLastCompoundPart(
char[] wordChars, int start, int length, List<IntsRef> words) { char[] wordChars, int start, int length, List<IntsRef> words) {
IntsRef ref = new IntsRef(new int[1], 0, 1); IntsRef ref = new IntsRef(new int[1], 0, 1);
@ -475,7 +484,12 @@ public class Hunspell {
Stemmer.RootProcessor stopOnMatching = Stemmer.RootProcessor stopOnMatching =
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> { (stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
ref.ints[0] = formID; ref.ints[0] = formID;
return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words)); for (CompoundRule r : dictionary.compoundRules) {
if (r.fullyMatches(words)) {
return false;
}
}
return true;
}; };
boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching); boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
words.remove(words.size() - 1); words.remove(words.size() - 1);