hunspell (minor): reduce allocations when processing compound rules (#12316)

This commit is contained in:
Peter Gromov 2023-05-19 21:36:05 +02:00 committed by GitHub
parent 84e2e3afc3
commit a454388b80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 6 deletions

View File

@@ -155,7 +155,7 @@ public class Dictionary {
boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
boolean checkCompoundTriple, simplifiedTriple;
int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
List<CompoundRule> compoundRules; // nullable
CompoundRule[] compoundRules; // nullable
List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();
// ignored characters (dictionary, affix, inputs)
@@ -601,11 +601,11 @@ public class Dictionary {
return parts;
}
/**
 * Reads {@code num} consecutive lines from {@code reader} and builds one {@link CompoundRule} from
 * the single argument found on each line, in reading order.
 *
 * <p>NOTE(review): this span shows two variants of the signature and of the storage statements
 * side by side (a {@code List<CompoundRule>} form and a {@code CompoundRule[]} form); the
 * description applies to both.
 *
 * @param reader source of the rule lines; one line is consumed per rule
 * @param num number of rule lines to read, which is also the number of rules returned
 * @return the parsed rules, one per line read
 * @throws IOException if reading a line from {@code reader} fails
 * @throws ParseException presumably raised by {@code singleArgument} for a malformed line —
 *     confirm against that helper
 */
private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
throws IOException, ParseException {
List<CompoundRule> compoundRules = new ArrayList<>();
// Array variant: sized to num up front, since exactly one rule is stored per loop iteration.
CompoundRule[] compoundRules = new CompoundRule[num];
for (int i = 0; i < num; i++) {
compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this));
// Array variant: slot i receives the rule built from the i-th line read.
compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
}
return compoundRules;
}

View File

@@ -450,7 +450,7 @@ public class Hunspell {
if (forms != null) {
words.add(forms);
if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
if (mayHaveCompoundRule(words)) {
if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
return true;
}
@@ -467,6 +467,15 @@ public class Hunspell {
return false;
}
/**
 * Tells whether at least one of the dictionary's compound rules reports, through
 * {@code mayMatch}, that it may match the given sequence of word forms.
 *
 * <p>The rules are scanned in array order and the scan stops at the first rule that answers
 * {@code true}.
 *
 * @param words the word forms collected so far for the compound being checked
 * @return {@code true} if some rule's {@code mayMatch(words)} is {@code true}; {@code false} if
 *     none is (including when there are no rules)
 */
private boolean mayHaveCompoundRule(List<IntsRef> words) {
  CompoundRule[] rules = dictionary.compoundRules;
  int index = 0;
  // Advance past every rule that cannot match; stop on the first one that may.
  while (index < rules.length && !rules[index].mayMatch(words)) {
    index++;
  }
  // Stopping before the end means a candidate rule was found.
  return index < rules.length;
}
private boolean checkLastCompoundPart(
char[] wordChars, int start, int length, List<IntsRef> words) {
IntsRef ref = new IntsRef(new int[1], 0, 1);
@@ -475,7 +484,12 @@ public class Hunspell {
Stemmer.RootProcessor stopOnMatching =
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
ref.ints[0] = formID;
return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words));
for (CompoundRule r : dictionary.compoundRules) {
if (r.fullyMatches(words)) {
return false;
}
}
return true;
};
boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
words.remove(words.size() - 1);