mirror of https://github.com/apache/lucene.git
hunspell (minor): reduce allocations when processing compound rules (#12316)
This commit is contained in:
parent
84e2e3afc3
commit
a454388b80
|
@ -155,7 +155,7 @@ public class Dictionary {
|
|||
boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
|
||||
boolean checkCompoundTriple, simplifiedTriple;
|
||||
int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
|
||||
List<CompoundRule> compoundRules; // nullable
|
||||
CompoundRule[] compoundRules; // nullable
|
||||
List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();
|
||||
|
||||
// ignored characters (dictionary, affix, inputs)
|
||||
|
@ -601,11 +601,11 @@ public class Dictionary {
|
|||
return parts;
|
||||
}
|
||||
|
||||
/**
 * Reads {@code num} lines from {@code reader} and builds one {@code CompoundRule} per line,
 * passing each line through {@code singleArgument} first.
 *
 * <p>This span is a rendered diff hunk, so two variants of the signature, of the container
 * declaration and of the store statement appear back to back: one variant collects the rules
 * in an {@code ArrayList}, the other fills a {@code CompoundRule[]} allocated with exactly
 * {@code num} slots. NOTE(review): the +/- markers are not visible here; judging by the commit
 * title ("reduce allocations") the array variant is presumably the added one - confirm.
 *
 * <p>NOTE(review): {@code readLine()} returns {@code null} at end of stream; how
 * {@code singleArgument} reacts to a {@code null} line is not visible in this hunk.
 *
 * @param reader source of the rule lines; {@code readLine()} is called once per rule
 * @param num number of rule lines to consume
 * @return the parsed rules, in the order their lines were read
 * @throws IOException if reading a line from {@code reader} fails
 * @throws ParseException presumably raised by {@code singleArgument} or the
 *     {@code CompoundRule} constructor for a malformed line - verify against those definitions
 */
private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
|
||||
private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
|
||||
throws IOException, ParseException {
|
||||
List<CompoundRule> compoundRules = new ArrayList<>();
|
||||
// Array variant: sized up front from the declared rule count, so it never grows.
CompoundRule[] compoundRules = new CompoundRule[num];
|
||||
for (int i = 0; i < num; i++) {
|
||||
compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this));
|
||||
// Array variant: rule i is stored at index i, preserving the order of the input lines.
compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
|
||||
}
|
||||
return compoundRules;
|
||||
}
|
||||
|
|
|
@ -450,7 +450,7 @@ public class Hunspell {
|
|||
if (forms != null) {
|
||||
words.add(forms);
|
||||
|
||||
if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
|
||||
if (mayHaveCompoundRule(words)) {
|
||||
if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -467,6 +467,15 @@ public class Hunspell {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Tells whether at least one entry of {@code dictionary.compoundRules} reports
 * {@code mayMatch} for the given word list.
 *
 * <p>Implemented as a plain loop that returns on the first positive answer.
 *
 * <p>NOTE(review): {@code compoundRules} is marked nullable at its declaration in
 * {@code Dictionary}; this loop would throw a {@code NullPointerException} on {@code null}.
 * Presumably callers only get here when rules exist - verify against the call site.
 *
 * @param words the list handed unchanged to each rule's {@code mayMatch}
 * @return {@code true} as soon as any rule's {@code mayMatch(words)} is true, {@code false}
 *     when no rule does (including when there are no rules)
 */
private boolean mayHaveCompoundRule(List<IntsRef> words) {
|
||||
for (CompoundRule rule : dictionary.compoundRules) {
|
||||
if (rule.mayMatch(words)) {
|
||||
// One candidate rule is enough; skip the remaining rules.
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean checkLastCompoundPart(
|
||||
char[] wordChars, int start, int length, List<IntsRef> words) {
|
||||
IntsRef ref = new IntsRef(new int[1], 0, 1);
|
||||
|
@ -475,7 +484,12 @@ public class Hunspell {
|
|||
Stemmer.RootProcessor stopOnMatching =
|
||||
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
|
||||
ref.ints[0] = formID;
|
||||
return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words));
|
||||
for (CompoundRule r : dictionary.compoundRules) {
|
||||
if (r.fullyMatches(words)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
|
||||
words.remove(words.size() - 1);
|
||||
|
|
Loading…
Reference in New Issue