mirror of https://github.com/apache/lucene.git
hunspell (minor): reduce allocations when processing compound rules (#12316)
parent 84e2e3afc3
commit a454388b80
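The common thread in the hunks below is swapping per-call java.util.stream pipelines and an ArrayList for plain loops over a pre-built array. As a point of reference, here is a minimal standalone sketch of that pattern, assuming nothing from the Lucene sources: Rule, mayMatch, and the method names are illustrative, not the real classes. The Stream form typically allocates a capturing lambda plus the pipeline objects on every call (unless the JIT eliminates them), while the array loop allocates nothing per call.

import java.util.List;

// Sketch of the allocation-reduction pattern this commit applies (illustrative names).
public class StreamVsLoop {
  interface Rule {
    boolean mayMatch(List<String> words);
  }

  // Stream form: each call typically allocates a capturing lambda plus the Stream
  // pipeline objects before any rule is inspected.
  static boolean anyRuleMayMatchStream(List<Rule> rules, List<String> words) {
    return rules.stream().anyMatch(r -> r.mayMatch(words));
  }

  // Loop form: same result, no per-call intermediate objects.
  static boolean anyRuleMayMatchLoop(Rule[] rules, List<String> words) {
    for (Rule rule : rules) {
      if (rule.mayMatch(words)) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    Rule startsWithA = ws -> !ws.isEmpty() && ws.get(0).startsWith("a");
    List<String> words = List.of("apfel", "baum");
    System.out.println(anyRuleMayMatchStream(List.of(startsWithA), words)); // true
    System.out.println(anyRuleMayMatchLoop(new Rule[] {startsWithA}, words)); // true
  }
}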
@@ -155,7 +155,7 @@ public class Dictionary {
   boolean checkCompoundCase, checkCompoundDup, checkCompoundRep;
   boolean checkCompoundTriple, simplifiedTriple;
   int compoundMin = 3, compoundMax = Integer.MAX_VALUE;
-  List<CompoundRule> compoundRules; // nullable
+  CompoundRule[] compoundRules; // nullable
   List<CheckCompoundPattern> checkCompoundPatterns = new ArrayList<>();

   // ignored characters (dictionary, affix, inputs)
@@ -601,11 +601,11 @@ public class Dictionary {
     return parts;
   }

-  private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
+  private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num)
       throws IOException, ParseException {
-    List<CompoundRule> compoundRules = new ArrayList<>();
+    CompoundRule[] compoundRules = new CompoundRule[num];
     for (int i = 0; i < num; i++) {
-      compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this));
+      compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this);
     }
     return compoundRules;
   }

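To make the parseCompoundRules change above concrete: when the element count is known up front (the num argument), filling an exact-size array skips the ArrayList wrapper object and its capacity-growth reallocations, and the resulting array can later be iterated with for-each without allocating an Iterator. A hedged sketch with illustrative names, not Lucene code:

import java.util.ArrayList;
import java.util.List;

// Pre-sized array vs. ArrayList when the count is known in advance (illustrative names).
public class PreSizedParse {
  static List<String> parseWithList(int num) {
    List<String> rules = new ArrayList<>(); // wrapper object + default-capacity backing array
    for (int i = 0; i < num; i++) {
      rules.add("rule-" + i);               // backing array may be reallocated as it grows
    }
    return rules;
  }

  static String[] parseWithArray(int num) {
    String[] rules = new String[num];       // one exact-size allocation
    for (int i = 0; i < num; i++) {
      rules[i] = "rule-" + i;
    }
    return rules;
  }

  public static void main(String[] args) {
    System.out.println(parseWithList(3));                             // [rule-0, rule-1, rule-2]
    System.out.println(java.util.Arrays.toString(parseWithArray(3))); // [rule-0, rule-1, rule-2]
  }
}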
@@ -450,7 +450,7 @@ public class Hunspell {
       if (forms != null) {
         words.add(forms);

-        if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) {
+        if (mayHaveCompoundRule(words)) {
           if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) {
             return true;
           }
@@ -467,6 +467,15 @@ public class Hunspell {
     return false;
   }

+  private boolean mayHaveCompoundRule(List<IntsRef> words) {
+    for (CompoundRule rule : dictionary.compoundRules) {
+      if (rule.mayMatch(words)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   private boolean checkLastCompoundPart(
       char[] wordChars, int start, int length, List<IntsRef> words) {
     IntsRef ref = new IntsRef(new int[1], 0, 1);
@@ -475,7 +484,12 @@ public class Hunspell {
     Stemmer.RootProcessor stopOnMatching =
         (stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
           ref.ints[0] = formID;
-          return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words));
+          for (CompoundRule r : dictionary.compoundRules) {
+            if (r.fullyMatches(words)) {
+              return false;
+            }
+          }
+          return true;
         };
     boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching);
     words.remove(words.size() - 1);
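The loop introduced in the last hunk is a direct replacement for Stream.noneMatch inside the RootProcessor lambda: return false as soon as one rule fully matches, true only if none did. Below is a small self-contained sketch, with illustrative names rather than Lucene code, showing that the loop and noneMatch agree:

import java.util.List;
import java.util.function.Predicate;

// Loop equivalent of Stream.noneMatch: false on the first match, true otherwise (illustrative names).
public class NoneMatchLoop {
  static <T> boolean noneMatchLoop(T[] items, Predicate<T> pred) {
    for (T item : items) {
      if (pred.test(item)) {
        return false; // some element matches, so "none match" is false
      }
    }
    return true;      // no element matched
  }

  public static void main(String[] args) {
    Predicate<Integer> even = n -> n % 2 == 0;

    Integer[] noEvens = {1, 3, 5};
    System.out.println(noneMatchLoop(noEvens, even));              // true
    System.out.println(List.of(noEvens).stream().noneMatch(even)); // true, same result

    Integer[] withEven = {1, 4, 5};
    System.out.println(noneMatchLoop(withEven, even));              // false
    System.out.println(List.of(withEven).stream().noneMatch(even)); // false, same result
  }
}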