LUCENE-9739: Hunspell: speed up numeric flag parsing (#2316)

This commit is contained in:
Peter Gromov 2021-02-08 11:02:13 +01:00 committed by GitHub
parent 653626399f
commit c3fe9afcc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 17 additions and 21 deletions

View File

@@ -1412,30 +1412,26 @@ public class Dictionary {
private static class NumFlagParsingStrategy extends FlagParsingStrategy { private static class NumFlagParsingStrategy extends FlagParsingStrategy {
@Override @Override
public char[] parseFlags(String rawFlags) { public char[] parseFlags(String rawFlags) {
String[] rawFlagParts = rawFlags.trim().split(","); StringBuilder result = new StringBuilder();
char[] flags = new char[rawFlagParts.length]; StringBuilder group = new StringBuilder();
int upto = 0; for (int i = 0; i <= rawFlags.length(); i++) {
if (i == rawFlags.length() || rawFlags.charAt(i) == ',') {
for (String rawFlagPart : rawFlagParts) { if (group.length() > 0) { // ignoring empty flags (this happens in danish, for example)
// note, removing the trailing X/leading I for nepali... what is the rule here?! int flag = Integer.parseInt(group, 0, group.length(), 10);
String replacement = rawFlagPart.replaceAll("[^0-9]", ""); if (flag >= DEFAULT_FLAGS) {
// note, ignoring empty flags (this happens in danish, for example)
if (replacement.isEmpty()) {
continue;
}
int flag = Integer.parseInt(replacement);
if (flag >= Character.MAX_VALUE) { // read default flags as well
// accept 0 due to https://github.com/hunspell/hunspell/issues/708 // accept 0 due to https://github.com/hunspell/hunspell/issues/708
throw new IllegalArgumentException( throw new IllegalArgumentException(
"Num flags should be between 0 and " + DEFAULT_FLAGS + ", found " + flag); "Num flags should be between 0 and " + DEFAULT_FLAGS + ", found " + flag);
} }
flags[upto++] = (char) flag; result.append((char) flag);
group.setLength(0);
}
} else if (rawFlags.charAt(i) >= '0' && rawFlags.charAt(i) <= '9') {
group.append(rawFlags.charAt(i));
}
} }
if (upto < flags.length) { return result.toString().toCharArray();
flags = ArrayUtil.copyOfSubArray(flags, 0, upto);
}
return flags;
} }
} }