mirror of https://github.com/apache/lucene.git
LUCENE-9739: Hunspell: speed up numeric flag parsing (#2316)
This commit is contained in:
parent
653626399f
commit
c3fe9afcc6
|
@ -1412,30 +1412,26 @@ public class Dictionary {
|
||||||
private static class NumFlagParsingStrategy extends FlagParsingStrategy {
|
private static class NumFlagParsingStrategy extends FlagParsingStrategy {
|
||||||
@Override
|
@Override
|
||||||
public char[] parseFlags(String rawFlags) {
|
public char[] parseFlags(String rawFlags) {
|
||||||
String[] rawFlagParts = rawFlags.trim().split(",");
|
StringBuilder result = new StringBuilder();
|
||||||
char[] flags = new char[rawFlagParts.length];
|
StringBuilder group = new StringBuilder();
|
||||||
int upto = 0;
|
for (int i = 0; i <= rawFlags.length(); i++) {
|
||||||
|
if (i == rawFlags.length() || rawFlags.charAt(i) == ',') {
|
||||||
for (String rawFlagPart : rawFlagParts) {
|
if (group.length() > 0) { // ignoring empty flags (this happens in danish, for example)
|
||||||
// note, removing the trailing X/leading I for nepali... what is the rule here?!
|
int flag = Integer.parseInt(group, 0, group.length(), 10);
|
||||||
String replacement = rawFlagPart.replaceAll("[^0-9]", "");
|
if (flag >= DEFAULT_FLAGS) {
|
||||||
// note, ignoring empty flags (this happens in danish, for example)
|
|
||||||
if (replacement.isEmpty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int flag = Integer.parseInt(replacement);
|
|
||||||
if (flag >= Character.MAX_VALUE) { // read default flags as well
|
|
||||||
// accept 0 due to https://github.com/hunspell/hunspell/issues/708
|
// accept 0 due to https://github.com/hunspell/hunspell/issues/708
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Num flags should be between 0 and " + DEFAULT_FLAGS + ", found " + flag);
|
"Num flags should be between 0 and " + DEFAULT_FLAGS + ", found " + flag);
|
||||||
}
|
}
|
||||||
flags[upto++] = (char) flag;
|
result.append((char) flag);
|
||||||
|
group.setLength(0);
|
||||||
|
}
|
||||||
|
} else if (rawFlags.charAt(i) >= '0' && rawFlags.charAt(i) <= '9') {
|
||||||
|
group.append(rawFlags.charAt(i));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (upto < flags.length) {
|
return result.toString().toCharArray();
|
||||||
flags = ArrayUtil.copyOfSubArray(flags, 0, upto);
|
|
||||||
}
|
|
||||||
return flags;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue