LUCENE-9681: deduplicate decodeFlags+hasFlag checks (#2223)

This commit is contained in:
Peter Gromov 2021-01-20 17:27:04 +01:00 committed by GitHub
parent 939699f550
commit 89456686ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 51 deletions

View File

@ -1101,7 +1101,9 @@ public class Dictionary {
return hasFlag(flags, HIDDEN_FLAG);
}
static char[] decodeFlags(BytesRef b) {
char[] decodeFlags(int entryId, BytesRef b) {
this.flagLookup.get(entryId, b);
if (b.length == 0) {
return CharsRef.EMPTY_CHARS;
}
@ -1192,8 +1194,7 @@ public class Dictionary {
if (forms != null) {
int formStep = formStep();
for (int i = 0; i < forms.length; i += formStep) {
flagLookup.get(forms.ints[forms.offset + i], scratch);
if (hasFlag(Dictionary.decodeFlags(scratch), (char) forbiddenword)) {
if (hasFlag(forms.ints[forms.offset + i], (char) forbiddenword, scratch)) {
return true;
}
}
@ -1301,6 +1302,10 @@ public class Dictionary {
}
}
boolean hasFlag(int entryId, char flag, BytesRef scratch) {
return hasFlag(decodeFlags(entryId, scratch), flag);
}
static boolean hasFlag(char[] flags, char flag) {
return Arrays.binarySearch(flags, flag) >= 0;
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.ArrayUtil;
@ -142,8 +141,7 @@ final class Stemmer {
IntsRef forms = dictionary.lookupWord(word, 0, length);
if (forms != null) {
for (int i = 0; i < forms.length; i += formStep) {
dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
char[] wordFlags = Dictionary.decodeFlags(scratch);
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
if (!acceptCase(caseVariant, wordFlags)) {
continue;
}
@ -321,19 +319,16 @@ final class Stemmer {
compatible = true;
} else {
// check if affix is allowed in a non-compound word
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
compatible = !dictionary.hasFlag(append, (char) dictionary.onlyincompound, scratch);
}
} else if (crossProduct) {
// cross check incoming continuation class (flag of previous affix) against list.
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
char[] appendFlags = dictionary.decodeFlags(append, scratch);
assert prevFlag >= 0;
boolean allowed =
dictionary.onlyincompound == -1
|| !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
compatible = allowed && hasCrossCheckedFlag((char) prevFlag, appendFlags, false);
compatible = allowed && Dictionary.hasFlag(appendFlags, (char) prevFlag);
} else {
compatible = false;
}
@ -417,20 +412,19 @@ final class Stemmer {
compatible = true;
} else {
// check if affix is allowed in a non-compound word
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
compatible = !dictionary.hasFlag(append, (char) dictionary.onlyincompound, scratch);
}
} else if (crossProduct) {
// cross check incoming continuation class (flag of previous affix) against list.
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
char[] appendFlags = dictionary.decodeFlags(append, scratch);
assert prevFlag >= 0;
boolean allowed =
dictionary.onlyincompound == -1
|| !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
compatible =
allowed && hasCrossCheckedFlag((char) prevFlag, appendFlags, previousWasPrefix);
allowed
&& ((appendFlags.length == 0 && previousWasPrefix)
|| Dictionary.hasFlag(appendFlags, (char) prevFlag));
} else {
compatible = false;
}
@ -539,8 +533,7 @@ final class Stemmer {
IntsRef forms = dictionary.lookupWord(strippedWord, 0, length);
if (forms != null) {
for (int i = 0; i < forms.length; i += formStep) {
dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
char[] wordFlags = Dictionary.decodeFlags(scratch);
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
if (Dictionary.hasFlag(wordFlags, flag)) {
// confusing: in this one exception, we already chained the first prefix against the
// second,
@ -550,9 +543,7 @@ final class Stemmer {
&& prefixFlag >= 0
&& !Dictionary.hasFlag(wordFlags, (char) prefixFlag)) {
// see if we can chain prefix thru the suffix continuation class (only if it has any!)
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
if (!hasCrossCheckedFlag((char) prefixFlag, appendFlags, false)) {
if (!dictionary.hasFlag(append, (char) prefixFlag, scratch)) {
continue;
}
}
@ -560,9 +551,8 @@ final class Stemmer {
// if circumfix was previously set by a prefix, we must check this suffix,
// to ensure it has it, and vice versa
if (dictionary.circumfix != -1) {
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
boolean suffixCircumfix = Dictionary.hasFlag(appendFlags, (char) dictionary.circumfix);
boolean suffixCircumfix =
dictionary.hasFlag(append, (char) dictionary.circumfix, scratch);
if (circumfix != suffixCircumfix) {
continue;
}
@ -585,9 +575,7 @@ final class Stemmer {
// if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we
// have that flag
if (dictionary.circumfix != -1 && !circumfix && prefix) {
dictionary.flagLookup.get(append, scratch);
char[] appendFlags = Dictionary.decodeFlags(scratch);
circumfix = Dictionary.hasFlag(appendFlags, (char) dictionary.circumfix);
circumfix = dictionary.hasFlag(append, (char) dictionary.circumfix, scratch);
}
if (crossProduct && recursionDepth <= 1) {
@ -635,16 +623,4 @@ final class Stemmer {
return stems;
}
/**
* Checks if the given flag cross checks with the given array of flags
*
* @param flag Flag to cross check with the array of flags
* @param flags Array of flags to cross check against. Can be {@code null}
* @return {@code true} if the flag is found in the array or the array is {@code null}, {@code
* false} otherwise
*/
private boolean hasCrossCheckedFlag(char flag, char[] flags, boolean matchEmpty) {
return (flags.length == 0 && matchEmpty) || Arrays.binarySearch(flags, flag) >= 0;
}
}

View File

@ -49,16 +49,14 @@ public class TestDictionary extends LuceneTestCase {
assertEquals(1, ordList.length);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char[] flags = Dictionary.decodeFlags(ref);
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
assertEquals(1, flags.length);
int offset = random().nextInt(10);
ordList = dictionary.lookupWord((" ".repeat(offset) + "lucen").toCharArray(), offset, 5);
assertNotNull(ordList);
assertEquals(1, ordList.length);
dictionary.flagLookup.get(ordList.ints[0], ref);
flags = Dictionary.decodeFlags(ref);
flags = dictionary.decodeFlags(ordList.ints[0], ref);
assertEquals(1, flags.length);
affixStream.close();
@ -76,8 +74,7 @@ public class TestDictionary extends LuceneTestCase {
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char[] flags = Dictionary.decodeFlags(ref);
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
assertEquals(1, flags.length);
affixStream.close();
@ -95,8 +92,7 @@ public class TestDictionary extends LuceneTestCase {
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char[] flags = Dictionary.decodeFlags(ref);
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
assertEquals(1, flags.length);
affixStream.close();
@ -114,8 +110,7 @@ public class TestDictionary extends LuceneTestCase {
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char[] flags = Dictionary.decodeFlags(ref);
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
assertEquals(1, flags.length);
affixStream.close();