LUCENE-9706: Hunspell: support NEEDAFFIX flag on affixes (#2262)

This commit is contained in:
Peter Gromov 2021-01-29 08:24:23 +01:00 committed by GitHub
parent 800f4d0919
commit 4ba78f2ab2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 50 additions and 3 deletions

View File

@ -76,7 +76,7 @@ public class Dictionary {
// Zero-length flag array constant.
static final char[] NOFLAGS = new char[0];
// NOTE(review): this line and the next both declare FLAG_UNSET (as int, then as char). They look
// like the removed and added sides of this diff hunk — confirm only the char declaration remains.
static final int FLAG_UNSET = 0;
// Zero-valued char constant; Stemmer's isFlagAppendedByAffix returns false when given this flag.
static final char FLAG_UNSET = (char) 0;
private static final int DEFAULT_FLAGS = 65510;
private static final char HIDDEN_FLAG = (char) 65511; // called 'ONLYUPCASEFLAG' in Hunspell

View File

@ -191,7 +191,7 @@ final class Stemmer {
length,
context,
-1,
(char) 0,
Dictionary.FLAG_UNSET,
-1,
0,
true,
@ -361,6 +361,7 @@ final class Stemmer {
pureAffix ? length - i : strippedWord.length,
context,
prefix,
previous,
-1,
recursionDepth,
true,
@ -413,6 +414,7 @@ final class Stemmer {
pureAffix ? i : strippedWord.length,
context,
suffix,
previous,
prefixId,
recursionDepth,
false,
@ -543,6 +545,7 @@ final class Stemmer {
int length,
WordContext context,
int affix,
int previousAffix,
int prefixId,
int recursionDepth,
boolean prefix,
@ -553,7 +556,8 @@ final class Stemmer {
List<CharsRef> stems = new ArrayList<>();
IntsRef forms = dictionary.lookupWord(strippedWord, offset, length);
boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix);
IntsRef forms = skipLookup ? null : dictionary.lookupWord(strippedWord, offset, length);
if (forms != null) {
for (int i = 0; i < forms.length; i += formStep) {
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
@ -651,6 +655,15 @@ final class Stemmer {
return stems;
}
/**
 * Reports whether {@code affix} carries the dictionary's {@code needaffix} flag without anything
 * else making up for it. The visible caller skips the dictionary lookup when this returns
 * {@code true}.
 *
 * @param affix id of the affix being checked
 * @param previousAffix id of the affix handled before {@code affix}; only tested for being
 *     negative or for carrying the flag itself
 * @param isSuffix whether {@code affix} is a suffix (the visible caller passes {@code !prefix})
 * @return {@code false} when {@code affix} does not append the flag; otherwise {@code true} if
 *     {@code affix} is not a suffix, {@code previousAffix} is negative, or {@code previousAffix}
 *     appends the flag as well
 */
private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix) {
  boolean flagged = isFlagAppendedByAffix(affix, dictionary.needaffix);
  if (!flagged) {
    return false;
  }
  // A flagged non-suffix, or a flagged suffix with nothing before it, cannot stand alone.
  if (!isSuffix || previousAffix < 0) {
    return true;
  }
  // Otherwise the outcome depends on whether the earlier affix is flagged too.
  return isFlagAppendedByAffix(previousAffix, dictionary.needaffix);
}
private boolean isFlagAppendedByAffix(int affixId, char flag) {
if (affixId < 0 || flag == Dictionary.FLAG_UNSET) return false;
int appendId = dictionary.affixData(affixId, Dictionary.AFFIX_APPEND);

View File

@ -46,6 +46,11 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("i53643");
}
/**
 * Runs the shared {@code doTest} helper on the {@code needaffix5} fixture.
 *
 * <p>NOTE(review): presumably this is the fixture that declares NEEDAFFIX on affixes rather than
 * on dictionary entries — confirm against the test resources.
 */
@Test
public void needAffixOnAffixes() throws Exception {
  final String fixtureName = "needaffix5";
  doTest(fixtureName);
}
/** Runs the shared {@code doTest} helper on the {@code break} fixture. */
public void testBreak() throws Exception {
  final String fixtureName = "break";
  doTest(fixtureName);
}

View File

@ -0,0 +1,13 @@
# on affixes
NEEDAFFIX X
SFX A Y 2
SFX A 0 suf/B .
SFX A 0 pseudosuf/XB .
SFX B Y 1
SFX B 0 bar .
PFX C Y 2
PFX C 0 pre .
PFX C 0 pseudopre/X .

View File

@ -0,0 +1,11 @@
foo
prefoo
foosuf
prefoosuf
foosufbar
prefoosufbar
pseudoprefoosuf
pseudoprefoosufbar
pseudoprefoopseudosufbar
prefoopseudosuf
prefoopseudosufbar

View File

@ -0,0 +1,3 @@
pseudoprefoo
foopseudosuf
pseudoprefoopseudosuf