mirror of https://github.com/apache/lucene.git
LUCENE-9706: Hunspell: support NEEDAFFIX flag on affixes (#2262)
This commit is contained in:
parent
800f4d0919
commit
4ba78f2ab2
|
@@ -76,7 +76,7 @@ public class Dictionary {
|
|||
|
||||
static final char[] NOFLAGS = new char[0];
|
||||
|
||||
static final int FLAG_UNSET = 0;
|
||||
static final char FLAG_UNSET = (char) 0;
|
||||
private static final int DEFAULT_FLAGS = 65510;
|
||||
private static final char HIDDEN_FLAG = (char) 65511; // called 'ONLYUPCASEFLAG' in Hunspell
|
||||
|
||||
|
|
|
@@ -191,7 +191,7 @@ final class Stemmer {
|
|||
length,
|
||||
context,
|
||||
-1,
|
||||
(char) 0,
|
||||
Dictionary.FLAG_UNSET,
|
||||
-1,
|
||||
0,
|
||||
true,
|
||||
|
@@ -361,6 +361,7 @@ final class Stemmer {
|
|||
pureAffix ? length - i : strippedWord.length,
|
||||
context,
|
||||
prefix,
|
||||
previous,
|
||||
-1,
|
||||
recursionDepth,
|
||||
true,
|
||||
|
@@ -413,6 +414,7 @@ final class Stemmer {
|
|||
pureAffix ? i : strippedWord.length,
|
||||
context,
|
||||
suffix,
|
||||
previous,
|
||||
prefixId,
|
||||
recursionDepth,
|
||||
false,
|
||||
|
@@ -543,6 +545,7 @@ final class Stemmer {
|
|||
int length,
|
||||
WordContext context,
|
||||
int affix,
|
||||
int previousAffix,
|
||||
int prefixId,
|
||||
int recursionDepth,
|
||||
boolean prefix,
|
||||
|
@@ -553,7 +556,8 @@ final class Stemmer {
|
|||
|
||||
List<CharsRef> stems = new ArrayList<>();
|
||||
|
||||
IntsRef forms = dictionary.lookupWord(strippedWord, offset, length);
|
||||
boolean skipLookup = needsAnotherAffix(affix, previousAffix, !prefix);
|
||||
IntsRef forms = skipLookup ? null : dictionary.lookupWord(strippedWord, offset, length);
|
||||
if (forms != null) {
|
||||
for (int i = 0; i < forms.length; i += formStep) {
|
||||
char[] wordFlags = dictionary.decodeFlags(forms.ints[forms.offset + i], scratch);
|
||||
|
@@ -651,6 +655,15 @@ final class Stemmer {
|
|||
return stems;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a word form produced by stripping {@code affix} still requires a further affix
 * before it may be accepted.
 *
 * <p>Returns {@code true} only when {@code affix} itself appends the dictionary's NEEDAFFIX flag
 * and at least one of the following holds: the affix is not a suffix, no previous affix has been
 * applied ({@code previousAffix < 0}), or the previous affix also appends the NEEDAFFIX flag. In
 * every other case the result is {@code false}.
 *
 * @param affix id of the affix currently being applied
 * @param previousAffix id of the affix applied before this one; a negative value means there is
 *     none
 * @param isSuffix {@code true} when {@code affix} is a suffix, {@code false} for a prefix
 * @return {@code true} if another affix is still needed
 */
private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix) {
|
||||
// Only affixes that carry the NEEDAFFIX flag can demand a companion affix.
if (isFlagAppendedByAffix(affix, dictionary.needaffix)) {
|
||||
// A flagged prefix always needs another affix.
return !isSuffix
|
||||
// A flagged suffix with nothing applied before it needs another affix.
|| previousAffix < 0
|
||||
// A flagged suffix preceded by another flagged affix still needs a real one.
|| isFlagAppendedByAffix(previousAffix, dictionary.needaffix);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isFlagAppendedByAffix(int affixId, char flag) {
|
||||
if (affixId < 0 || flag == Dictionary.FLAG_UNSET) return false;
|
||||
int appendId = dictionary.affixData(affixId, Dictionary.AFFIX_APPEND);
|
||||
|
|
|
@@ -46,6 +46,11 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
doTest("i53643");
|
||||
}
|
||||
|
||||
// Runs the shared doTest helper against the "needaffix5" fixture, which covers the NEEDAFFIX
// flag placed on affixes rather than on dictionary words.
// NOTE(review): presumably doTest loads the fixture files with this base name; confirm in the
// test base class.
@Test
|
||||
public void needAffixOnAffixes() throws Exception {
|
||||
doTest("needaffix5");
|
||||
}
|
||||
|
||||
// Runs the shared doTest helper against the "break" fixture.
public void testBreak() throws Exception {
|
||||
doTest("break");
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,13 @@
|
|||
# on affixes
|
||||
NEEDAFFIX X
|
||||
|
||||
SFX A Y 2
|
||||
SFX A 0 suf/B .
|
||||
SFX A 0 pseudosuf/XB .
|
||||
|
||||
SFX B Y 1
|
||||
SFX B 0 bar .
|
||||
|
||||
PFX C Y 2
|
||||
PFX C 0 pre .
|
||||
PFX C 0 pseudopre/X .
|
|
@@ -0,0 +1,2 @@
|
|||
1
|
||||
foo/AC
|
|
@@ -0,0 +1,11 @@
|
|||
foo
|
||||
prefoo
|
||||
foosuf
|
||||
prefoosuf
|
||||
foosufbar
|
||||
prefoosufbar
|
||||
pseudoprefoosuf
|
||||
pseudoprefoosufbar
|
||||
pseudoprefoopseudosufbar
|
||||
prefoopseudosuf
|
||||
prefoopseudosufbar
|
|
@@ -0,0 +1,3 @@
|
|||
pseudoprefoo
|
||||
foopseudosuf
|
||||
pseudoprefoopseudosuf
|
Loading…
Reference in New Issue