LUCENE-9702: Hunspell: support alternate casing for short language codes (#2253)

This commit is contained in:
Peter Gromov 2021-01-29 11:46:45 +01:00 committed by GitHub
parent 6635d7a5e7
commit ff943ece8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 28 additions and 1 deletion

View File

@ -374,7 +374,9 @@ public class Dictionary {
fullStrip = true;
} else if ("LANG".equals(firstWord)) {
language = singleArgument(reader, line);
alternateCasing = "tr_TR".equals(language) || "az_AZ".equals(language);
int underscore = language.indexOf("_");
String langCode = underscore < 0 ? language : language.substring(0, underscore);
alternateCasing = langCode.equals("tr") || langCode.equals("az");
} else if ("BREAK".equals(firstWord)) {
breaks = parseBreaks(reader, line);
} else if ("FORBIDDENWORD".equals(firstWord)) {

View File

@ -56,6 +56,11 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("i53643");
}
/**
 * Runs the shared {@code doTest} check for the case named "dotless_i".
 *
 * <p>NOTE(review): presumably exercises Turkish dotted/dotless "i" casing for a dictionary that
 * declares a short language code ({@code LANG tr}) — confirm against the fixture files and the
 * {@code doTest} helper, neither of which is visible here.
 */
@Test
public void dotless_i() throws Exception {
doTest("dotless_i");
}
@Test
public void needAffixOnAffixes() throws Exception {
doTest("needaffix5");

View File

@ -0,0 +1,2 @@
SET UTF-8
LANG tr

View File

@ -0,0 +1,4 @@
3
ışık
Diyarbakır

View File

@ -0,0 +1,8 @@
Diyarbakır
DİYARBAKIR
İç
ışık
ık
İÇ
IŞIK

View File

@ -0,0 +1,6 @@
Diyarbakir
DIYARBAKIR
İşık
İŞIK