mirror of https://github.com/apache/lucene.git
LUCENE-9702: Hunspell: support alternate casing for short language codes (#2253)
This commit is contained in:
parent
6635d7a5e7
commit
ff943ece8f
|
@ -374,7 +374,9 @@ public class Dictionary {
|
|||
fullStrip = true;
|
||||
} else if ("LANG".equals(firstWord)) {
|
||||
language = singleArgument(reader, line);
|
||||
alternateCasing = "tr_TR".equals(language) || "az_AZ".equals(language);
|
||||
int underscore = language.indexOf("_");
|
||||
String langCode = underscore < 0 ? language : language.substring(0, underscore);
|
||||
alternateCasing = langCode.equals("tr") || langCode.equals("az");
|
||||
} else if ("BREAK".equals(firstWord)) {
|
||||
breaks = parseBreaks(reader, line);
|
||||
} else if ("FORBIDDENWORD".equals(firstWord)) {
|
||||
|
|
|
@ -56,6 +56,11 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
doTest("i53643");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void dotless_i() throws Exception {
|
||||
doTest("dotless_i");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void needAffixOnAffixes() throws Exception {
|
||||
doTest("needaffix5");
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
SET UTF-8
|
||||
LANG tr
|
|
@ -0,0 +1,4 @@
|
|||
3
|
||||
iç
|
||||
ışık
|
||||
Diyarbakır
|
|
@ -0,0 +1,8 @@
|
|||
Diyarbakır
|
||||
DİYARBAKIR
|
||||
iç
|
||||
İç
|
||||
ışık
|
||||
Işık
|
||||
İÇ
|
||||
IŞIK
|
|
@ -0,0 +1,6 @@
|
|||
Diyarbakir
|
||||
DIYARBAKIR
|
||||
Iç
|
||||
İşık
|
||||
IÇ
|
||||
İŞIK
|
Loading…
Reference in New Issue