mirror of https://github.com/apache/lucene.git
LUCENE-2630: fix intl test bugs that rely on cldr version
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@997180 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2d9eb62343
commit
774eaeada0
|
@ -19,6 +19,7 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
|
@ -94,6 +95,11 @@ public class TestSort extends LuceneTestCase implements Serializable {
|
|||
{ "Z", "f g", null, null, null, null, null, null, null, null, null, null}
|
||||
};
|
||||
|
||||
// the sort order of Ø versus U depends on the version of the rules being used
|
||||
// for the inherited root locale: Ø's order isnt specified in Locale.US since
|
||||
// its not used in english.
|
||||
private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
|
||||
|
||||
// create an index of all the documents, or just the x, or just the y documents
|
||||
private IndexSearcher getIndex (boolean even, boolean odd)
|
||||
throws IOException {
|
||||
|
@ -595,7 +601,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
|
|||
// (which sort differently depending on locale)
|
||||
public void testInternationalSort() throws Exception {
|
||||
sort.setSort (new SortField ("i18n", Locale.US));
|
||||
assertMatches (full, queryY, sort, "BFJDH");
|
||||
assertMatches (full, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
|
||||
|
||||
sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
|
||||
assertMatches (full, queryY, sort, "BJDFH");
|
||||
|
@ -619,7 +625,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
|
|||
assertMatches (multiSearcher, queryY, sort, "BJDFH");
|
||||
|
||||
sort.setSort (new SortField ("i18n", Locale.US));
|
||||
assertMatches (multiSearcher, queryY, sort, "BFJDH");
|
||||
assertMatches (multiSearcher, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
|
||||
|
||||
sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
|
||||
assertMatches (multiSearcher, queryY, sort, "BJDHF");
|
||||
|
|
|
@ -31,10 +31,10 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
|||
* testcase for offsets
|
||||
*/
|
||||
public void testOffsets() throws Exception {
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์",
|
||||
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
|
||||
new int[] { 0, 2, 7, 9, 12 },
|
||||
new int[] { 2, 7, 9, 12, 17});
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี",
|
||||
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
|
||||
new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
|
||||
new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
|
||||
}
|
||||
|
||||
|
||||
|
@ -49,16 +49,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
|||
* Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
|
||||
*/
|
||||
public void testBuggyTokenType() throws Exception {
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
|
||||
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
|
||||
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
|
||||
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
|
||||
}
|
||||
|
||||
/* correct testcase
|
||||
public void testTokenType() throws Exception {
|
||||
assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์ ๑๒๓",
|
||||
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
|
||||
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
|
||||
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -90,18 +92,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testPositionIncrements() throws Exception {
|
||||
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
|
||||
|
||||
assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
|
||||
new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
|
||||
new int[] { 0, 6, 14, 20 },
|
||||
new int[] { 6, 9, 20, 23 },
|
||||
new int[] { 1, 1, 2, 1 });
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้อง the แสดงว่างานดี",
|
||||
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
|
||||
new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },
|
||||
new int[] { 3, 6, 9, 13, 22, 25, 28, 30 },
|
||||
new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
|
||||
|
||||
// case that a stopword is adjacent to thai text, with no whitespace
|
||||
assertAnalyzesTo(analyzer, "ประโยคว่าtheประโยคว่า",
|
||||
new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
|
||||
new int[] { 0, 6, 12, 18 },
|
||||
new int[] { 6, 9, 18, 21 },
|
||||
new int[] { 1, 1, 2, 1 });
|
||||
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องthe แสดงว่างานดี",
|
||||
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
|
||||
new int[] { 0, 3, 6, 9, 17, 21, 24, 27 },
|
||||
new int[] { 3, 6, 9, 13, 21, 24, 27, 29 },
|
||||
new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
|
||||
}
|
||||
|
||||
public void testReusableTokenStream() throws Exception {
|
||||
|
|
|
@ -25,7 +25,11 @@ import java.util.Locale;
|
|||
|
||||
|
||||
public class TestCollationKeyAnalyzer extends CollationTestBase {
|
||||
|
||||
// the sort order of Ø versus U depends on the version of the rules being used
|
||||
// for the inherited root locale: Ø's order isnt specified in Locale.US since
|
||||
// its not used in english.
|
||||
private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
|
||||
|
||||
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||
// characters properly.
|
||||
|
@ -69,9 +73,9 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
|
|||
Analyzer denmarkAnalyzer
|
||||
= new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
|
||||
|
||||
// The ICU Collator and java.text.Collator implementations differ in their
|
||||
// The ICU Collator and Sun java.text.Collator implementations differ in their
|
||||
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
|
||||
testCollationKeySort
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH");
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,11 @@ import java.io.Reader;
|
|||
|
||||
|
||||
public class TestCollationKeyFilter extends CollationTestBase {
|
||||
|
||||
// the sort order of Ø versus U depends on the version of the rules being used
|
||||
// for the inherited root locale: Ø's order isnt specified in Locale.US since
|
||||
// its not used in english.
|
||||
boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
|
||||
|
||||
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||
// characters properly.
|
||||
|
@ -87,9 +91,9 @@ public class TestCollationKeyFilter extends CollationTestBase {
|
|||
Analyzer denmarkAnalyzer
|
||||
= new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));
|
||||
|
||||
// The ICU Collator and java.text.Collator implementations differ in their
|
||||
// The ICU Collator and Sun java.text.Collator implementations differ in their
|
||||
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
|
||||
testCollationKeySort
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH");
|
||||
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue