LUCENE-2630: fix intl test bugs that rely on cldr version

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@997180 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-09-15 03:30:35 +00:00
parent 2d9eb62343
commit 774eaeada0
4 changed files with 44 additions and 28 deletions

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.io.Serializable;
import java.text.Collator;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
@ -94,6 +95,11 @@ public class TestSort extends LuceneTestCase implements Serializable {
{ "Z", "f g", null, null, null, null, null, null, null, null, null, null}
};
// the sort order of Ø versus U depends on the version of the rules being used
// for the inherited root locale: Ø's order isnt specified in Locale.US since
// its not used in english.
private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
// create an index of all the documents, or just the x, or just the y documents
private IndexSearcher getIndex (boolean even, boolean odd)
throws IOException {
@ -595,7 +601,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
// (which sort differently depending on locale)
public void testInternationalSort() throws Exception {
sort.setSort (new SortField ("i18n", Locale.US));
assertMatches (full, queryY, sort, "BFJDH");
assertMatches (full, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
assertMatches (full, queryY, sort, "BJDFH");
@ -619,7 +625,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
assertMatches (multiSearcher, queryY, sort, "BJDFH");
sort.setSort (new SortField ("i18n", Locale.US));
assertMatches (multiSearcher, queryY, sort, "BFJDH");
assertMatches (multiSearcher, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
assertMatches (multiSearcher, queryY, sort, "BJDHF");

View File

@ -31,10 +31,10 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* testcase for offsets
*/
public void testOffsets() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
new int[] { 0, 2, 7, 9, 12 },
new int[] { 2, 7, 9, 12, 17});
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
}
@ -49,16 +49,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
*/
public void testBuggyTokenType() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
/* correct testcase
public void testTokenType() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์ ๑๒๓",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
}
*/
@ -90,18 +92,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
public void testPositionIncrements() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
new int[] { 0, 6, 14, 20 },
new int[] { 6, 9, 20, 23 },
new int[] { 1, 1, 2, 1 });
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้อง the แสดงว่างานดี",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },
new int[] { 3, 6, 9, 13, 22, 25, 28, 30 },
new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
// case that a stopword is adjacent to thai text, with no whitespace
assertAnalyzesTo(analyzer, "ประโยคว่าtheประโยคว่า",
new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
new int[] { 0, 6, 12, 18 },
new int[] { 6, 9, 18, 21 },
new int[] { 1, 1, 2, 1 });
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องthe แสดงว่างานดี",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
new int[] { 0, 3, 6, 9, 17, 21, 24, 27 },
new int[] { 3, 6, 9, 13, 21, 24, 27, 29 },
new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
}
public void testReusableTokenStream() throws Exception {

View File

@ -25,7 +25,11 @@ import java.util.Locale;
public class TestCollationKeyAnalyzer extends CollationTestBase {
// the sort order of Ø versus U depends on the version of the rules being used
// for the inherited root locale: Ø's order isnt specified in Locale.US since
// its not used in english.
private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
@ -69,9 +73,9 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
Analyzer denmarkAnalyzer
= new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
// The ICU Collator and java.text.Collator implementations differ in their
// The ICU Collator and Sun java.text.Collator implementations differ in their
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH");
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
}
}

View File

@ -28,7 +28,11 @@ import java.io.Reader;
public class TestCollationKeyFilter extends CollationTestBase {
// the sort order of Ø versus U depends on the version of the rules being used
// for the inherited root locale: Ø's order isnt specified in Locale.US since
// its not used in english.
boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
@ -87,9 +91,9 @@ public class TestCollationKeyFilter extends CollationTestBase {
Analyzer denmarkAnalyzer
= new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));
// The ICU Collator and java.text.Collator implementations differ in their
// The ICU Collator and Sun java.text.Collator implementations differ in their
// orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH");
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
}
}