LUCENE-2630: fix intl test bugs that rely on cldr version

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@997180 13f79535-47bb-0310-9956-ffa450edef68
2010-09-15 03:30:35 +00:00 · 2010-09-15 03:30:35 +00:00 · 774eaeada0
parent 2d9eb62343
commit 774eaeada0
4 changed files with 44 additions and 28 deletions
--- a/lucene/src/test/org/apache/lucene/search/TestSort.java
+++ b/lucene/src/test/org/apache/lucene/search/TestSort.java
@ -19,6 +19,7 @@ package org.apache.lucene.search;

 import java.io.IOException;
 import java.io.Serializable;
+import java.text.Collator;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.HashMap;
@ -94,6 +95,11 @@ public class TestSort extends LuceneTestCase implements Serializable {
  {   "Z",   "f g",           null,          null,           null,    null,    null,              null,           null, null, null, null}
  };
  
+  // the sort order of Ø versus U depends on the version of the rules being used
+  // for the inherited root locale: Ø's order isnt specified in Locale.US since 
+  // its not used in english.
+  private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
+  
  // create an index of all the documents, or just the x, or just the y documents
  private IndexSearcher getIndex (boolean even, boolean odd)
  throws IOException {
@ -595,7 +601,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
  // (which sort differently depending on locale)
  public void testInternationalSort() throws Exception {
    sort.setSort (new SortField ("i18n", Locale.US));
-    assertMatches (full, queryY, sort, "BFJDH");
+    assertMatches (full, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");

    sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
    assertMatches (full, queryY, sort, "BJDFH");
@ -619,7 +625,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
    assertMatches (multiSearcher, queryY, sort, "BJDFH");
    
    sort.setSort (new SortField ("i18n", Locale.US));
-    assertMatches (multiSearcher, queryY, sort, "BFJDH");
+    assertMatches (multiSearcher, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH");
    
    sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
    assertMatches (multiSearcher, queryY, sort, "BJDHF");
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@ -31,10 +31,10 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
 	 * testcase for offsets
 	 */
 	public void testOffsets() throws Exception {
-		assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์", 
-				new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
-				new int[] { 0, 2, 7, 9, 12 },
-				new int[] { 2, 7, 9, 12, 17});
+		assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี", 
+		    new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+				new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
+				new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
 	}
 	
 	
@ -49,16 +49,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
 	 * Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
 	 */
 	public void testBuggyTokenType() throws Exception {
-		assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓", 
-				new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
-				new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
+		assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓", 
+		    new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
+				new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", 
+		     "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
 	}
 	
 	/* correct testcase
 	public void testTokenType() throws Exception {
-		assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์ ๑๒๓", 
-				new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
-				new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓", 
+        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
+        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", 
+         "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
 	}
 	*/

@ -90,18 +92,18 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
 	public void testPositionIncrements() throws Exception {
 	  ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);

-	  assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
-	          new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
-	          new int[] { 0, 6, 14, 20 },
-	          new int[] { 6, 9, 20, 23 },
-	          new int[] { 1, 1, 2, 1 });
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้อง the แสดงว่างานดี", 
+        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+        new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },
+        new int[] { 3, 6, 9, 13, 22, 25, 28, 30 },
+        new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
 	 
 	  // case that a stopword is adjacent to thai text, with no whitespace
-	  assertAnalyzesTo(analyzer, "ประโยคว่าtheประโยคว่า",
-	      new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
-	      new int[] { 0, 6, 12, 18 },
-	      new int[] { 6, 9, 18, 21 },
-	      new int[] { 1, 1, 2, 1 });
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องthe แสดงว่างานดี", 
+        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+        new int[] { 0, 3, 6, 9, 17, 21, 24, 27 },
+        new int[] { 3, 6, 9, 13, 21, 24, 27, 29 },
+        new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
 	}
 	
 	public void testReusableTokenStream() throws Exception {
--- a/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
@ -25,7 +25,11 @@ import java.util.Locale;


 public class TestCollationKeyAnalyzer extends CollationTestBase {
-
+  // the sort order of Ø versus U depends on the version of the rules being used
+  // for the inherited root locale: Ø's order isnt specified in Locale.US since 
+  // its not used in english.
+  private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
+  
  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
  // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
  // characters properly.
@ -69,9 +73,9 @@ public class TestCollationKeyAnalyzer extends CollationTestBase {
    Analyzer denmarkAnalyzer 
      = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
    
-    // The ICU Collator and java.text.Collator implementations differ in their
+    // The ICU Collator and Sun java.text.Collator implementations differ in their
    // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
    testCollationKeySort
-      (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH");
+      (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
  }
 }
--- a/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
@ -28,7 +28,11 @@ import java.io.Reader;


 public class TestCollationKeyFilter extends CollationTestBase {
-
+  // the sort order of Ø versus U depends on the version of the rules being used
+  // for the inherited root locale: Ø's order isnt specified in Locale.US since 
+  // its not used in english.
+  boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
+  
  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
  // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
  // characters properly.
@ -87,9 +91,9 @@ public class TestCollationKeyFilter extends CollationTestBase {
    Analyzer denmarkAnalyzer 
      = new TestAnalyzer(Collator.getInstance(new Locale("da", "dk")));
    
-    // The ICU Collator and java.text.Collator implementations differ in their
+    // The ICU Collator and Sun java.text.Collator implementations differ in their
    // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US.
    testCollationKeySort
-      (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH");
+      (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, oStrokeFirst ? "BFJHD" : "BFJDH");
  }
 }