LUCENE-2014: add a thai test to prevent any similar regression

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@831189 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2009-10-30 03:26:44 +00:00
parent 19e55ea991
commit 8861ba2ffd
1 changed file with 20 additions and 0 deletions


@@ -90,6 +90,26 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
new String[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" });
}
/*
* Test that position increments are adjusted correctly for stopwords.
*/
public void testPositionIncrements() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
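    // expected tokens, start offsets, end offsets, and position increments;
    // the stopword "the" is removed, so the token following it gets a position increment of 2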
    assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
        new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
        new int[] { 0, 6, 14, 20 },
        new int[] { 6, 9, 20, 23 },
        new int[] { 1, 1, 2, 1 });

    // case that a stopword is adjacent to thai text, with no whitespace
    assertAnalyzesTo(analyzer, "ประโยคว่าtheประโยคว่า",
        new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
        new int[] { 0, 6, 12, 18 },
        new int[] { 6, 9, 18, 21 },
        new int[] { 1, 1, 2, 1 });
  }

  public void testReusableTokenStream() throws Exception {
    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesToReuse(analyzer, "", new String[] {});