From 8861ba2ffda71bab956eb6903b79b71d8593c748 Mon Sep 17 00:00:00 2001
From: Robert Muir <rmuir@apache.org>
Date: Fri, 30 Oct 2009 03:26:44 +0000
Subject: [PATCH] LUCENE-2014: add a thai test to prevent any similar
 regression

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@831189 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/analysis/th/TestThaiAnalyzer.java  | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
index aae016352f5..cf8a09a5ca3 100644
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@@ -90,6 +90,26 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
 			new String[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" });
 	}
 	
+	/*
+	 * Test that position increments are adjusted correctly for stopwords: "the" is absent from the expected tokens, and the token after it expects 2 in the last int[].
+	 */
+	public void testPositionIncrements() throws Exception {
+	  ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
+	  // stopword separated from the Thai text by spaces; the int[] arguments look like start offsets, end offsets, position increments -- TODO confirm against assertAnalyzesTo
+	  assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า",
+	          new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
+	          new int[] { 0, 6, 14, 20 },
+	          new int[] { 6, 9, 20, 23 },
+	          new int[] { 1, 1, 2, 1 });
+	 
+	  // same words with the stopword directly adjacent to the Thai text (no whitespace): the last two tokens' expected offsets are 2 lower, the other expectations are unchanged
+	  assertAnalyzesTo(analyzer, "ประโยคว่าtheประโยคว่า",
+	      new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" },
+	      new int[] { 0, 6, 12, 18 },
+	      new int[] { 6, 9, 18, 21 },
+	      new int[] { 1, 1, 2, 1 });
+	}
+	
 	public void testReusableTokenStream() throws Exception {
 	  ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
 	  assertAnalyzesToReuse(analyzer, "", new String[] {});