From 8861ba2ffda71bab956eb6903b79b71d8593c748 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 30 Oct 2009 03:26:44 +0000 Subject: [PATCH] LUCENE-2014: add a thai test to prevent any similar regression git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@831189 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/analysis/th/TestThaiAnalyzer.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index aae016352f5..cf8a09a5ca3 100644 --- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -90,6 +90,26 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { new String[] { "ประโยค", "ว่า", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs" }); } + /* + * Test that position increments are adjusted correctly for stopwords: the English stopword between the two Thai phrases is absent from the expected tokens, and the token after it is expected to have an increment of 2 (last array) rather than 1. + */ + public void testPositionIncrements() throws Exception { + ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT); + + assertAnalyzesTo(analyzer, "ประโยคว่า the ประโยคว่า", + new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" }, + new int[] { 0, 6, 14, 20 }, + new int[] { 6, 9, 20, 23 }, + new int[] { 1, 1, 2, 1 }); + + // same expectation when the stopword is directly adjacent to Thai text, with no whitespace around it + assertAnalyzesTo(analyzer, "ประโยคว่าtheประโยคว่า", + new String[] { "ประโยค", "ว่า", "ประโยค", "ว่า" }, + new int[] { 0, 6, 12, 18 }, + new int[] { 6, 9, 18, 21 }, + new int[] { 1, 1, 2, 1 }); + } + public void testReusableTokenStream() throws Exception { ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT); assertAnalyzesToReuse(analyzer, "", new String[] {});